DENG-2492 Create new GA4 derived table: blogs_sessions_v2 (#5018)
* DENG-2492 initial commit for new table blogs_sessions_v2 * DENG-2492 wrap keywords with backticks
This commit is contained in:
Родитель
63a4d72197
Коммит
ee8de94705
|
@ -0,0 +1,7 @@
|
||||||
|
-- View `ga.blogs_sessions`: passes through every column of the derived table
-- `ga_derived.blogs_sessions_v2` unchanged.
CREATE OR REPLACE VIEW `moz-fx-data-marketing-prod.ga.blogs_sessions` AS
SELECT
  *
FROM
  `moz-fx-data-marketing-prod.ga_derived.blogs_sessions_v2`
|
|
@ -0,0 +1,21 @@
|
||||||
|
# Table metadata for blogs_sessions_v2.
friendly_name: Blogs Sessions V2
description: |-
  Intermediate table containing normalized sessions for blog.mozilla.org, sourced from Google Analytics 4 (GA4)
owners:
- kwindau@mozilla.com
labels:
  incremental: true
  # BigQuery label values may only contain lowercase letters, digits, underscores
  # and dashes, so the owner label carries the local part of the address only;
  # the full e-mail address is listed under `owners` above.
  owner1: kwindau
scheduling:
  dag_name: bqetl_google_analytics_derived_ga4
bigquery:
  # Daily partitions on the `date` column; queries must filter on the partition column.
  time_partitioning:
    type: day
    field: date
    require_partition_filter: true
    # null: no partition expiration is configured here.
    expiration_days: null
  clustering:
    fields:
    - country
references: {}
deprecated: false
|
|
@ -0,0 +1,205 @@
|
||||||
|
-- Produces one row per blog.mozilla.org session for @submission_date from the GA4
-- export tables, attributing each session to the blog / sub-blog of its entrance page.
--
-- Step 1: every page_view event of the day, with the page location and the
-- `entrances` flag pulled out of the event_params array.
WITH all_page_views AS (
  SELECT
    PARSE_DATE('%Y%m%d', event_date) AS `date`,
    event_timestamp,
    -- Session key: user_pseudo_id and ga_session_id joined with '-'.
    -- NOTE(review): `||` yields NULL when either operand is NULL, so events missing
    -- either value would all share a NULL visit_identifier and be collapsed into a
    -- single row (with sessions = 0) downstream. Confirm whether such events occur
    -- in the export and whether they should be filtered out.
    user_pseudo_id || '-' || CAST(
      (
        SELECT
          `value`
        FROM
          UNNEST(event_params)
        WHERE
          key = 'ga_session_id'
        LIMIT
          1
      ).int_value AS STRING
    ) AS visit_identifier,
    device.category AS device_category,
    device.operating_system AS operating_system,
    device.web_info.browser AS browser,
    device.language AS `language`,
    geo.country AS country,
    collected_traffic_source.manual_source AS source,
    collected_traffic_source.manual_medium AS medium,
    collected_traffic_source.manual_campaign_name AS campaign,
    collected_traffic_source.manual_content AS content,
    (
      SELECT
        `value`
      FROM
        UNNEST(event_params)
      WHERE
        key = 'page_location'
      LIMIT
        1
    ).string_value AS page_location,
    (
      SELECT
        `value`
      FROM
        UNNEST(event_params)
      WHERE
        key = 'entrances'
      LIMIT
        1
    ).int_value AS is_entrance
  FROM
    `moz-fx-data-marketing-prod.analytics_314399816.events_*`
  WHERE
    _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', @submission_date)
    AND event_name = 'page_view'
),
-- Step 2: keep entrance page views only, and at most one per session.
-- Google should send a single entrance per session, but occasionally more than one
-- arrives; in that case the earliest one (by event_timestamp) wins.
-- page_path is the page location without its query string and without the
-- https://blog.mozilla.org prefix.
first_entrance_page_views AS (
  SELECT
    `date`,
    visit_identifier,
    device_category,
    operating_system,
    browser,
    `language`,
    country,
    source,
    medium,
    campaign,
    content,
    REGEXP_REPLACE(
      SPLIT(page_location, '?')[SAFE_OFFSET(0)],
      -- Raw string with escaped dots so that '.' only matches a literal dot.
      r'^https://blog\.mozilla\.org',
      ''
    ) AS page_path
  FROM
    all_page_views
  WHERE
    is_entrance = 1
  QUALIFY
    ROW_NUMBER() OVER (PARTITION BY visit_identifier ORDER BY event_timestamp ASC) = 1
),
-- Step 3: derive the raw blog / sub-blog from the first two path segments.
-- page_path starts with '/', so offset 0 of the split is the empty string before it.
entrance_page_views_only AS (
  SELECT
    `date`,
    visit_identifier,
    device_category,
    operating_system,
    browser,
    `language`,
    country,
    source,
    medium,
    campaign,
    content,
    page_path,
    SPLIT(page_path, '/')[SAFE_OFFSET(1)] AS blog,
    SPLIT(page_path, '/')[SAFE_OFFSET(2)] AS subblog
  FROM
    first_entrance_page_views
),
-- Step 4: count sessions per dimension combination. visit_identifier is itself a
-- grouping key, so each group holds a single session.
staging AS (
  SELECT
    epvo.date,
    epvo.visit_identifier,
    epvo.device_category,
    epvo.operating_system,
    epvo.browser,
    epvo.language,
    epvo.country,
    epvo.source,
    epvo.medium,
    epvo.campaign,
    epvo.content,
    epvo.blog,
    epvo.subblog,
    COUNT(DISTINCT epvo.visit_identifier) AS sessions
  FROM
    entrance_page_views_only AS epvo
  GROUP BY
    epvo.date,
    epvo.visit_identifier,
    epvo.device_category,
    epvo.operating_system,
    epvo.browser,
    epvo.language,
    epvo.country,
    epvo.source,
    epvo.medium,
    epvo.campaign,
    epvo.content,
    epvo.blog,
    epvo.subblog
)
-- Step 5: normalize the raw path segments into reporting names.
-- Inside both CASE expressions `blog` / `subblog` are the raw columns from staging,
-- not the output aliases defined here.
SELECT
  `date`,
  visit_identifier,
  device_category,
  operating_system,
  browser,
  `language`,
  country,
  source,
  medium,
  campaign,
  content,
  CASE
    WHEN blog LIKE 'press%'
      THEN 'press'
    WHEN blog = 'firefox'
      THEN 'The Firefox Frontier'
    WHEN blog = 'netPolicy'
      THEN 'Open Policy & Advocacy'
    WHEN LOWER(blog) = 'internetcitizen'
      THEN 'Internet Citizen'
    WHEN blog = 'futurereleases'
      THEN 'Future Releases'
    WHEN blog = 'careers'
      THEN 'Careers'
    WHEN blog = 'opendesign'
      THEN 'Open Design'
    -- An empty first segment means the entrance page was the site root.
    WHEN blog = ''
      THEN 'Blog Home Page'
    WHEN LOWER(blog) IN (
        'blog',
        'addons',
        'security',
        'opendesign',
        'nnethercote',
        'thunderbird',
        'community',
        'l10n',
        'theden',
        'webrtc',
        'berlin',
        'webdev',
        'services',
        'tanvi',
        'laguaridadefirefox',
        'ux',
        'fxtesteng',
        'foundation-archive',
        'nfroyd',
        'sumo',
        'javascript',
        'page',
        'data'
      )
      THEN LOWER(blog)
    ELSE 'other'
  END AS blog,
  CASE
    -- Localized editions of The Firefox Frontier live under /firefox/<locale>.
    WHEN blog = 'firefox'
      AND subblog IN ('ru', 'pt-br', 'pl', 'it', 'id', 'fr', 'es', 'de')
      THEN subblog
    -- Localized press blogs are their own top-level segment (press-<locale>).
    WHEN blog IN (
        'press-de',
        'press-fr',
        'press-es',
        'press-uk',
        'press-pl',
        'press-it',
        'press-br',
        'press-nl'
      )
      THEN blog
    -- Matched case-insensitively, like the Internet Citizen branch of the blog CASE
    -- above, so both output columns agree for mixed-case URLs.
    WHEN LOWER(blog) = 'internetcitizen'
      AND subblog IN ('de', 'fr')
      THEN subblog
    -- Everything else, including non-localized firefox and press pages.
    ELSE 'Main'
  END AS subblog,
  `sessions`
FROM
  staging
|
|
@ -0,0 +1,57 @@
|
||||||
|
# Column schema for blogs_sessions_v2, in output column order.
fields:
- name: date
  type: DATE
  mode: NULLABLE
  description: Date of the visit
- name: visit_identifier
  type: STRING
  mode: NULLABLE
  description: Visit Identifier - Uniquely identifies a visit; concatenation of user_pseudo_id and ga_session_id
- name: device_category
  type: STRING
  mode: NULLABLE
  description: Device Category - The device category the visitor used to visit the site
- name: operating_system
  type: STRING
  mode: NULLABLE
  description: Operating System - The operating system the visitor used to visit the site
- name: browser
  type: STRING
  mode: NULLABLE
  description: Browser - The browser the visiting device was using when it visited the site
- name: language
  type: STRING
  mode: NULLABLE
  description: Language - The language the visiting device was using when it visited the site
- name: country
  type: STRING
  mode: NULLABLE
  description: Country - The country from which events were reported, based on IP address
# Traffic attribution columns.
- name: source
  type: STRING
  mode: NULLABLE
  description: Source - Referring partner domain
- name: medium
  type: STRING
  mode: NULLABLE
  description: Medium - Category of the source, such as 'organic' for a search engine
- name: campaign
  type: STRING
  mode: NULLABLE
  description: Campaign - Identifier for the marketing campaign
- name: content
  type: STRING
  mode: NULLABLE
  description: Content - Indicates the particular link within a campaign
# Blog attribution and the session count.
- name: blog
  type: STRING
  mode: NULLABLE
  description: Blog
- name: subblog
  type: STRING
  mode: NULLABLE
  description: Sub-Blog
- name: sessions
  type: INT64
  mode: NULLABLE
  description: Number of Sessions
|
Загрузка…
Ссылка в новой задаче