Include GA intraday sessions tables (#4582)
* Include GA intraday sessions tables * Update doc string on backfilling ga_sessions * Don't dry-run stub_attribution view
This commit is contained in:
Родитель
f3b13c652e
Коммит
05fed88b07
|
@ -178,6 +178,7 @@ dry_run:
|
|||
- sql/moz-fx-data-shared-prod/firefox_desktop/top_sites/view.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_desktop/quick_suggest/view.sql
|
||||
- sql/moz-fx-data-shared-prod/stub_attribution_service_derived/dl_token_ga_attribution_lookup_v1/query.sql
|
||||
- sql/moz-fx-data-shared-prod/stub_attribution_service/dl_token_ga_attribution_lookup/view.sql
|
||||
# Materialized views
|
||||
- sql/moz-fx-data-shared-prod/telemetry_derived/experiment_search_events_live_v1/init.sql
|
||||
- sql/moz-fx-data-shared-prod/telemetry_derived/experiment_events_live_v1/init.sql
|
||||
|
|
|
@ -34,7 +34,39 @@ RETURNS STRING AS (
|
|||
END
|
||||
);
|
||||
|
||||
WITH daily_sessions AS (
|
||||
WITH historic_and_intraday AS (
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`moz-fx-data-marketing-prod.65789850.ga_sessions_*`
|
||||
WHERE
|
||||
-- This table is partitioned, so we only process the data from session_date
|
||||
-- To handle late-arriving data, we process 3 days of data each day (re-processing the past 2)
|
||||
-- as separate Airflow tasks
|
||||
--
|
||||
-- Here, we need to take data from yesterday, just in case some of our sessions from today
|
||||
-- actually started yesterday. If they did, they'll be filtered out in the HAVING clause
|
||||
_TABLE_SUFFIX
|
||||
BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(@session_date, INTERVAL 1 DAY))
|
||||
-- However, we have data for today that will arrive _tomorrow_! Some inter-day sessions
|
||||
-- will be present in two days, with the same ids. A session should never span more
|
||||
-- than two days though, see https://sql.telemetry.mozilla.org/queries/95882/source
|
||||
-- If one does, our uniqueness check will alert us
|
||||
AND FORMAT_DATE('%Y%m%d', DATE_ADD(@session_date, INTERVAL 1 DAY))
|
||||
UNION ALL
|
||||
-- Intraday sessions are "real-time" exports of the current day's sessions.
|
||||
-- Usually we wouldn't need these, but sometimes GA is slow in adding the
|
||||
-- intraday sessions back into ga_sessions.
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
`moz-fx-data-marketing-prod.65789850.ga_sessions_intraday_*`
|
||||
WHERE
|
||||
_TABLE_SUFFIX
|
||||
BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(@session_date, INTERVAL 1 DAY))
|
||||
AND FORMAT_DATE('%Y%m%d', DATE_ADD(@session_date, INTERVAL 1 DAY))
|
||||
),
|
||||
daily_sessions AS (
|
||||
SELECT
|
||||
mozfun.ga.nullify_string(clientId) AS ga_client_id,
|
||||
-- visitId (or sessionId in GA4) is only guaranteed to be unique within a single client; see the visitId entry at https://support.google.com/analytics/answer/3437719?hl=en
|
||||
|
@ -59,9 +91,9 @@ WITH daily_sessions AS (
|
|||
MIN_BY(trafficSource.medium, visitStartTime) AS medium,
|
||||
MIN_BY(trafficSource.keyword, visitStartTime) AS term,
|
||||
MIN_BY(trafficSource.adContent, visitStartTime) AS content,
|
||||
ARRAY_AGG(
|
||||
mozfun.ga.nullify_string(trafficSource.adwordsClickInfo.gclId) IGNORE NULLS
|
||||
)[0] AS gclid,
|
||||
ARRAY_AGG(mozfun.ga.nullify_string(trafficSource.adwordsClickInfo.gclId) IGNORE NULLS)[
|
||||
0
|
||||
] AS gclid,
|
||||
/* Device */
|
||||
MIN_BY(device.deviceCategory, visitStartTime) AS device_category,
|
||||
MIN_BY(device.mobileDeviceModel, visitStartTime) AS mobile_device_model,
|
||||
|
@ -72,26 +104,12 @@ WITH daily_sessions AS (
|
|||
MIN_BY(device.browser, visitStartTime) AS browser,
|
||||
MIN_BY(device.browserVersion, visitStartTime) AS browser_version,
|
||||
FROM
|
||||
`moz-fx-data-marketing-prod.65789850.ga_sessions_*`
|
||||
WHERE
|
||||
-- This table is partitioned, so we only process the data from session_date
|
||||
-- To handle late-arriving data, we process 3 days of data each day (re-processing the past 2)
|
||||
-- as separate Airflow tasks (or via bqetl backfill, I haven't decided yet)
|
||||
--
|
||||
-- Here, we need to take data from yesterday, just in case some of our sessions from today
|
||||
-- actually started yesterday. If they did, they'll be filtered out in the HAVING clause
|
||||
_TABLE_SUFFIX
|
||||
BETWEEN FORMAT_DATE('%Y%m%d', DATE_SUB(@session_date, INTERVAL 1 DAY))
|
||||
-- However, we have data for today that will arrive _tomorrow_! Some inter-day sessions
|
||||
-- will be present in two days, with the same ids. A session should never span more
|
||||
-- than two days though, see https://sql.telemetry.mozilla.org/queries/95882/source
|
||||
-- If one does, our uniqueness check will alert us
|
||||
AND FORMAT_DATE('%Y%m%d', DATE_ADD(@session_date, INTERVAL 1 DAY))
|
||||
historic_and_intraday
|
||||
GROUP BY
|
||||
ga_client_id,
|
||||
ga_session_id
|
||||
HAVING
|
||||
-- Don't include entries from today that started yesterday
|
||||
-- Don't include entries that started yesterday or tomorrow
|
||||
session_date = @session_date
|
||||
)
|
||||
SELECT
|
||||
|
|
|
@ -0,0 +1,225 @@
|
|||
[
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "visitId",
|
||||
"type": "INTEGER"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "visitNumber",
|
||||
"type": "INTEGER"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "visitStartTime",
|
||||
"type": "INTEGER"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "date",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"fields": [
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "timeOnSite",
|
||||
"type": "INTEGER"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "pageviews",
|
||||
"type": "INTEGER"
|
||||
}
|
||||
],
|
||||
"mode": "NULLABLE",
|
||||
"name": "totals",
|
||||
"type": "RECORD"
|
||||
},
|
||||
{
|
||||
"fields": [
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "campaign",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "source",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "medium",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "adContent",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "keyword",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "adwordsClickInfo",
|
||||
"type": "RECORD",
|
||||
"fields": [
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "gclId",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "campaignId",
|
||||
"type": "INTEGER"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"mode": "NULLABLE",
|
||||
"name": "trafficSource",
|
||||
"type": "RECORD"
|
||||
},
|
||||
{
|
||||
"fields": [
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "browser",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "browserVersion",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "operatingSystem",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "operatingSystemVersion",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "language",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "deviceCategory",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "mobileDeviceModel",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "mobileDeviceInfo",
|
||||
"type": "STRING"
|
||||
}
|
||||
],
|
||||
"mode": "NULLABLE",
|
||||
"name": "device",
|
||||
"type": "RECORD"
|
||||
},
|
||||
{
|
||||
"fields": [
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "country",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "region",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "city",
|
||||
"type": "STRING"
|
||||
}
|
||||
],
|
||||
"mode": "NULLABLE",
|
||||
"name": "geoNetwork",
|
||||
"type": "RECORD"
|
||||
},
|
||||
{
|
||||
"fields": [
|
||||
{
|
||||
"fields": [
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "pagePath",
|
||||
"type": "STRING"
|
||||
}
|
||||
],
|
||||
"mode": "NULLABLE",
|
||||
"name": "page",
|
||||
"type": "RECORD"
|
||||
},
|
||||
{
|
||||
"fields": [
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "landingScreenName",
|
||||
"type": "STRING"
|
||||
}
|
||||
],
|
||||
"mode": "NULLABLE",
|
||||
"name": "appInfo",
|
||||
"type": "RECORD"
|
||||
},
|
||||
{
|
||||
"fields": [
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "eventCategory",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "eventAction",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "eventLabel",
|
||||
"type": "STRING"
|
||||
}
|
||||
],
|
||||
"mode": "NULLABLE",
|
||||
"name": "eventInfo",
|
||||
"type": "RECORD"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "type",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "hitNumber",
|
||||
"type": "INTEGER"
|
||||
}
|
||||
],
|
||||
"mode": "REPEATED",
|
||||
"name": "hits",
|
||||
"type": "RECORD"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "clientId",
|
||||
"type": "STRING"
|
||||
}
|
||||
]
|
|
@ -29,3 +29,34 @@
|
|||
last_reported_stub_session_id: "laterStubSessionId"
|
||||
all_reported_stub_session_ids: ["earlierStubSessionId", "laterStubSessionId"]
|
||||
landing_screen: first
|
||||
- ga_client_id: clientIntraday
|
||||
ga_session_id: clientIntraday1
|
||||
session_date: 2023-03-31
|
||||
is_first_session: true
|
||||
session_number: 1
|
||||
time_on_site: 11
|
||||
pageviews: 1
|
||||
country: earlierCountry
|
||||
region: earlierRegion
|
||||
city: earlierCity
|
||||
campaign_id: "1"
|
||||
gclid: "earlierGclid"
|
||||
campaign: "earlierCampaign"
|
||||
source: "earlierSource"
|
||||
medium: "earlierMedium"
|
||||
content: "earlierContent"
|
||||
term: "earlierKeyword"
|
||||
device_category: "earlierDeviceCategory"
|
||||
mobile_device_model: "earlierMobileDeviceModel"
|
||||
mobile_device_string: "earlierMobileDeviceInfo"
|
||||
os: "earlierOperatingSystem"
|
||||
os_version: "earlierOperatingSystemVersion"
|
||||
language: "earlierLanguage"
|
||||
browser: "earlierBrowser"
|
||||
browser_version: "earlierBrowserVersion"
|
||||
had_download_event: true
|
||||
last_reported_install_target: "desktop_release"
|
||||
all_reported_install_targets: ["desktop_release"]
|
||||
last_reported_stub_session_id: "laterStubSessionId"
|
||||
all_reported_stub_session_ids: ["earlierStubSessionId", "laterStubSessionId"]
|
||||
landing_screen: first
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
- clientId: clientIntraday
|
||||
visitId: '1'
|
||||
visitNumber: 1
|
||||
date: '20230331'
|
||||
visitStartTime: 1
|
||||
geoNetwork:
|
||||
country: "earlierCountry"
|
||||
region: "earlierRegion"
|
||||
city: "earlierCity"
|
||||
trafficSource:
|
||||
adwordsClickInfo:
|
||||
campaignId: 1
|
||||
gclId: "earlierGclid"
|
||||
campaign: "earlierCampaign"
|
||||
source: "earlierSource"
|
||||
medium: "earlierMedium"
|
||||
adContent: "earlierContent"
|
||||
keyword: "earlierKeyword"
|
||||
device:
|
||||
deviceCategory: "earlierDeviceCategory"
|
||||
mobileDeviceModel: "earlierMobileDeviceModel"
|
||||
mobileDeviceInfo: "earlierMobileDeviceInfo"
|
||||
operatingSystem: "earlierOperatingSystem"
|
||||
operatingSystemVersion: "earlierOperatingSystemVersion"
|
||||
language: "earlierLanguage"
|
||||
browser: "earlierBrowser"
|
||||
browserVersion: "earlierBrowserVersion"
|
||||
totals:
|
||||
pageviews: 1
|
||||
timeOnSite: 11
|
||||
hits:
|
||||
- page:
|
||||
pagePath: "/en-GB/firefox/session"
|
||||
appInfo:
|
||||
landingScreenName: first
|
||||
eventInfo:
|
||||
eventCategory: "/firefox/ Interactions"
|
||||
eventAction: Stub Session ID
|
||||
eventLabel: earlierStubSessionId
|
||||
hitNumber: 1
|
||||
type: EVENT
|
||||
- page:
|
||||
pagePath: "/en-GB/firefox/session"
|
||||
appInfo:
|
||||
landingScreenName: second
|
||||
eventInfo:
|
||||
eventCategory: "/firefox/ Interactions"
|
||||
eventAction: Stub Session ID
|
||||
eventLabel: laterStubSessionId
|
||||
hitNumber: 2
|
||||
type: EVENT
|
||||
- page:
|
||||
pagePath: "/en-GB/firefox/"
|
||||
appInfo:
|
||||
landingScreenName: third
|
||||
eventInfo:
|
||||
eventCategory: "/firefox/ Interactions"
|
||||
eventAction: Firefox Download
|
||||
eventLabel: Firefox for Desktop
|
||||
hitNumber: 3
|
||||
type: EVENT
|
Загрузка…
Ссылка в новой задаче