From 54341c597fd3bbe6a89b66a876b7b3a7a989d540 Mon Sep 17 00:00:00 2001
From: Katie Windau <153020235+kwindau@users.noreply.github.com>
Date: Tue, 2 Jul 2024 17:14:24 -0500
Subject: [PATCH] DENG-4201 create www_site_hits_v1 & firefox_whatsnew_summary_v1 in shared-prod (#5879)

* DENG-4201 create www_site_hits_v1 in shared-prod

* DENG-4201 add firefox_whatsnew_summary_v1 to shared prod

* DENG-4201 fix schema.yaml
---
Reviewer notes (free text placed between the "---" separator and the first
diff; it is not part of any hunk, so the added files are unchanged):

    Both tables
      * Are scheduled in the bqetl_google_analytics_derived DAG, are
        day-partitioned on `date` with require_partition_filter: false, and
        are flagged deprecated: true in their metadata.yaml.
      * firefox_whatsnew_summary_v1 is additionally clustered on country,
        locale and version; www_site_hits_v1 sets clustering: null.

    www_site_hits_v1/query.sql
      * Emits one row per element of the `hits` array (CROSS JOIN UNNEST) of
        the ga_sessions_* table whose _TABLE_SUFFIX equals @submission_date
        formatted as %Y%m%d.
      * visit_identifier is fullVisitorId and visitId, each cast to STRING,
        concatenated with no separator.
      * split_page_path is pagePath cut at the first '?' and split on '/'.
        page_level_1..page_level_5 read offsets 2..6 with SAFE_OFFSET, so
        missing segments become NULL. For a path shaped like
        "/{locale}/firefox/{version}/whatsnew/", offset 0 is the empty string
        before the leading slash and offset 1 is the locale, which is why
        page_level_1 starts at offset 2.
      * first_interaction and last_interaction are window aggregates over
        (fullVisitorId, visitStartTime). The IF falls back to 0, so one hit
        with a NULL isInteraction makes first_interaction 0 for every row of
        that visit.
        NOTE(review): confirm whether isInteraction can be NULL in the
        export and whether 0 is the intended fallback.
      * hit_time and last_interaction are hit.time / 1000.
        NOTE(review): presumably milliseconds to seconds -- confirm against
        the export schema.
      * entrances / exits are 1 when isEntrance / isExit IS NOT NULL and 0
        otherwise; the test is on NULL-ness, so a FALSE flag would also
        yield 1.
      * event_id concatenates eventCategory, event action and eventLabel.
        Action and label are coalesced to '', the category is not.
        NOTE(review): assuming CONCAT returns NULL when any argument is
        NULL, event_id is NULL whenever eventCategory is NULL -- confirm
        that is intended.
      * The event_id expression spells the field `eventaction`, while the
        event_action column uses `eventAction`.
        NOTE(review): assumed to resolve to the same field; confirm field
        lookup is case-insensitive.
      * page_name is '/' + page_level_1 + '/' when page_level_2 IS NULL;
        otherwise it is ARRAY_TO_STRING over an empty first element followed
        by the five levels, joined with '/', so the result begins with '/'.
        No null_text argument is passed to ARRAY_TO_STRING.
        NOTE(review): NULL levels are presumably omitted from the joined
        string -- confirm.

    firefox_whatsnew_summary_v1/query.sql
      * Reads www_site_hits_v1 for date = @submission_date and keeps rows
        with hit_type = 'PAGE', page_level_1 = 'firefox',
        page_level_3 = 'whatsnew' and a page_level_2 that matches the
        version regular expression.
      * locale is page_path_level1 with '/' trimmed from both ends; version
        is page_level_2; version_info is
        mozfun.norm.browser_version_info(page_level_2), from which
        major_version, minor_version, patch_revision and is_major_release
        are selected.
      * is_bounce is TRUE only on the row whose hit_number equals
        first_interaction within a visit that has bounces = 1; the IF yields
        FALSE for every other row, including rows where the comparison is
        NULL.
      * The GROUP BY ordinals 1..8 are date, country, locale, version and
        the four version_info fields. visits counts distinct
        visit_identifier; bounces counts distinct visit_identifier among
        rows where is_bounce is TRUE.

 .../firefox_whatsnew_summary_v1/metadata.yaml |  20 ++++
 .../firefox_whatsnew_summary_v1/query.sql     |  42 +++++++
 .../firefox_whatsnew_summary_v1/schema.yaml   |  31 +++++
 .../www_site_hits_v1/metadata.yaml            |  21 ++++
 .../www_site_hits_v1/query.sql                |  75 ++++++++++++
 .../www_site_hits_v1/schema.yaml              | 112 ++++++++++++++++++
 6 files changed, 301 insertions(+)
 create mode 100644 sql/moz-fx-data-shared-prod/mozilla_org_derived/firefox_whatsnew_summary_v1/metadata.yaml
 create mode 100644 sql/moz-fx-data-shared-prod/mozilla_org_derived/firefox_whatsnew_summary_v1/query.sql
 create mode 100644 sql/moz-fx-data-shared-prod/mozilla_org_derived/firefox_whatsnew_summary_v1/schema.yaml
 create mode 100644 sql/moz-fx-data-shared-prod/mozilla_org_derived/www_site_hits_v1/metadata.yaml
 create mode 100644 sql/moz-fx-data-shared-prod/mozilla_org_derived/www_site_hits_v1/query.sql
 create mode 100644 sql/moz-fx-data-shared-prod/mozilla_org_derived/www_site_hits_v1/schema.yaml

diff --git a/sql/moz-fx-data-shared-prod/mozilla_org_derived/firefox_whatsnew_summary_v1/metadata.yaml b/sql/moz-fx-data-shared-prod/mozilla_org_derived/firefox_whatsnew_summary_v1/metadata.yaml
new file mode 100644
index 0000000000..e432ea353c
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/mozilla_org_derived/firefox_whatsnew_summary_v1/metadata.yaml
@@ -0,0 +1,20 @@
+description: This table aggregates the traffic to Firefox "what's new" pages and the number of bounces
+friendly_name: Firefox "what's new" hits and bounces
+labels:
+  incremental: true
+  schedule: daily
+owners:
+  - rbaffourawuah@mozilla.com
+scheduling:
+  dag_name: bqetl_google_analytics_derived
+bigquery:
+  time_partitioning:
+    type: day
+    field: date
+    require_partition_filter: false
+  clustering:
+    fields:
+      - country
+      - locale
+      - version
+deprecated: true
diff --git a/sql/moz-fx-data-shared-prod/mozilla_org_derived/firefox_whatsnew_summary_v1/query.sql b/sql/moz-fx-data-shared-prod/mozilla_org_derived/firefox_whatsnew_summary_v1/query.sql
new file mode 100644
index 0000000000..7b3c2ceb67
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/mozilla_org_derived/firefox_whatsnew_summary_v1/query.sql
@@ -0,0 +1,42 @@
+WITH wnp_visits AS (
+  SELECT
+    date,
+    visit_identifier,
+    TRIM(page_path_level1, '/') AS locale,
+    page_level_2 AS version,
+    mozfun.norm.browser_version_info(page_level_2) AS version_info,
+    country,
+    IF(hit_number = first_interaction AND bounces = 1, TRUE, FALSE) AS is_bounce
+  FROM
+    `moz-fx-data-shared-prod.mozilla_org_derived.www_site_hits_v1`
+  WHERE
+    date = @submission_date
+    AND hit_type = 'PAGE'
+    -- Match page paths like "/{locale}/firefox/{version}/whatsnew/..."
+    -- Version regular expression is adapted from https://github.com/mozilla/bedrock/blob/main/bedrock/releasenotes/__init__.py
+    AND page_level_1 = 'firefox'
+    AND REGEXP_CONTAINS(page_level_2, r'^\d{1,3}(\.\d{1,3}){1,3}((a|b(eta)?)\d*)?(pre\d*)?(esr)?$')
+    AND page_level_3 = 'whatsnew'
+)
+SELECT
+  date,
+  country,
+  locale,
+  version,
+  version_info.major_version,
+  version_info.minor_version,
+  version_info.patch_revision,
+  version_info.is_major_release,
+  COUNT(DISTINCT visit_identifier) AS visits,
+  COUNT(DISTINCT CASE WHEN is_bounce = TRUE THEN visit_identifier END) AS bounces
+FROM
+  wnp_visits
+GROUP BY
+  1,
+  2,
+  3,
+  4,
+  5,
+  6,
+  7,
+  8
diff --git a/sql/moz-fx-data-shared-prod/mozilla_org_derived/firefox_whatsnew_summary_v1/schema.yaml b/sql/moz-fx-data-shared-prod/mozilla_org_derived/firefox_whatsnew_summary_v1/schema.yaml
new file mode 100644
index 0000000000..a9feee8c37
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/mozilla_org_derived/firefox_whatsnew_summary_v1/schema.yaml
@@ -0,0 +1,31 @@
+fields:
+- mode: NULLABLE
+  name: date
+  type: DATE
+- mode: NULLABLE
+  name: country
+  type: STRING
+- mode: NULLABLE
+  name: locale
+  type: STRING
+- mode: NULLABLE
+  name: version
+  type: STRING
+- mode: NULLABLE
+  name: major_version
+  type: NUMERIC
+- mode: NULLABLE
+  name: minor_version
+  type: NUMERIC
+- mode: NULLABLE
+  name: patch_revision
+  type: NUMERIC
+- mode: NULLABLE
+  name: is_major_release
+  type: BOOLEAN
+- mode: NULLABLE
+  name: visits
+  type: INTEGER
+- mode: NULLABLE
+  name: bounces
+  type: INTEGER
diff --git a/sql/moz-fx-data-shared-prod/mozilla_org_derived/www_site_hits_v1/metadata.yaml b/sql/moz-fx-data-shared-prod/mozilla_org_derived/www_site_hits_v1/metadata.yaml
new file mode 100644
index 0000000000..68f29226b3
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/mozilla_org_derived/www_site_hits_v1/metadata.yaml
@@ -0,0 +1,21 @@
+friendly_name: WWW Site Hits
+description: |-
+  Normalized individual hits for www.mozilla.org
+owners:
+- ascholtz@mozilla.com
+labels:
+  incremental: true
+  schedule: daily
+  dag: bqetl_google_analytics_derived
+  owner1: ascholtz
+scheduling:
+  dag_name: bqetl_google_analytics_derived
+bigquery:
+  time_partitioning:
+    type: day
+    field: date
+    require_partition_filter: false
+    expiration_days: null
+  clustering: null
+references: {}
+deprecated: true
diff --git a/sql/moz-fx-data-shared-prod/mozilla_org_derived/www_site_hits_v1/query.sql b/sql/moz-fx-data-shared-prod/mozilla_org_derived/www_site_hits_v1/query.sql
new file mode 100644
index 0000000000..93ab865744
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/mozilla_org_derived/www_site_hits_v1/query.sql
@@ -0,0 +1,75 @@
+WITH hits AS (
+  SELECT
+    PARSE_DATE('%Y%m%d', date) AS date,
+    CONCAT(CAST(fullVisitorId AS STRING), CAST(visitId AS STRING)) AS visit_identifier,
+    fullVisitorId AS full_visitor_id,
+    visitStartTime AS visit_start_time,
+    hit.page.pagePath AS page_path,
+    hit.page.pagePathLevel1 AS page_path_level1,
+    -- splitting the pagePath to make it easier to filter on pages in dashboards
+    SPLIT(SPLIT(hit.page.pagePath, '?')[OFFSET(0)], '/') AS split_page_path,
+    hit.type AS hit_type,
+    hit.isExit AS is_exit,
+    hit.isEntrance AS is_entrance,
+    hit.hitNumber AS hit_number,
+    hit.eventInfo.eventCategory AS event_category,
+    hit.eventInfo.eventLabel AS event_label,
+    hit.eventInfo.eventAction AS event_action,
+    device.deviceCategory AS device_category,
+    device.operatingSystem AS operating_system,
+    device.language,
+    device.browser,
+    SPLIT(device.browserVersion, '.')[OFFSET(0)] AS browser_version,
+    geoNetwork.country,
+    trafficSource.source,
+    trafficSource.medium,
+    trafficSource.campaign,
+    trafficSource.adContent AS ad_content,
+    totals.visits,
+    totals.bounces,
+    hit.time / 1000 AS hit_time,
+    MIN(IF(hit.isInteraction IS NOT NULL, hit.hitNumber, 0)) OVER (
+      PARTITION BY
+        fullVisitorId,
+        visitStartTime
+    ) AS first_interaction,
+    MAX(IF(hit.isInteraction IS NOT NULL, hit.time / 1000, 0)) OVER (
+      PARTITION BY
+        fullVisitorId,
+        visitStartTime
+    ) AS last_interaction,
+    IF(hit.isEntrance IS NOT NULL, 1, 0) AS entrances,
+    IF(hit.isExit IS NOT NULL, 1, 0) AS exits,
+    CONCAT(
+      hit.eventInfo.eventCategory,
+      COALESCE(hit.eventInfo.eventaction, ''),
+      COALESCE(hit.eventInfo.eventLabel, '')
+    ) AS event_id,
+  FROM
+    `moz-fx-data-marketing-prod.65789850.ga_sessions_*`
+  CROSS JOIN
+    UNNEST(hits) AS hit
+  WHERE
+    _TABLE_SUFFIX = FORMAT_DATE('%Y%m%d', @submission_date)
+),
+page_levels AS (
+  SELECT
+    * EXCEPT (split_page_path),
+    split_page_path[SAFE_OFFSET(2)] AS page_level_1,
+    split_page_path[SAFE_OFFSET(3)] AS page_level_2,
+    split_page_path[SAFE_OFFSET(4)] AS page_level_3,
+    split_page_path[SAFE_OFFSET(5)] AS page_level_4,
+    split_page_path[SAFE_OFFSET(6)] AS page_level_5,
+  FROM
+    hits
+)
+SELECT
+  *,
+  -- Page name without locale and query string
+  IF(
+    page_level_2 IS NULL,
+    CONCAT('/', page_level_1, '/'),
+    ARRAY_TO_STRING(['', page_level_1, page_level_2, page_level_3, page_level_4, page_level_5], '/')
+  ) AS page_name,
+FROM
+  page_levels
diff --git a/sql/moz-fx-data-shared-prod/mozilla_org_derived/www_site_hits_v1/schema.yaml b/sql/moz-fx-data-shared-prod/mozilla_org_derived/www_site_hits_v1/schema.yaml
new file mode 100644
index 0000000000..d814d4bb1f
--- /dev/null
+++ b/sql/moz-fx-data-shared-prod/mozilla_org_derived/www_site_hits_v1/schema.yaml
@@ -0,0 +1,112 @@
+fields:
+- name: date
+  type: DATE
+  mode: NULLABLE
+- name: visit_identifier
+  type: STRING
+  mode: NULLABLE
+- name: full_visitor_id
+  type: STRING
+  mode: NULLABLE
+- name: visit_start_time
+  type: INTEGER
+  mode: NULLABLE
+- name: page_path
+  type: STRING
+  mode: NULLABLE
+- name: page_path_level1
+  type: STRING
+  mode: NULLABLE
+- name: hit_type
+  type: STRING
+  mode: NULLABLE
+- name: is_exit
+  type: BOOLEAN
+  mode: NULLABLE
+- name: is_entrance
+  type: BOOLEAN
+  mode: NULLABLE
+- name: hit_number
+  type: INTEGER
+  mode: NULLABLE
+- name: event_category
+  type: STRING
+  mode: NULLABLE
+- name: event_label
+  type: STRING
+  mode: NULLABLE
+- name: event_action
+  type: STRING
+  mode: NULLABLE
+- name: device_category
+  type: STRING
+  mode: NULLABLE
+- name: operating_system
+  type: STRING
+  mode: NULLABLE
+- name: language
+  type: STRING
+  mode: NULLABLE
+- name: browser
+  type: STRING
+  mode: NULLABLE
+- name: browser_version
+  type: STRING
+  mode: NULLABLE
+- name: country
+  type: STRING
+  mode: NULLABLE
+- name: source
+  type: STRING
+  mode: NULLABLE
+- name: medium
+  type: STRING
+  mode: NULLABLE
+- name: campaign
+  type: STRING
+  mode: NULLABLE
+- name: ad_content
+  type: STRING
+  mode: NULLABLE
+- name: visits
+  type: INTEGER
+  mode: NULLABLE
+- name: bounces
+  type: INTEGER
+  mode: NULLABLE
+- name: hit_time
+  type: FLOAT
+  mode: NULLABLE
+- name: first_interaction
+  type: INTEGER
+  mode: NULLABLE
+- name: last_interaction
+  type: FLOAT
+  mode: NULLABLE
+- name: entrances
+  type: INTEGER
+  mode: NULLABLE
+- name: exits
+  type: INTEGER
+  mode: NULLABLE
+- name: event_id
+  type: STRING
+  mode: NULLABLE
+- name: page_level_1
+  type: STRING
+  mode: NULLABLE
+- name: page_level_2
+  type: STRING
+  mode: NULLABLE
+- name: page_level_3
+  type: STRING
+  mode: NULLABLE
+- name: page_level_4
+  type: STRING
+  mode: NULLABLE
+- name: page_level_5
+  type: STRING
+  mode: NULLABLE
+- name: page_name
+  type: STRING
+  mode: NULLABLE