Add derived stub attribution logs (#4557)
* Add derived stub attribution logs This table keeps triplets from the stub attribution logs. The triplet of (dl_token, ga_client_id, stub_session_id) will only ever appear once here. See the associated decision brief: https://docs.google.com/document/d/1L4vOR0nCGawwSRPA9xiR8Hmu_8ozCGUecXAtBWmGGA0/edit * Move stub attribution table to new dataset In order to ensure limited access to the stub attribution service data without significantly decreasing developer velocity, we move these tables to a new dataset. That dataset has the defaults we want for all stub attribution log data: - Defaults to just read access to data-science/DUET workgroup - No read/write access for DE We will backfill via the bqetl_backfill DAG. * Rename view * Use correct dataset name in view * Skip dryrun; no access
This commit is contained in:
Родитель
5cf8d30153
Коммит
104ece82d9
|
@ -177,6 +177,7 @@ dry_run:
|
|||
- sql/moz-fx-data-shared-prod/fenix/installs_by_country/view.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_desktop/top_sites/view.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_desktop/quick_suggest/view.sql
|
||||
- sql/moz-fx-data-shared-prod/stub_attribution_service_derived/dl_token_ga_attribution_lookup_v1/query.sql
|
||||
# Materialized views
|
||||
- sql/moz-fx-data-shared-prod/telemetry_derived/experiment_search_events_live_v1/init.sql
|
||||
- sql/moz-fx-data-shared-prod/telemetry_derived/experiment_events_live_v1/init.sql
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
friendly_name: Stub Attribution Service
|
||||
description: |-
|
||||
Stub attribution service data, usually from the logs.
|
||||
dataset_base_acl: view_restricted
|
||||
user_facing: true
|
||||
labels: {}
|
||||
default_table_workgroup_access:
|
||||
- role: roles/bigquery.dataViewer
|
||||
members:
|
||||
- workgroup:data-science/duet
|
||||
workgroup_access:
|
||||
- role: roles/bigquery.dataViewer
|
||||
members:
|
||||
- workgroup:data-science/duet
|
|
@ -0,0 +1,7 @@
|
|||
-- Pass-through view: exposes every column of the derived
-- dl_token_ga_attribution_lookup_v1 table without filtering or renaming.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.stub_attribution_service.dl_token_ga_attribution_lookup` AS
SELECT
  lookup_v1.*
FROM
  `moz-fx-data-shared-prod.stub_attribution_service_derived.dl_token_ga_attribution_lookup_v1` AS lookup_v1
|
|
@ -0,0 +1,15 @@
|
|||
friendly_name: Stub Attribution Service Derived
|
||||
description: |-
|
||||
Stub Attribution Service data.
|
||||
Separated into a new dataset to ensure correct workgroup access.
|
||||
dataset_base_acl: derived_restricted
|
||||
user_facing: false
|
||||
labels: {}
|
||||
default_table_workgroup_access:
|
||||
- role: roles/bigquery.dataViewer
|
||||
members:
|
||||
- workgroup:data-science/duet
|
||||
workgroup_access:
|
||||
- role: roles/bigquery.dataViewer
|
||||
members:
|
||||
- workgroup:data-science/duet
|
|
@ -0,0 +1,6 @@
|
|||
-- Data checks for this table, expressed as Jinja macro calls.
-- NOTE(review): judging by the macro name, this asserts that no
-- (dl_token, ga_client_id, stub_session_id) triplet occurs more than once; confirm
-- against the is_unique macro definition.
#fail
{{ is_unique(['dl_token', 'ga_client_id', 'stub_session_id']) }}

-- NOTE(review): presumably requires the table to contain at least 1000 rows; confirm
-- against the min_row_count macro definition.
#fail
{{ min_row_count(1000) }}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
friendly_name: DL Token GA Attribution Lookup
|
||||
description: |-
|
||||
This table lets you look up GA attribution data for dl_tokens.
|
||||
|
||||
1 row per-(dl_token, ga_client_id, stub_session_id) triplet.
|
||||
|
||||
dl_token - Available in Stub Attribution Service and Telemetry
|
||||
ga_client_id - Available in Stub Attribution Service and GA
|
||||
stub_session_id - Available in Stub Attribution Service and GA
|
||||
owners:
|
||||
- frank@mozilla.com
|
||||
labels:
|
||||
incremental: true
|
||||
owner1: frank@mozilla.com
|
||||
scheduling:
|
||||
dag_name: bqetl_mozilla_org_derived
|
||||
date_partition_parameter: null
|
||||
parameters: ["download_date:DATE:{{ds}}"]
|
||||
bigquery:
|
||||
clustering:
|
||||
fields: [first_seen_date]
|
||||
references: {}
|
||||
deprecated: false
|
|
@ -0,0 +1,35 @@
|
|||
-- Maintains one row per (dl_token, ga_client_id, stub_session_id) triplet together with
-- the earliest date on which that triplet was seen. Each run reads the rows already in
-- the destination table and folds in the triplets logged on @download_date.
WITH historical_triplets AS (
  -- Triplets recorded by earlier runs of this query.
  SELECT
    dl_token,
    ga_client_id,
    stub_session_id,
    first_seen_date,
  FROM
    stub_attribution_service_derived.dl_token_ga_attribution_lookup_v1
),
new_downloads AS (
  -- Triplets found in the stub attribution service logs for @download_date.
  -- NOTE(review): mozfun.ga.nullify_string presumably maps placeholder strings to NULL;
  -- confirm against the UDF definition. The table schema declares all three
  -- identifiers NULLABLE, so NULL components must be handled below.
  SELECT DISTINCT
    mozfun.ga.nullify_string(jsonPayload.fields.dltoken) AS dl_token,
    mozfun.ga.nullify_string(jsonPayload.fields.visit_id) AS ga_client_id,
    mozfun.ga.nullify_string(jsonPayload.fields.session_id) AS stub_session_id,
    @download_date AS first_seen_date,
  FROM
    `moz-fx-stubattribut-prod-32a5`.stubattribution_prod.stdout
  WHERE
    DATE(timestamp) = @download_date
),
all_triplets AS (
  SELECT
    dl_token,
    ga_client_id,
    stub_session_id,
    first_seen_date,
  FROM
    historical_triplets
  UNION ALL
  SELECT
    dl_token,
    ga_client_id,
    stub_session_id,
    first_seen_date,
  FROM
    new_downloads
)
-- Merge by grouping rather than by FULL OUTER JOIN ... USING (...): a join compares the
-- keys with equality, so a triplet with a NULL component would never match its
-- historical copy and would be inserted again on every run. GROUP BY places NULLs in
-- the same group, which keeps each triplet to exactly one row.
-- MIN ignores NULL dates, so a triplet keeps the earliest non-NULL first_seen_date.
SELECT
  dl_token,
  ga_client_id,
  stub_session_id,
  MIN(first_seen_date) AS first_seen_date,
FROM
  all_triplets
GROUP BY
  dl_token,
  ga_client_id,
  stub_session_id
|
|
@ -0,0 +1,17 @@
|
|||
fields:
|
||||
- name: dl_token
|
||||
mode: NULLABLE
|
||||
type: STRING
|
||||
description: "A download token (dl_token). Associated with a single Firefox binary generated by the stub attribution service."
|
||||
- name: ga_client_id
|
||||
mode: NULLABLE
|
||||
type: STRING
|
||||
description: "Uniquely identifies a GA client, using a cookie on moz.org."
|
||||
- name: stub_session_id
|
||||
mode: NULLABLE
|
||||
type: STRING
|
||||
description: "An ID identifying a single stub attribution session. Can be found in GA logs, in the 'Stub Session ID' Event."
|
||||
- name: first_seen_date
|
||||
mode: NULLABLE
|
||||
type: DATE
|
||||
description: "The first date we saw this triplet."
|
|
@ -0,0 +1,46 @@
|
|||
[
|
||||
{
|
||||
"fields": [
|
||||
{
|
||||
"fields": [
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "log_type",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "visit_id",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "dltoken",
|
||||
"type": "STRING"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "session_id",
|
||||
"type": "STRING"
|
||||
}
|
||||
],
|
||||
"mode": "NULLABLE",
|
||||
"name": "fields",
|
||||
"type": "RECORD"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "timestamp",
|
||||
"type": "FLOAT"
|
||||
}
|
||||
],
|
||||
"mode": "NULLABLE",
|
||||
"name": "jsonPayload",
|
||||
"type": "RECORD"
|
||||
},
|
||||
{
|
||||
"mode": "NULLABLE",
|
||||
"name": "timestamp",
|
||||
"type": "TIMESTAMP"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,16 @@
|
|||
- name: dl_token
|
||||
mode: NULLABLE
|
||||
type: STRING
|
||||
description: "A download token (dl_token). Associated with a single Firefox binary generated by the stub attribution service."
|
||||
- name: ga_client_id
|
||||
mode: NULLABLE
|
||||
type: STRING
|
||||
description: "Uniquely identifies a GA client, using a cookie on moz.org."
|
||||
- name: stub_session_id
|
||||
mode: NULLABLE
|
||||
type: STRING
|
||||
description: "An ID identifying a single stub attribution session. Can be found in GA logs, in the 'Stub Session ID' Event."
|
||||
- name: first_seen_date
|
||||
mode: NULLABLE
|
||||
type: DATE
|
||||
description: "The first date we saw this triplet."
|
|
@ -0,0 +1,12 @@
|
|||
- dl_token: dltoken_1
|
||||
ga_client_id: ga_client_id_1
|
||||
stub_session_id: stub_session_id_1
|
||||
first_seen_date: 2023-03-31
|
||||
- dl_token: dltoken_2
|
||||
ga_client_id: also_present_today
|
||||
stub_session_id: stub_session_id_2
|
||||
first_seen_date: 2023-01-01
|
||||
- dl_token: dltoken_3
|
||||
ga_client_id: only_present_historically
|
||||
stub_session_id: stub_session_id_3
|
||||
first_seen_date: 2023-01-01
|
|
@ -0,0 +1,21 @@
|
|||
- jsonPayload:
|
||||
fields:
|
||||
visit_id: ga_client_id_1
|
||||
dltoken: dltoken_1
|
||||
session_id: stub_session_id_1
|
||||
log_type: download_started
|
||||
timestamp: '2023-03-31 01:16:43.101135 UTC'
|
||||
- jsonPayload:
|
||||
fields:
|
||||
visit_id: ga_client_id_1
|
||||
dltoken: dltoken_1
|
||||
session_id: stub_session_id_1
|
||||
log_type: download_started
|
||||
timestamp: '2023-03-31 01:16:43.101135 UTC'
|
||||
- jsonPayload:
|
||||
fields:
|
||||
visit_id: also_present_today
|
||||
dltoken: dltoken_2
|
||||
session_id: stub_session_id_2
|
||||
log_type: download_started
|
||||
timestamp: '2023-03-31 01:16:43.101135 UTC'
|
|
@ -0,0 +1,4 @@
|
|||
---
|
||||
- name: download_date
|
||||
type: DATE
|
||||
value: 2023-03-31
|
|
@ -0,0 +1,8 @@
|
|||
- dl_token: dltoken_3
|
||||
ga_client_id: only_present_historically
|
||||
stub_session_id: stub_session_id_3
|
||||
first_seen_date: 2023-01-01
|
||||
- dl_token: dltoken_2
|
||||
ga_client_id: also_present_today
|
||||
stub_session_id: stub_session_id_2
|
||||
first_seen_date: 2023-01-01
|
Загрузка…
Ссылка в новой задаче