Add derived stub attribution logs (#4557)

* Add derived stub attribution logs

This table keeps triplets from the stub attribution logs.
The triplet of (dl_token, ga_client_id, stub_session_id)
will only ever appear once here.

See the associated decision brief:
https://docs.google.com/document/d/1L4vOR0nCGawwSRPA9xiR8Hmu_8ozCGUecXAtBWmGGA0/edit

* Move stub attribution table to new dataset

In order to ensure limited access to the stub attribution service
data without significantly decreasing developer velocity, we
move these tables to a new dataset. That dataset has the defaults
we want for all stub attribution log data:
- Defaults to just read access to data-science/DUET workgroup
- No read/write access for DE

We will backfill via the bqetl_backfill DAG.

* Rename view

* Use correct dataset name in view

* Skip dryrun; no access
This commit is contained in:
Frank Bertsch 2023-11-17 16:36:48 -05:00 коммит произвёл GitHub
Родитель 5cf8d30153
Коммит 104ece82d9
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
14 изменённых файлов: 225 добавлений и 0 удалений

Просмотреть файл

@ -177,6 +177,7 @@ dry_run:
- sql/moz-fx-data-shared-prod/fenix/installs_by_country/view.sql
- sql/moz-fx-data-shared-prod/firefox_desktop/top_sites/view.sql
- sql/moz-fx-data-shared-prod/firefox_desktop/quick_suggest/view.sql
- sql/moz-fx-data-shared-prod/stub_attribution_service_derived/dl_token_ga_attribution_lookup_v1/query.sql
# Materialized views
- sql/moz-fx-data-shared-prod/telemetry_derived/experiment_search_events_live_v1/init.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/experiment_events_live_v1/init.sql

Просмотреть файл

@ -0,0 +1,14 @@
# Dataset metadata for the user-facing stub attribution dataset.
# The same viewer grant is listed twice: once as the default for tables in
# the dataset and once for the dataset itself.
# NOTE(review): the exact semantics of `dataset_base_acl` and of the two
# *_workgroup_access keys are defined by the deployment tooling, not here.
friendly_name: Stub Attribution Service
description: Stub attribution service data, usually from the logs.
dataset_base_acl: view_restricted
user_facing: true
labels: {}
# Read-only access for the data-science/duet workgroup only.
default_table_workgroup_access:
  - role: roles/bigquery.dataViewer
    members:
      - workgroup:data-science/duet
workgroup_access:
  - role: roles/bigquery.dataViewer
    members:
      - workgroup:data-science/duet

Просмотреть файл

@ -0,0 +1,7 @@
-- User-facing view over the derived lookup table.
-- It passes through every column of dl_token_ga_attribution_lookup_v1
-- unchanged, so consumers do not need to reference the versioned table.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.stub_attribution_service.dl_token_ga_attribution_lookup`
AS
SELECT
  lookup_v1.*
FROM
  `moz-fx-data-shared-prod.stub_attribution_service_derived.dl_token_ga_attribution_lookup_v1` AS lookup_v1

Просмотреть файл

@ -0,0 +1,15 @@
# Dataset metadata for the derived (non user-facing) stub attribution dataset.
# The same viewer grant is listed twice: once as the default for tables in
# the dataset and once for the dataset itself.
# NOTE(review): the exact semantics of `dataset_base_acl` and of the two
# *_workgroup_access keys are defined by the deployment tooling, not here.
friendly_name: Stub Attribution Service Derived
description: |-
  Stub Attribution Service data.
  Separated into a new dataset to ensure correct workgroup access.
dataset_base_acl: derived_restricted
user_facing: false
labels: {}
# Read-only access for the data-science/duet workgroup only.
default_table_workgroup_access:
  - role: roles/bigquery.dataViewer
    members:
      - workgroup:data-science/duet
workgroup_access:
  - role: roles/bigquery.dataViewer
    members:
      - workgroup:data-science/duet

Просмотреть файл

@ -0,0 +1,6 @@
{#-
  Data checks for the lookup table, each preceded by a `#fail` marker.
  1. is_unique: called with the (dl_token, ga_client_id, stub_session_id)
     triplet, which the table is meant to contain at most once.
  2. min_row_count: called with a threshold of 1000.
  NOTE(review): the macros and the meaning of `#fail` are defined outside this
  file; presumably a failing check blocks the run. Confirm against the check
  runner.
-#}
#fail
{{ is_unique(['dl_token', 'ga_client_id', 'stub_session_id']) }}
#fail
{{ min_row_count(1000) }}

Просмотреть файл

@ -0,0 +1,23 @@
# Table metadata for the dl_token -> GA attribution lookup table.
friendly_name: DL Token GA Attribution Lookup
description: |-
  This table lets you lookup GA attribution data for dl_tokens.
  1 row per-(dl_token, ga_client_id, stub_session_id) triplet.
  dl_token - Available in Stub Attribution Service and Telemetry
  ga_client_id - Available in Stub Attribution Service and GA
  stub_session_id - Available in Stub Attribution Service and GA
owners:
  - frank@mozilla.com
labels:
  incremental: true
  owner1: frank@mozilla.com
scheduling:
  dag_name: bqetl_mozilla_org_derived
  # No partition parameter is set; the run date is passed explicitly as
  # @download_date instead.
  # NOTE(review): presumably this makes each run rewrite the whole table
  # rather than a single partition. Confirm against the scheduler docs.
  date_partition_parameter: null
  parameters:
    - "download_date:DATE:{{ds}}"
bigquery:
  clustering:
    fields:
      - first_seen_date
references: {}
deprecated: false

Просмотреть файл

@ -0,0 +1,35 @@
-- Maintains one row per (dl_token, ga_client_id, stub_session_id) triplet,
-- together with the earliest date on which the triplet was seen.
--
-- Each run merges the triplets already stored in the lookup table with the
-- triplets logged on @download_date and keeps the minimum first_seen_date.
--
-- The merge is a UNION ALL + GROUP BY rather than a FULL OUTER JOIN ... USING:
-- join equality never matches NULL keys, so a triplet with any NULL component
-- would be appended again on every run and duplicate the triplet.
-- GROUP BY places NULL keys in the same group, which keeps such triplets unique.
WITH historical_triplets AS (
  -- Triplets recorded by previous runs.
  SELECT
    dl_token,
    ga_client_id,
    stub_session_id,
    first_seen_date,
  FROM
    stub_attribution_service_derived.dl_token_ga_attribution_lookup_v1
),
new_downloads AS (
  -- Distinct triplets found in the stub attribution logs for @download_date.
  -- NOTE(review): mozfun.ga.nullify_string presumably maps placeholder or
  -- empty strings to NULL, so any of the three keys may be NULL here.
  SELECT DISTINCT
    mozfun.ga.nullify_string(jsonPayload.fields.dltoken) AS dl_token,
    mozfun.ga.nullify_string(jsonPayload.fields.visit_id) AS ga_client_id,
    mozfun.ga.nullify_string(jsonPayload.fields.session_id) AS stub_session_id,
    @download_date AS first_seen_date,
  FROM
    `moz-fx-stubattribut-prod-32a5`.stubattribution_prod.stdout
  WHERE
    DATE(timestamp) = @download_date
),
all_triplets AS (
  SELECT
    dl_token,
    ga_client_id,
    stub_session_id,
    first_seen_date,
  FROM
    historical_triplets
  UNION ALL
  SELECT
    dl_token,
    ga_client_id,
    stub_session_id,
    first_seen_date,
  FROM
    new_downloads
)
SELECT
  dl_token,
  ga_client_id,
  stub_session_id,
  -- MIN ignores NULL inputs, so a triplet present on only one side keeps that
  -- side's date, and a triplet present on both keeps the earlier one.
  MIN(first_seen_date) AS first_seen_date,
FROM
  all_triplets
GROUP BY
  dl_token,
  ga_client_id,
  stub_session_id

Просмотреть файл

@ -0,0 +1,17 @@
# Column schema for the lookup table: the three identifiers that make up a
# triplet, plus the first date the triplet was observed.
fields:
  - name: dl_token
    mode: NULLABLE
    type: STRING
    description: "A download token (dl_token). Associated with a single Firefox binary generated by the stub attribution service."
  - name: ga_client_id
    mode: NULLABLE
    type: STRING
    description: "Uniquely identifies a GA client, using a cookie on moz.org."
  - name: stub_session_id
    mode: NULLABLE
    type: STRING
    description: "An ID identifying a single stub attribution session. Can be found in GA logs, in the 'Stub Session ID' Event."
  - name: first_seen_date
    mode: NULLABLE
    type: DATE
    description: "The first date we saw this triplet."

Просмотреть файл

@ -0,0 +1,46 @@
[
{
"fields": [
{
"fields": [
{
"mode": "NULLABLE",
"name": "log_type",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "visit_id",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "dltoken",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "session_id",
"type": "STRING"
}
],
"mode": "NULLABLE",
"name": "fields",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "timestamp",
"type": "FLOAT"
}
],
"mode": "NULLABLE",
"name": "jsonPayload",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "timestamp",
"type": "TIMESTAMP"
}
]

Просмотреть файл

@ -0,0 +1,16 @@
# Field list describing the lookup table's columns (bare list, no `fields:` key).
# NOTE(review): the file name is not visible here; presumably this is the
# schema used for the lookup table fixture in the query test.
- name: dl_token
  mode: NULLABLE
  type: STRING
  description: "A download token (dl_token). Associated with a single Firefox binary generated by the stub attribution service."
- name: ga_client_id
  mode: NULLABLE
  type: STRING
  description: "Uniquely identifies a GA client, using a cookie on moz.org."
- name: stub_session_id
  mode: NULLABLE
  type: STRING
  description: "An ID identifying a single stub attribution session. Can be found in GA logs, in the 'Stub Session ID' Event."
- name: first_seen_date
  mode: NULLABLE
  type: DATE
  description: "The first date we saw this triplet."

Просмотреть файл

@ -0,0 +1,12 @@
# Three lookup rows, one per triplet.
# NOTE(review): the file name is not visible here. These rows equal what the
# query yields from the other two fixtures for download_date 2023-03-31, so
# this is presumably the expected output. Confirm.

# Seen only in the logs for the run date, so first_seen_date is the run date.
- dl_token: dltoken_1
  ga_client_id: ga_client_id_1
  stub_session_id: stub_session_id_1
  first_seen_date: 2023-03-31

# Seen both earlier and on the run date; the earlier date is kept.
- dl_token: dltoken_2
  ga_client_id: also_present_today
  stub_session_id: stub_session_id_2
  first_seen_date: 2023-01-01

# Seen only earlier; carried over unchanged.
- dl_token: dltoken_3
  ga_client_id: only_present_historically
  stub_session_id: stub_session_id_3
  first_seen_date: 2023-01-01

Просмотреть файл

@ -0,0 +1,21 @@
# Log rows used as query input. All three carry a timestamp on 2023-03-31.
# The first two rows are identical, which exercises de-duplication of
# repeated log lines for the same triplet.
- jsonPayload:
    fields:
      visit_id: ga_client_id_1
      dltoken: dltoken_1
      session_id: stub_session_id_1
      log_type: download_started
  timestamp: '2023-03-31 01:16:43.101135 UTC'

- jsonPayload:
    fields:
      visit_id: ga_client_id_1
      dltoken: dltoken_1
      session_id: stub_session_id_1
      log_type: download_started
  timestamp: '2023-03-31 01:16:43.101135 UTC'

# A triplet that also appears in the fixture with first_seen_date 2023-01-01.
- jsonPayload:
    fields:
      visit_id: also_present_today
      dltoken: dltoken_2
      session_id: stub_session_id_2
      log_type: download_started
  timestamp: '2023-03-31 01:16:43.101135 UTC'

Просмотреть файл

@ -0,0 +1,4 @@
---
# Value bound to the query's @download_date parameter; it matches the date of
# the log fixture timestamps.
- name: download_date
  type: DATE
  value: 2023-03-31

Просмотреть файл

@ -0,0 +1,8 @@
# Two lookup rows, both first seen on 2023-01-01.
# NOTE(review): the file name is not visible here; presumably these are the
# pre-existing contents of the lookup table that the query reads. Confirm.

# No matching log row on the run date.
- dl_token: dltoken_3
  ga_client_id: only_present_historically
  stub_session_id: stub_session_id_3
  first_seen_date: 2023-01-01

# Also present in the log fixture for the run date.
- dl_token: dltoken_2
  ga_client_id: also_present_today
  stub_session_id: stub_session_id_2
  first_seen_date: 2023-01-01