Add `fxa_delete_events_v2` ETL based on FxA logs from GCP (#4843)
* Add `fxa_delete_events_v2` ETL based on FxA logs from GCP. * Add `fxa_delete_events` view combining `fxa_delete_events_v1` and `fxa_delete_events_v2` data. * Use `fxa_delete_events` view for Shredder. * Update sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_delete_events_v2/metadata.yaml --------- Co-authored-by: kik-kik <42538694+kik-kik@users.noreply.github.com>
This commit is contained in:
Родитель
369b05aa26
Коммит
9aea89370b
|
@ -101,11 +101,9 @@ FIREFOX_IOS_SRC = DeleteSource(
|
|||
table="firefox_ios.deletion_request", field=GLEAN_CLIENT_ID
|
||||
)
|
||||
FXA_HMAC_SRC = DeleteSource(
|
||||
table="firefox_accounts_derived.fxa_delete_events_v1", field="hmac_user_id"
|
||||
)
|
||||
FXA_SRC = DeleteSource(
|
||||
table="firefox_accounts_derived.fxa_delete_events_v1", field=USER_ID
|
||||
table="firefox_accounts.fxa_delete_events", field="hmac_user_id"
|
||||
)
|
||||
FXA_SRC = DeleteSource(table="firefox_accounts.fxa_delete_events", field=USER_ID)
|
||||
REGRETS_SRC = DeleteSource(
|
||||
table="regrets_reporter_stable.regrets_reporter_update_v1",
|
||||
field="data_deletion_request.extension_installation_uuid",
|
||||
|
@ -118,7 +116,7 @@ SYNC_SOURCES = (
|
|||
field="payload.scalars.parent.deletion_request_sync_device_id",
|
||||
),
|
||||
DeleteSource(
|
||||
table="firefox_accounts_derived.fxa_delete_events_v1",
|
||||
table="firefox_accounts.fxa_delete_events",
|
||||
field="SUBSTR(hmac_user_id, 0, 32)",
|
||||
),
|
||||
)
|
||||
|
|
|
@ -62,6 +62,7 @@ dry_run:
|
|||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_auth_events_v1/query.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_delete_events_v1/init.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_delete_events_v1/query.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_delete_events_v2/query.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_oauth_events_v1/query.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_log_auth_events_v1/query.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_log_content_events_v1/query.sql
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.firefox_accounts.fxa_delete_events`
AS
-- Exposes the v1 and v2 FxA delete-event tables as a single relation with
-- the columns submission_timestamp, user_id and hmac_user_id.
-- UNION ALL is used, so rows are concatenated as-is with no de-duplication.
SELECT
  v1.submission_timestamp,
  v1.user_id,
  v1.hmac_user_id
FROM
  `moz-fx-data-shared-prod.firefox_accounts_derived.fxa_delete_events_v1` AS v1
UNION ALL
SELECT
  v2.submission_timestamp,
  v2.user_id,
  v2.hmac_user_id
FROM
  `moz-fx-data-shared-prod.firefox_accounts_derived.fxa_delete_events_v2` AS v2
|
|
@ -0,0 +1,10 @@
|
|||
fields:
|
||||
- name: submission_timestamp
|
||||
type: TIMESTAMP
|
||||
mode: NULLABLE
|
||||
- name: user_id
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: hmac_user_id
|
||||
type: STRING
|
||||
mode: NULLABLE
|
|
@ -0,0 +1,23 @@
|
|||
---
|
||||
friendly_name: FxA Delete Events
|
||||
description: Deletion events extracted from FxA auth server logs,
|
||||
used as a signal for Mozilla to delete analysis data associated with
|
||||
the user
|
||||
owners:
|
||||
- kik@mozilla.com
|
||||
labels:
|
||||
application: fxa
|
||||
incremental: true
|
||||
schedule: daily
|
||||
scheduling:
|
||||
dag_name: bqetl_fxa_events
|
||||
# This query references secret keys that are not available for dry runs,
|
||||
# so we must explicitly write out dependencies. In this case, the query
|
||||
# depends only on fxa logs produced via Cloud Monitoring integration, so no other
|
||||
# scheduled tasks are involved and the referenced_tables list is empty.
|
||||
referenced_tables: []
|
||||
bigquery:
|
||||
time_partitioning:
|
||||
type: day
|
||||
field: submission_timestamp
|
||||
require_partition_filter: false
|
|
@ -0,0 +1,30 @@
|
|||
-- Extracts FxA `account.deleted` activity events for @submission_date from
-- the FxA production stderr log table and emits, per event, the event
-- timestamp plus two derived identifiers for the account uid:
--   user_id      = hex-encoded SHA256 of the uid
--   hmac_user_id = hex-encoded HMAC-SHA256 of the uid under the decrypted key
WITH hmac_key AS (
  -- Decrypt the stored `fxa_hmac_prod` key using the keyset held in the
  -- secrets project; the key_id doubles as the AEAD additional data.
  SELECT
    AEAD.DECRYPT_BYTES(
      (SELECT keyset FROM `moz-fx-dataops-secrets.airflow_query_keys.fxa_prod`),
      ciphertext,
      CAST(key_id AS BYTES)
    ) AS value
  FROM
    `moz-fx-data-shared-prod.firefox_accounts_derived.encrypted_keys_v1`
  WHERE
    key_id = 'fxa_hmac_prod'
),
account_deletions AS (
  -- Raw deletion events whose log timestamp falls on @submission_date.
  SELECT
    `timestamp` AS submission_timestamp,
    jsonPayload.fields.uid AS uid
  FROM
    `moz-fx-fxa-prod.gke_fxa_prod_log.stderr`
  WHERE
    -- Scan the partitions one day either side of the target date
    -- (presumably because partition time and event timestamp can differ;
    -- confirm), then keep only rows whose own timestamp matches the date.
    DATE(_PARTITIONTIME)
    BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY)
    AND DATE_ADD(@submission_date, INTERVAL 1 DAY)
    AND DATE(`timestamp`) = @submission_date
    AND jsonPayload.type = 'activityEvent'
    AND jsonPayload.fields.event = 'account.deleted'
    AND jsonPayload.fields.uid IS NOT NULL
)
SELECT
  submission_timestamp,
  TO_HEX(SHA256(uid)) AS user_id,
  TO_HEX(
    udf.hmac_sha256((SELECT value FROM hmac_key), CAST(uid AS BYTES))
  ) AS hmac_user_id
FROM
  account_deletions
|
|
@ -0,0 +1,10 @@
|
|||
fields:
|
||||
- name: submission_timestamp
|
||||
type: TIMESTAMP
|
||||
mode: NULLABLE
|
||||
- name: user_id
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: hmac_user_id
|
||||
type: STRING
|
||||
mode: NULLABLE
|
Загрузка…
Ссылка в новой задаче