Add `fxa_delete_events_v2` ETL based on FxA logs from GCP (#4843)

* Add `fxa_delete_events_v2` ETL based on FxA logs from GCP.

* Add `fxa_delete_events` view combining `fxa_delete_events_v1` and `fxa_delete_events_v2` data.

* Use `fxa_delete_events` view for Shredder.

* Update sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_delete_events_v2/metadata.yaml

---------

Co-authored-by: kik-kik <42538694+kik-kik@users.noreply.github.com>
This commit is contained in:
Sean Rose 2024-01-17 05:34:54 -08:00 коммит произвёл GitHub
Родитель 369b05aa26
Коммит 9aea89370b
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
7 изменённых файлов: 93 добавлений и 5 удалений

Просмотреть файл

@ -101,11 +101,9 @@ FIREFOX_IOS_SRC = DeleteSource(
table="firefox_ios.deletion_request", field=GLEAN_CLIENT_ID
)
FXA_HMAC_SRC = DeleteSource(
table="firefox_accounts_derived.fxa_delete_events_v1", field="hmac_user_id"
)
FXA_SRC = DeleteSource(
table="firefox_accounts_derived.fxa_delete_events_v1", field=USER_ID
table="firefox_accounts.fxa_delete_events", field="hmac_user_id"
)
FXA_SRC = DeleteSource(table="firefox_accounts.fxa_delete_events", field=USER_ID)
REGRETS_SRC = DeleteSource(
table="regrets_reporter_stable.regrets_reporter_update_v1",
field="data_deletion_request.extension_installation_uuid",
@ -118,7 +116,7 @@ SYNC_SOURCES = (
field="payload.scalars.parent.deletion_request_sync_device_id",
),
DeleteSource(
table="firefox_accounts_derived.fxa_delete_events_v1",
table="firefox_accounts.fxa_delete_events",
field="SUBSTR(hmac_user_id, 0, 32)",
),
)

Просмотреть файл

@ -62,6 +62,7 @@ dry_run:
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_auth_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_delete_events_v1/init.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_delete_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_delete_events_v2/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_oauth_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_log_auth_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_log_content_events_v1/query.sql

Просмотреть файл

@ -0,0 +1,16 @@
-- Single entry point for FxA account-deletion events: stacks the rows of the
-- fxa_delete_events_v1 and fxa_delete_events_v2 derived tables.
-- UNION ALL keeps every row from both tables; nothing is de-duplicated here.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.firefox_accounts.fxa_delete_events`
AS
-- Rows from the v1 derived table.
SELECT
  v1_events.submission_timestamp,
  v1_events.user_id,
  v1_events.hmac_user_id
FROM
  `moz-fx-data-shared-prod.firefox_accounts_derived.fxa_delete_events_v1` AS v1_events
UNION ALL
-- Rows from the v2 derived table.
SELECT
  v2_events.submission_timestamp,
  v2_events.user_id,
  v2_events.hmac_user_id
FROM
  `moz-fx-data-shared-prod.firefox_accounts_derived.fxa_delete_events_v2` AS v2_events

Просмотреть файл

@ -0,0 +1,10 @@
# Column schema: the same three columns, in the same order, that the
# firefox_accounts.fxa_delete_events view selects. All columns are NULLABLE.
fields:
# Timestamp of the deletion event.
- name: submission_timestamp
type: TIMESTAMP
mode: NULLABLE
# User identifier as a string.
# NOTE(review): presumably a hashed FxA uid, as in the v2 query - confirm for v1 rows.
- name: user_id
type: STRING
mode: NULLABLE
# HMAC-derived user identifier as a string.
# NOTE(review): key and algorithm are not visible from this schema - confirm.
- name: hmac_user_id
type: STRING
mode: NULLABLE

Просмотреть файл

@ -0,0 +1,23 @@
---
# Table metadata: human-readable name/description, owners, labels,
# Airflow scheduling, and BigQuery partitioning settings.
friendly_name: FxA Delete Events
description: Deletion events extracted from FxA auth server logs
used as signal for Mozilla to delete analysis data associated with
the user
owners:
- kik@mozilla.com
# Labels: tagged as an FxA table that is populated incrementally on a daily schedule.
labels:
application: fxa
incremental: true
schedule: daily
# The query is scheduled as a task of the bqetl_fxa_events DAG.
scheduling:
dag_name: bqetl_fxa_events
# This query references secret keys that are not available for dry runs,
# so we must explicitly write out dependencies. In this case, the query
# depends only on fxa logs produced via Cloud Monitoring integration, so no other
# scheduled tasks are involved and the referenced_tables list is empty.
referenced_tables: []
# The destination table is partitioned by day on submission_timestamp;
# queries against it are not forced to supply a partition filter.
bigquery:
time_partitioning:
type: day
field: submission_timestamp
require_partition_filter: false

Просмотреть файл

@ -0,0 +1,30 @@
-- Account-deletion events for one day (@submission_date), read from the FxA
-- production GKE stderr log table. Emits one row per matching log entry with
-- the event timestamp and two derived identifiers for the account uid.
WITH decrypted_hmac_key AS (
  -- Decrypt the stored 'fxa_hmac_prod' ciphertext with the Airflow query
  -- keyset; the key id (as bytes) is passed as the additional data.
  SELECT
    AEAD.DECRYPT_BYTES(
      (SELECT keyset FROM `moz-fx-dataops-secrets.airflow_query_keys.fxa_prod`),
      ciphertext,
      CAST(key_id AS BYTES)
    ) AS value
  FROM
    `moz-fx-data-shared-prod.firefox_accounts_derived.encrypted_keys_v1`
  WHERE
    key_id = 'fxa_hmac_prod'
)
SELECT
  `timestamp` AS submission_timestamp,
  -- Hex-encoded SHA-256 digest of the uid.
  TO_HEX(SHA256(jsonPayload.fields.uid)) AS user_id,
  -- Hex-encoded HMAC-SHA256 of the uid, keyed with the decrypted HMAC key.
  TO_HEX(
    udf.hmac_sha256(
      (SELECT value FROM decrypted_hmac_key),
      CAST(jsonPayload.fields.uid AS BYTES)
    )
  ) AS hmac_user_id,
FROM
  `moz-fx-fxa-prod.gke_fxa_prod_log.stderr`
WHERE
  -- Only account-deletion activity events that carry a uid.
  jsonPayload.type = 'activityEvent'
  AND jsonPayload.fields.event = 'account.deleted'
  AND jsonPayload.fields.uid IS NOT NULL
  -- Keep only events whose own timestamp falls on the requested day.
  AND DATE(`timestamp`) = @submission_date
  -- Scan the partitions from one day before to one day after the requested
  -- day; presumably this catches entries whose ingestion partition differs
  -- from their event date while still limiting the scan.
  AND (
    DATE(_PARTITIONTIME)
    BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY)
    AND DATE_ADD(@submission_date, INTERVAL 1 DAY)
  )

Просмотреть файл

@ -0,0 +1,10 @@
# Column schema: the same three columns, in the same order, that the
# fxa_delete_events_v2 query outputs. All columns are NULLABLE.
fields:
# Timestamp of the deletion event (the query aliases the log `timestamp` column to this name).
- name: submission_timestamp
type: TIMESTAMP
mode: NULLABLE
# Hex-encoded SHA-256 digest of the account uid, as computed by the query.
- name: user_id
type: STRING
mode: NULLABLE
# Hex-encoded HMAC-SHA256 of the account uid, as computed by the query.
- name: hmac_user_id
type: STRING
mode: NULLABLE