Bug 1635906 Add client ID lookup table for AET (#1335)
* Bug 1635906 Add client ID lookup table for AET. Builds on top of the eco_uid lookup table added in #1323. * Exempt from dry run.
This commit is contained in:
Родитель
7e4054fb5e
Коммит
504c007d6c
|
@ -20,6 +20,7 @@ import sys
|
|||
|
||||
SKIP = {
|
||||
# Access Denied
|
||||
"sql/account_ecosystem_derived/ecosystem_client_id_lookup_v1/query.sql",
|
||||
"sql/activity_stream/impression_stats_flat/view.sql",
|
||||
"sql/activity_stream/tile_id_types/view.sql",
|
||||
"sql/monitoring/deletion_request_volume_v1/query.sql",
|
||||
|
|
|
@ -20,6 +20,18 @@ with DAG(
|
|||
"bqetl_account_ecosystem", default_args=default_args, schedule_interval="0 2 * * *"
|
||||
) as dag:
|
||||
|
||||
# Task for the ecosystem_client_id_lookup_v1 table in account_ecosystem_derived.
# Arguments are grouped as: identity, destination, scheduling, ownership.
account_ecosystem_derived__ecosystem_client_id_lookup__v1 = bigquery_etl_query(
    task_id="account_ecosystem_derived__ecosystem_client_id_lookup__v1",
    dag=dag,
    # Where the query result is written.
    project_id="moz-fx-data-shared-prod",
    dataset_id="account_ecosystem_derived",
    destination_table="ecosystem_client_id_lookup_v1",
    # The query is parameterized on @submission_date; each run depends on the
    # previous run having succeeded.
    date_partition_parameter="submission_date",
    depends_on_past=True,
    # Ownership / alerting.
    owner="jklukas@mozilla.com",
    email=["jklukas@mozilla.com"],
)
|
||||
|
||||
account_ecosystem_derived__ecosystem_user_id_lookup__v1 = bigquery_etl_query(
|
||||
task_id="account_ecosystem_derived__ecosystem_user_id_lookup__v1",
|
||||
destination_table="ecosystem_user_id_lookup_v1",
|
||||
|
@ -34,6 +46,9 @@ with DAG(
|
|||
dag=dag,
|
||||
)
|
||||
|
||||
# The client ID lookup task runs only after the user ID lookup task.
account_ecosystem_derived__ecosystem_user_id_lookup__v1.set_downstream(
    account_ecosystem_derived__ecosystem_client_id_lookup__v1
)
|
||||
wait_for_copy_deduplicate_all = ExternalTaskSensor(
|
||||
task_id="wait_for_copy_deduplicate_all",
|
||||
external_dag_id="copy_deduplicate",
|
||||
|
@ -44,6 +59,10 @@ with DAG(
|
|||
pool="DATA_ENG_EXTERNALTASKSENSOR",
|
||||
)
|
||||
|
||||
# Both lookup tasks run downstream of the copy_deduplicate sensor.
wait_for_copy_deduplicate_all.set_downstream(
    [
        account_ecosystem_derived__ecosystem_client_id_lookup__v1,
        account_ecosystem_derived__ecosystem_user_id_lookup__v1,
    ]
)
|
||||
|
|
|
@ -0,0 +1,8 @@
|
|||
-- Lookup from hashed ecosystem client ID to canonical ID.
-- Partitioned on the date a client ID hash was first recorded.
CREATE TABLE IF NOT EXISTS
  `moz-fx-data-shared-prod.account_ecosystem_derived.ecosystem_client_id_lookup_v1`
(
  -- Hex-encoded HMAC-SHA256 of the ecosystem client ID.
  ecosystem_client_id_hash STRING NOT NULL,
  -- Canonical ID taken from ecosystem_user_id_lookup_v1.
  canonical_id STRING NOT NULL,
  -- DATE(submission_timestamp) of the run that inserted the row.
  first_seen_date DATE NOT NULL
)
PARTITION BY
  first_seen_date
|
|
@ -0,0 +1,17 @@
|
|||
# NOTE(review): nesting below is reconstructed from key semantics; confirm
# that referenced_tables belongs under scheduling.
friendly_name: Ecosystem Client ID Lookup
description: >
  Lookup table of ecosystem_client_id_hash to canonical_id.
owners:
  - jklukas@mozilla.com
labels:
  application: aet
  schedule: daily
  incremental: true
scheduling:
  dag_name: bqetl_account_ecosystem
  # The query anti-joins against its own destination table, so each run
  # needs the previous run's output.
  depends_on_past: True
  # The query reads a restricted table to obtain an HMAC key, so it cannot
  # be dry run; the tables it references are therefore listed explicitly.
  referenced_tables:
    - ['telemetry_stable', 'account_ecosystem_v4']
    - ['account_ecosystem_derived', 'ecosystem_user_id_lookup_v1']
|
|
@ -0,0 +1,50 @@
|
|||
-- Selects ecosystem client ID hashes seen on @submission_date that are not yet
-- present in ecosystem_client_id_lookup_v1, paired with the canonical_id that
-- ecosystem_user_id_lookup_v1 holds for the accompanying ecosystem_user_id.
WITH hmac_key AS (
  -- Decrypt the HMAC key stored under key_id 'aet_hmac_prod'.
  SELECT
    AEAD.DECRYPT_BYTES(
      (SELECT keyset FROM `moz-fx-dataops-secrets.airflow_query_keys.aet_prod`),
      ciphertext,
      CAST(key_id AS BYTES)
    ) AS value
  FROM
    `moz-fx-data-shared-prod.account_ecosystem_restricted.encrypted_keys_v1`
  WHERE
    key_id = 'aet_hmac_prod'
),
unioned AS (
  SELECT
    submission_timestamp,
    payload.ecosystem_user_id,
    payload.ecosystem_client_id
  FROM
    telemetry_stable.account_ecosystem_v4
  -- As we add AET to additional applications, they will be added here via UNION ALL
  -- and also added to the list of referenced tables in metadata.yaml
),
hashed AS (
  -- Distinct (client ID hash, user ID, date) combinations for the target day.
  SELECT DISTINCT
    TO_HEX(
      udf.hmac_sha256((SELECT value FROM hmac_key), CAST(ecosystem_client_id AS BYTES))
    ) AS ecosystem_client_id_hash,
    ecosystem_user_id,
    DATE(submission_timestamp) AS first_seen_date
  FROM
    unioned
  WHERE
    DATE(submission_timestamp) = @submission_date
)
SELECT
  hashed.ecosystem_client_id_hash,
  user_lookup.canonical_id,
  hashed.first_seen_date
FROM
  hashed
INNER JOIN
  ecosystem_user_id_lookup_v1 AS user_lookup
ON
  user_lookup.ecosystem_user_id = hashed.ecosystem_user_id
LEFT JOIN
  ecosystem_client_id_lookup_v1 AS known
ON
  known.ecosystem_client_id_hash = hashed.ecosystem_client_id_hash
WHERE
  -- Anti-join: keep only hashes with no row in the existing lookup table.
  known.ecosystem_client_id_hash IS NULL
|
Загрузка…
Ссылка в новой задаче