DENG-1879 Setup import of emails table from FxA prod CloudSQL (#4494)

This commit is contained in:
akkomar 2023-10-31 10:31:27 +01:00 коммит произвёл GitHub
Родитель 511894d181
Коммит 2754d7d7c0
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
5 изменённых файлов: 89 добавлений и 0 удалений

Просмотреть файл

@ -50,6 +50,7 @@ dry_run:
- sql/moz-fx-data-shared-prod/accounts_backend_external/nonprod_emails_v1/query.sql
- sql/moz-fx-data-shared-prod/accounts_backend/nonprod_accounts/view.sql
- sql/moz-fx-data-shared-prod/accounts_backend_external/accounts_v1/query.sql
- sql/moz-fx-data-shared-prod/accounts_backend_external/emails_v1/query.sql
- sql/moz-fx-data-shared-prod/accounts_backend/accounts/view.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_content_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_auth_bounce_events_v1/query.sql

Просмотреть файл

@ -58,6 +58,18 @@ with DAG(
task_concurrency=1,
)
# Task that rebuilds the `emails_v1` table; date_partition_parameter is None,
# so no date partition parameter is passed to the query.
accounts_backend_external__emails__v1 = bigquery_etl_query(
    task_id="accounts_backend_external__emails__v1",
    # Destination: project -> dataset -> table.
    project_id="moz-fx-data-shared-prod",
    dataset_id="accounts_backend_external",
    destination_table="emails_v1",
    date_partition_parameter=None,
    # Ownership and alerting recipients.
    owner="akomar@mozilla.com",
    email=["akomar@mozilla.com", "telemetry-alerts@mozilla.com"],
    # Scheduling behaviour.
    depends_on_past=False,
    task_concurrency=1,
)
accounts_backend_external__nonprod_accounts__v1 = bigquery_etl_query(
task_id="accounts_backend_external__nonprod_accounts__v1",
destination_table="nonprod_accounts_v1",

Просмотреть файл

@ -0,0 +1,22 @@
---
friendly_name: Emails table from production FxA database
description: |-
A mirror of the `emails` table from the production FxA CloudSQL database,
updated daily to match the current state of the table.
Some fields in this table are converted to a more user-friendly, BigQuery-native format:
- `uid` is converted from bytes to a hex string
- boolean integer columns are converted to BOOL
- timestamp columns are converted to TIMESTAMP
See https://mozilla.github.io/ecosystem-platform/reference/database-structure#database-fxa
owners:
- akomar@mozilla.com
labels:
application: accounts_backend
schedule: daily
scheduling:
dag_name: bqetl_accounts_backend_external
# destination is the whole table, not a single partition,
# so don't use date_partition_parameter
date_partition_parameter: null
referenced_tables: []

Просмотреть файл

@ -0,0 +1,25 @@
-- Reads the `emails` table through a federated EXTERNAL_QUERY connection and
-- converts selected columns to BigQuery-native types.
SELECT
  fxa_emails.id,
  fxa_emails.normalizedEmail,
  fxa_emails.email,
  -- Render the uid as a hexadecimal string.
  TO_HEX(fxa_emails.uid) AS uid,
  -- Flag columns become BOOL; SAFE_CAST yields NULL instead of raising an error.
  SAFE_CAST(fxa_emails.isVerified AS BOOL) AS isVerified,
  SAFE_CAST(fxa_emails.isPrimary AS BOOL) AS isPrimary,
  -- Values are interpreted as milliseconds since the Unix epoch; the SAFE
  -- variants return NULL rather than failing on values that cannot be converted.
  SAFE.TIMESTAMP_MILLIS(SAFE_CAST(fxa_emails.verifiedAt AS INT64)) AS verifiedAt,
  SAFE.TIMESTAMP_MILLIS(SAFE_CAST(fxa_emails.createdAt AS INT64)) AS createdAt
FROM
  EXTERNAL_QUERY(
    "moz-fx-fxa-prod.us.fxa-rds-prod-prod-fxa",
    """SELECT
id,
normalizedEmail,
email,
uid,
isVerified,
isPrimary,
verifiedAt,
createdAt
FROM
emails
"""
  ) AS fxa_emails

Просмотреть файл

@ -0,0 +1,29 @@
fields:
- name: id
type: INTEGER
mode: NULLABLE
- name: normalizedEmail
type: STRING
mode: NULLABLE
- name: email
type: STRING
mode: NULLABLE
- name: uid
type: STRING
mode: NULLABLE
description: |-
Account ID in hexadecimal format.
FxA stores this as bytes; for logging and integration with other
systems, we convert it to a hex string.
- name: isVerified
type: BOOLEAN
mode: NULLABLE
- name: isPrimary
type: BOOLEAN
mode: NULLABLE
- name: verifiedAt
type: TIMESTAMP
mode: NULLABLE
- name: createdAt
type: TIMESTAMP
mode: NULLABLE