FXA-6721 Setup import of accounts table from FxA production CloudSQL (#4423)

This commit is contained in:
akkomar 2023-10-25 09:50:25 +02:00 коммит произвёл GitHub
Родитель ae7d0f4766
Коммит 66729aa702
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
8 изменённых файлов: 160 добавлений и 5 удалений

Просмотреть файл

@ -45,6 +45,9 @@ dry_run:
- sql/moz-fx-data-shared-prod/pocket/pocket_reach_mau/view.sql
- sql/moz-fx-data-shared-prod/telemetry/buildhub2/view.sql
- sql/moz-fx-data-shared-prod/accounts_backend_external/nonprod_accounts_v1/query.sql
- sql/moz-fx-data-shared-prod/accounts_backend/nonprod_accounts/view.sql
- sql/moz-fx-data-shared-prod/accounts_backend_external/accounts_v1/query.sql
- sql/moz-fx-data-shared-prod/accounts_backend/accounts/view.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_content_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_auth_bounce_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_auth_events_v1/query.sql

Просмотреть файл

@ -119,12 +119,11 @@ bqetl_accounts_backend_external:
default_args:
owner: akomar@mozilla.com
start_date: "2023-09-19"
email: ["akomar@mozilla.com"]
email: ["akomar@mozilla.com", "telemetry-alerts@mozilla.com"]
retries: 1
retry_delay: 10m
tags:
- impact/tier_3
- triage/no_triage
- repo/bigquery-etl
bqetl_subplat:

Просмотреть файл

@ -29,7 +29,7 @@ default_args = {
"owner": "akomar@mozilla.com",
"start_date": datetime.datetime(2023, 9, 19, 0, 0),
"end_date": None,
"email": ["akomar@mozilla.com"],
"email": ["akomar@mozilla.com", "telemetry-alerts@mozilla.com"],
"depends_on_past": False,
"retry_delay": datetime.timedelta(seconds=600),
"email_on_failure": True,
@ -37,7 +37,7 @@ default_args = {
"retries": 1,
}
tags = ["impact/tier_3", "repo/bigquery-etl", "triage/no_triage"]
tags = ["impact/tier_3", "repo/bigquery-etl"]
with DAG(
"bqetl_accounts_backend_external",
@ -46,13 +46,25 @@ with DAG(
doc_md=docs,
tags=tags,
) as dag:
# Task for the production accounts import: its destination is
# moz-fx-data-shared-prod.accounts_backend_external.accounts_v1.
# date_partition_parameter is None because the destination is the whole
# table rather than a single partition.
accounts_backend_external__accounts__v1 = bigquery_etl_query(
task_id="accounts_backend_external__accounts__v1",
destination_table="accounts_v1",
dataset_id="accounts_backend_external",
project_id="moz-fx-data-shared-prod",
owner="akomar@mozilla.com",
email=["akomar@mozilla.com", "telemetry-alerts@mozilla.com"],
date_partition_parameter=None,
depends_on_past=False,
task_concurrency=1,
)
accounts_backend_external__nonprod_accounts__v1 = bigquery_etl_query(
task_id="accounts_backend_external__nonprod_accounts__v1",
destination_table="nonprod_accounts_v1",
dataset_id="accounts_backend_external",
project_id="moz-fx-data-shared-prod",
owner="akomar@mozilla.com",
email=["akomar@mozilla.com"],
email=["akomar@mozilla.com", "telemetry-alerts@mozilla.com"],
date_partition_parameter=None,
depends_on_past=False,
task_concurrency=1,

Просмотреть файл

@ -0,0 +1,17 @@
---
friendly_name: Accounts table from production FxA database
description: |-
An authorized view on top of the `accounts_backend_external.accounts_v1` table
that only includes non-sensitive fields.
Some fields in this table are converted to a more user-friendly, BigQuery-native format:
- `uid` is converted from bytes to a hex string
- boolean integer columns are converted to BOOL
- timestamp columns are converted to TIMESTAMP
See https://mozilla.github.io/ecosystem-platform/reference/database-structure#database-fxa
labels:
authorized: true
workgroup_access:
- role: roles/bigquery.dataViewer
members:
- workgroup:mozilla-confidential

Просмотреть файл

@ -0,0 +1,18 @@
-- User-facing view over the production FxA `accounts` import.
-- Exposes every column of `accounts_backend_external.accounts_v1` except
-- `normalizedEmail` and `email`, which are deliberately not selected here.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.accounts_backend.accounts`
AS
SELECT
  src.uid,
  src.emailVerified,
  src.verifierVersion,
  src.verifierSetAt,
  src.createdAt,
  src.locale,
  src.lockedAt,
  src.profileChangedAt,
  src.keysChangedAt,
  src.ecosystemAnonId,
  src.disabledAt,
  src.metricsOptOutAt,
FROM
  `moz-fx-data-shared-prod.accounts_backend_external.accounts_v1` AS src

Просмотреть файл

@ -0,0 +1,22 @@
---
friendly_name: Accounts table from production FxA database
description: >
A mirror of the `accounts` table from the production FxA CloudSQL database,
updated daily to match the current state of the table.
Some fields in this table are converted to a more user-friendly, BigQuery-native format:
- `uid` is converted from bytes to a hex string
- boolean integer columns are converted to BOOL
- timestamp columns are converted to TIMESTAMP
See https://mozilla.github.io/ecosystem-platform/reference/database-structure#database-fxa
owners:
- akomar@mozilla.com
labels:
application: accounts_backend
schedule: daily
scheduling:
dag_name: bqetl_accounts_backend_external
# destination is the whole table, not a single partition,
# so don't use date_partition_parameter
date_partition_parameter: null
referenced_tables: []

Просмотреть файл

@ -0,0 +1,37 @@
-- Snapshot of the `accounts` table read from the production FxA database
-- through a BigQuery federated connection (EXTERNAL_QUERY).
--
-- Conversions applied on the BigQuery side:
--   * uid            -> hex string (TO_HEX)
--   * emailVerified  -> BOOL
--   * *At columns    -> TIMESTAMP, interpreting the stored value as epoch milliseconds
--
-- SAFE_CAST and the SAFE. prefix return NULL instead of failing the whole
-- query when an individual value cannot be converted.
SELECT
  TO_HEX(fxa.uid) AS uid,
  fxa.normalizedEmail,
  fxa.email,
  SAFE_CAST(fxa.emailVerified AS BOOL) AS emailVerified,
  fxa.verifierVersion,
  SAFE.TIMESTAMP_MILLIS(SAFE_CAST(fxa.verifierSetAt AS INT64)) AS verifierSetAt,
  SAFE.TIMESTAMP_MILLIS(SAFE_CAST(fxa.createdAt AS INT64)) AS createdAt,
  fxa.locale,
  SAFE.TIMESTAMP_MILLIS(SAFE_CAST(fxa.lockedAt AS INT64)) AS lockedAt,
  SAFE.TIMESTAMP_MILLIS(SAFE_CAST(fxa.profileChangedAt AS INT64)) AS profileChangedAt,
  SAFE.TIMESTAMP_MILLIS(SAFE_CAST(fxa.keysChangedAt AS INT64)) AS keysChangedAt,
  fxa.ecosystemAnonId,
  SAFE.TIMESTAMP_MILLIS(SAFE_CAST(fxa.disabledAt AS INT64)) AS disabledAt,
  SAFE.TIMESTAMP_MILLIS(SAFE_CAST(fxa.metricsOptOutAt AS INT64)) AS metricsOptOutAt,
FROM
  EXTERNAL_QUERY(
    "moz-fx-fxa-prod.us.fxa-rds-prod-prod-fxa",
    -- The statement below is executed by the remote database, not by BigQuery.
    """SELECT
         uid,
         normalizedEmail,
         email,
         emailVerified,
         verifierVersion,
         verifierSetAt,
         createdAt,
         locale,
         lockedAt,
         profileChangedAt,
         keysChangedAt,
         ecosystemAnonId,
         disabledAt,
         metricsOptOutAt
       FROM
         accounts
    """
  ) AS fxa

Просмотреть файл

@ -0,0 +1,47 @@
fields:
- name: uid
type: STRING
mode: NULLABLE
description: |-
Account ID in hexadecimal format.
FxA stores this as bytes; for logging and integration
with other systems we convert it to a hex string.
- name: normalizedEmail
type: STRING
mode: NULLABLE
- name: email
type: STRING
mode: NULLABLE
- name: emailVerified
type: BOOLEAN
mode: NULLABLE
- name: verifierVersion
type: INTEGER
mode: NULLABLE
- name: verifierSetAt
type: TIMESTAMP
mode: NULLABLE
- name: createdAt
type: TIMESTAMP
mode: NULLABLE
- name: locale
type: STRING
mode: NULLABLE
- name: lockedAt
type: TIMESTAMP
mode: NULLABLE
- name: profileChangedAt
type: TIMESTAMP
mode: NULLABLE
- name: keysChangedAt
type: TIMESTAMP
mode: NULLABLE
- name: ecosystemAnonId
type: STRING
mode: NULLABLE
- name: disabledAt
type: TIMESTAMP
mode: NULLABLE
- name: metricsOptOutAt
type: TIMESTAMP
mode: NULLABLE