FXA-6721 Setup import of accounts table from FxA stage CloudSQL (#4327)

* FXA-6721 Setup import of accounts table from FxA stage CloudSQL

* Fix typo
This commit is contained in:
akkomar 2023-09-22 15:27:07 +02:00 коммит произвёл GitHub
Родитель c9cabdcedd
Коммит e3208aeecc
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 163 добавлений и 0 удалений

Просмотреть файл

@ -44,6 +44,7 @@ dry_run:
- sql/moz-fx-data-shared-prod/monitoring*/topsites*_rate*_live*/*.sql
- sql/moz-fx-data-shared-prod/pocket/pocket_reach_mau/view.sql
- sql/moz-fx-data-shared-prod/telemetry/buildhub2/view.sql
- sql/moz-fx-data-shared-prod/accounts_backend_external/nonprod_accounts_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_content_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_auth_bounce_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_auth_events_v1/query.sql

Просмотреть файл

@ -110,6 +110,23 @@ bqetl_fxa_events:
tags:
- impact/tier_1
# DAG entry for the tasks that copy data from Firefox Accounts (FxA) CloudSQL databases.
bqetl_accounts_backend_external:
# Cron expression: minute 30 of hour 1, every day.
schedule_interval: 30 1 * * *
description: |
Copies data from Firefox Accounts (FxA) CloudSQL databases.
This DAG is under active development.
default_args:
owner: akomar@mozilla.com
start_date: "2023-09-19"
email: ["akomar@mozilla.com"]
# A failed task is retried once, after a 10 minute delay.
retries: 1
retry_delay: 10m
tags:
- impact/tier_3
- triage/no_triage
- repo/bigquery-etl
bqetl_subplat:
schedule_interval: 45 1 * * *
description: |

Просмотреть файл

@ -0,0 +1,58 @@
# Generated via https://github.com/mozilla/bigquery-etl/blob/main/bigquery_etl/query_scheduling/generate_airflow_dags.py
from airflow import DAG
from airflow.sensors.external_task import ExternalTaskMarker
from airflow.sensors.external_task import ExternalTaskSensor
from airflow.utils.task_group import TaskGroup
import datetime
from utils.constants import ALLOWED_STATES, FAILED_STATES
from utils.gcp import bigquery_etl_query, gke_command, bigquery_dq_check
# Markdown rendered as the DAG's documentation (passed to `doc_md` below).
docs = """
### bqetl_accounts_backend_external
Built from bigquery-etl repo, [`dags/bqetl_accounts_backend_external.py`](https://github.com/mozilla/bigquery-etl/blob/main/dags/bqetl_accounts_backend_external.py)
#### Description
Copies data from Firefox Accounts (FxA) CloudSQL databases.
This DAG is under active development.
#### Owner
akomar@mozilla.com
"""

# Defaults handed to the DAG via `default_args`.
default_args = dict(
    owner="akomar@mozilla.com",
    start_date=datetime.datetime(2023, 9, 19, 0, 0),
    end_date=None,
    email=["akomar@mozilla.com"],
    depends_on_past=False,
    # 10 minutes between a failure and its single retry.
    retry_delay=datetime.timedelta(minutes=10),
    email_on_failure=True,
    email_on_retry=True,
    retries=1,
)

tags = [
    "impact/tier_3",
    "repo/bigquery-etl",
    "triage/no_triage",
]

with DAG(
    "bqetl_accounts_backend_external",
    default_args=default_args,
    schedule_interval="30 1 * * *",
    doc_md=docs,
    tags=tags,
) as dag:
    # Single task: writes to accounts_backend_external.nonprod_accounts_v1.
    # No date partition parameter is passed (date_partition_parameter=None),
    # and this task overrides the DAG default with depends_on_past=True.
    accounts_backend_external__nonprod_accounts__v1 = bigquery_etl_query(
        task_id="accounts_backend_external__nonprod_accounts__v1",
        project_id="moz-fx-data-shared-prod",
        dataset_id="accounts_backend_external",
        destination_table="nonprod_accounts_v1",
        owner="akomar@mozilla.com",
        email=["akomar@mozilla.com"],
        date_partition_parameter=None,
        depends_on_past=True,
    )

Просмотреть файл

@ -0,0 +1,11 @@
# Dataset-level metadata (presumably for the accounts_backend_external dataset; confirm against the file path).
friendly_name: Firefox Accounts Backend External
description: |-
Data extracted from the FxA backend services databases.
See https://mozilla.github.io/ecosystem-platform/reference/database-structure#database-fxa for more information.
dataset_base_acl: restricted
user_facing: false
labels: {}
# Grants roles/bigquery.dataEditor to the platform-infra/internal workgroup.
workgroup_access:
- role: roles/bigquery.dataEditor
members:
- workgroup:platform-infra/internal

Просмотреть файл

@ -0,0 +1,17 @@
---
# Table metadata and scheduling for the FxA staging `accounts` mirror.
friendly_name: FxA Accounts nonprod
description: >
A mirror of the `accounts` table from the staging FxA CloudSQL database,
updated daily to match the current state of the table.
owners:
- akomar@mozilla.com
labels:
application: accounts_backend
schedule: daily
scheduling:
dag_name: bqetl_accounts_backend_external
# destination is the whole table, not a single partition,
# so don't use date_partition_parameter
date_partition_parameter: null
depends_on_past: true
# No referenced tables are declared (presumably because the only source is
# the EXTERNAL_QUERY connection; confirm).
referenced_tables: []

Просмотреть файл

@ -0,0 +1,22 @@
-- Mirror selected columns of the `accounts` table from the staging FxA
-- CloudSQL database. The inner statement is sent through the named connection
-- by EXTERNAL_QUERY; the outer SELECT names the returned columns explicitly,
-- in the same order as the inner column list.
SELECT
  uid,
  normalizedEmail,
  emailVerified,
  verifierVersion,
  verifierSetAt,
  createdAt,
  locale,
  lockedAt,
  profileChangedAt,
  ecosystemAnonId,
  disabledAt,
  metricsOptOutAt
FROM
  EXTERNAL_QUERY(
    "moz-fx-fxa-nonprod.us.fxa-rds-nonprod-stage-fxa",
    """SELECT
uid,
normalizedEmail,
emailVerified,
verifierVersion,
verifierSetAt,
createdAt,
locale,
lockedAt,
profileChangedAt,
ecosystemAnonId,
disabledAt,
metricsOptOutAt
FROM
accounts
"""
  )

Просмотреть файл

@ -0,0 +1,37 @@
# Schema for the mirrored `accounts` columns. Field order matches the column
# list selected in the accompanying query. Every field is NULLABLE.
# NOTE(review): the *At columns are NUMERIC rather than TIMESTAMP, presumably
# epoch values carried over from the source database; confirm units.
fields:
- name: uid
type: BYTES
mode: NULLABLE
- name: normalizedEmail
type: STRING
mode: NULLABLE
# NOTE(review): INTEGER rather than BOOLEAN; presumably a 0/1 flag in the source; confirm.
- name: emailVerified
type: INTEGER
mode: NULLABLE
- name: verifierVersion
type: INTEGER
mode: NULLABLE
- name: verifierSetAt
type: NUMERIC
mode: NULLABLE
- name: createdAt
type: NUMERIC
mode: NULLABLE
- name: locale
type: STRING
mode: NULLABLE
- name: lockedAt
type: NUMERIC
mode: NULLABLE
- name: profileChangedAt
type: NUMERIC
mode: NULLABLE
- name: ecosystemAnonId
type: STRING
mode: NULLABLE
- name: disabledAt
type: NUMERIC
mode: NULLABLE
- name: metricsOptOutAt
type: NUMERIC
mode: NULLABLE