Use time travel to avoid losing data during backfills

This commit is contained in:
Arkadiusz Komarzewski 2024-11-20 10:33:13 +01:00
Родитель d89e24a779
Коммит 0c1ca59aca
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 519D6B58C3D95D09
4 изменённых файлов: 141 добавлений и 42 удалений

Просмотреть файл

@ -42,6 +42,8 @@ dry_run:
- sql/moz-fx-data-shared-prod/accounts_backend_external/nonprod_emails_v1/query.sql
- sql/moz-fx-data-shared-prod/accounts_backend_external/accounts_v1/query.sql
- sql/moz-fx-data-shared-prod/accounts_backend_external/emails_v1/query.sql
# uses time travel, will error on current date
- sql/moz-fx-data-shared-prod/accounts_backend_derived/monitoring_db_counts_v1/query.sql
- sql/moz-fx-data-shared-prod/accounts_db_external/**/*.sql
- sql/moz-fx-data-shared-prod/accounts_db_nonprod_external/**/*.sql
- sql/moz-fx-data-shared-prod/ads/ppa_measurements/*.sql

Просмотреть файл

@ -3,8 +3,6 @@ description: |-
Simple aggregation of counts of records in the FxA DB tables.
Enables to identify trends within accounts data. E.g. "How many
inactive accounts are there?"
Note: because its source tables are overwritten daily, query used to
populate this table is not idempotent so this table should not be backfilled.
owners:
- wclouser@mozilla.com
labels:
@ -12,9 +10,11 @@ labels:
owner1: wclouser@mozilla.com
scheduling:
dag_name: bqetl_accounts_derived
parameters:
- "as_of_date:DATE:{{ ds }}"
bigquery:
time_partitioning:
type: day
field: date
field: as_of_date
require_partition_filter: false
expiration_days: null

Просмотреть файл

@ -3,176 +3,267 @@ WITH table_counts AS (
'account_customers' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_account_customers_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_account_customers_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'account_groups' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_account_groups_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_account_groups_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'account_reset_tokens' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_account_reset_tokens_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_account_reset_tokens_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'accounts' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_accounts_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_accounts_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'carts' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_carts_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_carts_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'device_commands' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_device_commands_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_device_commands_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'devices' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_devices_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_devices_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'email_bounces' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_email_bounces_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_email_bounces_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'emails' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_emails_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_emails_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'linked_accounts' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_linked_accounts_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_linked_accounts_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'oauth_codes' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_oauth_codes_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_oauth_codes_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'oauth_refresh_tokens' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_oauth_refresh_tokens_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_oauth_refresh_tokens_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'oauth_tokens' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_oauth_tokens_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_oauth_tokens_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'password_change_tokens' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_password_change_tokens_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_password_change_tokens_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'password_forgot_tokens' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_password_forgot_tokens_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_password_forgot_tokens_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'paypal_customers' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_paypal_customers_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_paypal_customers_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'recovery_codes' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_recovery_codes_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_recovery_codes_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'security_events' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_security_events_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_security_events_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'sent_emails' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_sent_emails_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_sent_emails_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'session_tokens' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_session_tokens_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_session_tokens_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'signin_codes' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_signin_codes_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_signin_codes_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'totp' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_totp_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_totp_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'unblock_codes' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_unblock_codes_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_unblock_codes_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'unverified_tokens' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_unverified_tokens_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_unverified_tokens_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
SELECT
'verification_reminders' AS table_name,
COUNT(*) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_verification_reminders_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_verification_reminders_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
UNION ALL
(
SELECT
"accounts_with_secondary_emails" AS table_name,
COUNT(DISTINCT accounts.uid) AS total_rows
COUNT(
DISTINCT `moz-fx-data-shared-prod.accounts_db_external.fxa_accounts_v1`.uid
) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_accounts_v1` accounts
`moz-fx-data-shared-prod.accounts_db_external.fxa_accounts_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
JOIN
`moz-fx-data-shared-prod.accounts_db_external.fxa_emails_v1` emails
ON accounts.uid = emails.uid
`moz-fx-data-shared-prod.accounts_db_external.fxa_emails_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
ON `moz-fx-data-shared-prod.accounts_db_external.fxa_accounts_v1`.uid = `moz-fx-data-shared-prod.accounts_db_external.fxa_emails_v1`.uid
WHERE
emails.isPrimary = FALSE
`moz-fx-data-shared-prod.accounts_db_external.fxa_emails_v1`.isPrimary = FALSE
)
UNION ALL
(
SELECT
"accounts_with_unverified_emails" AS table_name,
COUNT(DISTINCT accounts.uid) AS total_rows
COUNT(
DISTINCT `moz-fx-data-shared-prod.accounts_db_external.fxa_accounts_v1`.uid
) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_accounts_v1` accounts
`moz-fx-data-shared-prod.accounts_db_external.fxa_accounts_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
JOIN
`moz-fx-data-shared-prod.accounts_db_external.fxa_emails_v1` emails
ON accounts.uid = emails.uid
`moz-fx-data-shared-prod.accounts_db_external.fxa_emails_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
ON `moz-fx-data-shared-prod.accounts_db_external.fxa_accounts_v1`.uid = `moz-fx-data-shared-prod.accounts_db_external.fxa_emails_v1`.uid
WHERE
emails.isVerified = FALSE
`moz-fx-data-shared-prod.accounts_db_external.fxa_emails_v1`.isVerified = FALSE
)
UNION ALL
(
@ -180,7 +271,10 @@ WITH table_counts AS (
"accounts_linked_to_google" AS table_name,
COUNT(uid) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_linked_accounts_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_linked_accounts_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
WHERE
providerId = 1 -- see LinkedAccountProviderIds at https://github.com/mozilla/fxa/blob/main/packages/fxa-settings/src/lib/types.ts
)
@ -190,13 +284,16 @@ WITH table_counts AS (
"accounts_linked_to_apple" AS table_name,
COUNT(uid) AS total_rows
FROM
`moz-fx-data-shared-prod.accounts_db_external.fxa_linked_accounts_v1`
`moz-fx-data-shared-prod.accounts_db_external.fxa_linked_accounts_v1` FOR SYSTEM_TIME AS OF TIMESTAMP(
@as_of_date,
'UTC'
)
WHERE
providerId = 2 -- see LinkedAccountProviderIds at https://github.com/mozilla/fxa/blob/main/packages/fxa-settings/src/lib/types.ts
)
)
SELECT
@submission_date AS date,
@as_of_date AS as_of_date,
table_name,
total_rows
FROM

Просмотреть файл

@ -1,5 +1,5 @@
fields:
- name: date
- name: as_of_date
type: DATE
mode: NULLABLE
- name: table_name