# feat(): updated fxa nonprod queries to be in line with production queries (#4297)

* updated fxa nonprod/staging queries to be in line with what production queries look like

* Apply suggestions from code review provided by srose

Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com>

* Apply suggestions from code review

Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com>

* tweaks made as suggested by srose in PR#4297

---------

Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com>
This commit is contained in:
kik-kik 2023-09-21 16:54:17 +02:00 коммит произвёл GitHub
Родитель 2356bfeca7
Коммит 69592dab81
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
15 изменённых файлов: 1558 добавлений и 82 удалений

Просмотреть файл

@ -65,7 +65,8 @@ dry_run:
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_auth_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_content_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_stdout_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_server_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_gcp_stdout_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_gcp_stderr_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_admin_server_sanitized_v1/init.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_admin_server_sanitized_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_customs_sanitized_v1/init.sql

Просмотреть файл

@ -435,8 +435,8 @@ with DAG(
destination_table="nonprod_fxa_auth_events_v1",
dataset_id="firefox_accounts_derived",
project_id="moz-fx-data-shared-prod",
owner="dthorn@mozilla.com",
email=["dthorn@mozilla.com", "telemetry-alerts@mozilla.com"],
owner="kik@mozilla.com",
email=["dthorn@mozilla.com", "kik@mozilla.com", "telemetry-alerts@mozilla.com"],
date_partition_parameter="submission_date",
depends_on_past=False,
arguments=["--schema_update_option=ALLOW_FIELD_ADDITION"],
@ -454,9 +454,22 @@ with DAG(
arguments=["--schema_update_option=ALLOW_FIELD_ADDITION"],
)
firefox_accounts_derived__nonprod_fxa_server_events__v1 = bigquery_etl_query(
task_id="firefox_accounts_derived__nonprod_fxa_server_events__v1",
destination_table="nonprod_fxa_server_events_v1",
firefox_accounts_derived__nonprod_fxa_gcp_stderr_events__v1 = bigquery_etl_query(
task_id="firefox_accounts_derived__nonprod_fxa_gcp_stderr_events__v1",
destination_table="nonprod_fxa_gcp_stderr_events_v1",
dataset_id="firefox_accounts_derived",
project_id="moz-fx-data-shared-prod",
owner="kik@mozilla.com",
email=["dthorn@mozilla.com", "kik@mozilla.com", "telemetry-alerts@mozilla.com"],
start_date=datetime.datetime(2023, 5, 26, 0, 0),
date_partition_parameter="submission_date",
depends_on_past=False,
arguments=["--schema_update_option=ALLOW_FIELD_ADDITION"],
)
firefox_accounts_derived__nonprod_fxa_gcp_stdout_events__v1 = bigquery_etl_query(
task_id="firefox_accounts_derived__nonprod_fxa_gcp_stdout_events__v1",
destination_table="nonprod_fxa_gcp_stdout_events_v1",
dataset_id="firefox_accounts_derived",
project_id="moz-fx-data-shared-prod",
owner="kik@mozilla.com",

Просмотреть файл

@ -69,13 +69,12 @@ stdout_events AS (
FROM
`moz-fx-data-shared-prod.firefox_accounts_derived.nonprod_fxa_stdout_events_v1`
),
-- New fxa event table (nonprod) includes content and auth events
server_events AS (
gcp_stdout_events AS (
SELECT
fxa_server,
`timestamp`,
receiveTimestamp,
TIMESTAMP_MILLIS(CAST(jsonPayload.fields.time AS INT64)) AS event_time,
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(jsonPayload.fields.time AS INT64)) AS event_time,
jsonPayload.fields.user_id,
jsonPayload.fields.country,
JSON_VALUE(jsonPayload.fields.event_properties, "$.country_code") AS country_code,
@ -89,9 +88,28 @@ server_events AS (
jsonPayload.fields.event_properties,
jsonPayload.fields.device_id,
FROM
`moz-fx-data-shared-prod.firefox_accounts_derived.nonprod_fxa_server_events_v1`
WHERE
DATE(`timestamp`) >= "2023-05-26"
`moz-fx-data-shared-prod.firefox_accounts_derived.nonprod_fxa_gcp_stdout_events_v1`
),
gcp_stderr_events AS (
SELECT
fxa_server,
`timestamp`,
receiveTimestamp,
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(jsonPayload.fields.time AS INT64)) AS event_time,
jsonPayload.fields.user_id,
jsonPayload.fields.country,
JSON_VALUE(jsonPayload.fields.event_properties, "$.country_code") AS country_code,
jsonPayload.fields.language,
jsonPayload.fields.app_version,
jsonPayload.fields.os_name,
jsonPayload.fields.os_version,
jsonPayload.fields.event_type,
jsonPayload.logger,
jsonPayload.fields.user_properties,
jsonPayload.fields.event_properties,
jsonPayload.fields.device_id,
FROM
`moz-fx-data-shared-prod.firefox_accounts_derived.nonprod_fxa_gcp_stderr_events_v1`
),
unioned AS (
SELECT
@ -112,7 +130,12 @@ unioned AS (
SELECT
*
FROM
server_events
gcp_stdout_events
UNION ALL
SELECT
*
FROM
gcp_stderr_events
)
SELECT
fxa_server AS fxa_log,

Просмотреть файл

@ -3,7 +3,7 @@ friendly_name: Non-Prod FxA Auth Events
description:
Selected Amplitude events extracted from Non-Prod FxA auth server logs
owners:
- dthorn@mozilla.com
- kik@mozilla.com
labels:
application: fxa
incremental: true

Просмотреть файл

@ -5,7 +5,11 @@ SELECT
jsonPayload.* REPLACE (
(
SELECT AS STRUCT
jsonPayload.fields.* EXCEPT (user_id, device_id, deviceid) REPLACE(
jsonPayload.fields.* EXCEPT (deviceid) REPLACE(
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
TO_HEX(
SHA256(COALESCE(jsonPayload.fields.device_id, jsonPayload.fields.deviceid))
) AS device_id,
-- See https://bugzilla.mozilla.org/show_bug.cgi?id=1707571
CAST(NULL AS FLOAT64) AS emailverified,
CAST(NULL AS FLOAT64) AS isprimary,
@ -13,10 +17,6 @@ SELECT
-- cast id to STRING because its field type is inconsistent across source tables
CAST(jsonPayload.fields.id AS STRING) AS id
),
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
TO_HEX(
SHA256(COALESCE(jsonPayload.fields.device_id, jsonPayload.fields.deviceid))
) AS device_id
) AS fields
)
) AS jsonPayload

Просмотреть файл

@ -5,9 +5,10 @@ SELECT
jsonPayload.* REPLACE (
(
SELECT AS STRUCT
jsonPayload.fields.* EXCEPT (device_id, user_id),
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
jsonPayload.fields.* REPLACE (
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
)
) AS fields
)
) AS jsonPayload

Просмотреть файл

@ -0,0 +1,27 @@
# Table metadata for the Non-Prod FxA GCP stderr events staging table.
friendly_name: Non-Prod FxA GCP Stderr Events
description: |-
  Staging table containing stderr fxa logs
owners:
- kik@mozilla.com
labels:
  application: fxa
  incremental: true
  schedule: daily
  dag: bqetl_fxa_events
  owner1: kik
# Scheduling: which DAG runs the query, from which date, and with which
# extra arguments.
scheduling:
  dag_name: bqetl_fxa_events
  start_date: "2023-05-26"
  arguments:
  # Lets a run add new columns to the destination table schema.
  - --schema_update_option=ALLOW_FIELD_ADDITION
bigquery:
  # Daily partitions on `timestamp`; a partition filter is required and
  # partitions have no expiration set.
  time_partitioning:
    type: day
    field: timestamp
    require_partition_filter: true
    expiration_days: null
  clustering:
    fields:
    - fxa_server
references: {}
deprecated: false

Просмотреть файл

@ -0,0 +1,29 @@
-- Extracts amplitude events emitted by the FxA auth and content servers from
-- the stage stderr log table for a single @submission_date.
-- user_id and device_id are replaced in place by the hex-encoded SHA256 of
-- their original values; every other column is passed through unchanged.
SELECT
  -- Second dash-separated token of the logger name,
  -- e.g. "fxa-auth-server" -> "auth".
  SPLIT(jsonPayload.logger, "-")[OFFSET(1)] AS fxa_server,
  * REPLACE (
    (
      SELECT AS STRUCT
        jsonPayload.* REPLACE (
          (
            SELECT AS STRUCT
              -- REPLACE keeps each field at its original position in the struct.
              jsonPayload.fields.* REPLACE (
                TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id,
                TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id
              )
          ) AS fields
        )
    ) AS jsonPayload
  )
FROM
  `moz-fx-fxa-nonprod.gke_fxa_stage_log.stderr`
WHERE
  -- Scan the partitions from one day before to one day after the target date.
  -- NOTE(review): presumably because the partition time of a row can differ
  -- from its event `timestamp` -- confirm.
  DATE(_PARTITIONTIME) >= DATE_SUB(@submission_date, INTERVAL 1 DAY)
  AND DATE(_PARTITIONTIME) <= DATE_ADD(@submission_date, INTERVAL 1 DAY)
  -- Keep only rows whose event timestamp falls on the target date.
  AND DATE(`timestamp`) = @submission_date
  AND jsonPayload.fields.event_type IS NOT NULL
  AND jsonPayload.logger IN ("fxa-auth-server", "fxa-content-server")
  AND jsonPayload.type = 'amplitudeEvent'

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,27 @@
# Table metadata for the Non-Prod FxA GCP stdout events staging table.
friendly_name: Non-Prod FxA GCP Stdout Events
description: |-
  Staging table containing stdout fxa logs
owners:
- kik@mozilla.com
labels:
  application: fxa
  incremental: true
  schedule: daily
  dag: bqetl_fxa_events
  owner1: kik
# Scheduling: which DAG runs the query, from which date, and with which
# extra arguments.
scheduling:
  dag_name: bqetl_fxa_events
  start_date: "2023-05-26"
  arguments:
  # Lets a run add new columns to the destination table schema.
  - --schema_update_option=ALLOW_FIELD_ADDITION
bigquery:
  # Daily partitions on `timestamp`; a partition filter is required and
  # partitions have no expiration set.
  time_partitioning:
    type: day
    field: timestamp
    require_partition_filter: true
    expiration_days: null
  clustering:
    fields:
    - fxa_server
references: {}
deprecated: false

Просмотреть файл

@ -0,0 +1,29 @@
-- Extracts amplitude events emitted by the FxA auth and content servers from
-- the stage stdout log table for a single @submission_date.
-- user_id and device_id are replaced in place by the hex-encoded SHA256 of
-- their original values; every other column is passed through unchanged.
SELECT
  -- Second dash-separated token of the logger name,
  -- e.g. "fxa-auth-server" -> "auth".
  SPLIT(jsonPayload.logger, "-")[OFFSET(1)] AS fxa_server,
  * REPLACE (
    (
      SELECT AS STRUCT
        jsonPayload.* REPLACE (
          (
            SELECT AS STRUCT
              -- REPLACE keeps each field at its original position in the struct.
              jsonPayload.fields.* REPLACE (
                TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id,
                TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id
              )
          ) AS fields
        )
    ) AS jsonPayload
  )
FROM
  `moz-fx-fxa-nonprod.gke_fxa_stage_log.stdout`
WHERE
  -- Scan the partitions from one day before to one day after the target date.
  -- NOTE(review): presumably because the partition time of a row can differ
  -- from its event `timestamp` -- confirm.
  DATE(_PARTITIONTIME) >= DATE_SUB(@submission_date, INTERVAL 1 DAY)
  AND DATE(_PARTITIONTIME) <= DATE_ADD(@submission_date, INTERVAL 1 DAY)
  -- Keep only rows whose event timestamp falls on the target date.
  AND DATE(`timestamp`) = @submission_date
  AND jsonPayload.fields.event_type IS NOT NULL
  AND jsonPayload.logger IN ("fxa-auth-server", "fxa-content-server")
  AND jsonPayload.type = 'amplitudeEvent'

Просмотреть файл

@ -297,6 +297,27 @@ fields:
- name: assertion
type: STRING
mode: NULLABLE
- name: document_type
type: STRING
mode: NULLABLE
- name: ip_address
type: STRING
mode: NULLABLE
- name: user_agent
type: STRING
mode: NULLABLE
- name: document_id
type: STRING
mode: NULLABLE
- name: payload
type: STRING
mode: NULLABLE
- name: document_namespace
type: STRING
mode: NULLABLE
- name: document_version
type: STRING
mode: NULLABLE
- name: user_id
type: STRING
mode: NULLABLE

Просмотреть файл

@ -1,35 +0,0 @@
# Table metadata for the nonprod FxA server events table.
friendly_name: FxA Server Events (nonprod)
description: |
  FxA server events extracted from accounts server stdout logs (auth, content servers)
  This new table is the direct result of FxA migration from AWS to GCP
  (see: OPST-296 for more context).
  Effective 2023-05-26 the events from the following servers land in this table
  (existing data will remain in those tables):
  - auth (previous table: `nonprod_fxa_auth_events_v1`)
  - content (previous table: `nonprod_fxa_content_events_v1`)
  Payment server events will continue landing inside: `nonprod_fxa_stdout_events_v1`
owners:
- kik@mozilla.com
labels:
  application: fxa
  incremental: true
  schedule: daily
  owner1: kik
# Scheduling: which DAG runs the query, from which date, and with which
# extra arguments.
scheduling:
  dag_name: bqetl_fxa_events
  start_date: '2023-05-26'
  arguments:
  # Lets a run add new columns to the destination table schema.
  - --schema_update_option=ALLOW_FIELD_ADDITION
bigquery:
  # Daily partitions on `timestamp`; a partition filter is required and
  # partitions have no expiration set.
  time_partitioning:
    type: day
    field: timestamp
    require_partition_filter: true
    expiration_days: null
  clustering:
    fields:
    - fxa_server
references: {}

Просмотреть файл

@ -1,22 +0,0 @@
-- Extracts amplitude events from the stage stdout log table for a single
-- @submission_date. user_id and device_id are replaced by the hex-encoded
-- SHA256 of their original values.
SELECT
  -- Second dash-separated token of the logger name,
  -- e.g. "fxa-auth-server" -> "auth".
  -- The WHERE clause only guarantees that logger is non-NULL, so a logger
  -- without any "-" is possible; SAFE_OFFSET yields NULL for such rows instead
  -- of raising an out-of-bounds error that would fail the whole query.
  SPLIT(jsonPayload.logger, "-")[SAFE_OFFSET(1)] AS fxa_server,
  * REPLACE (
    (
      SELECT AS STRUCT
        jsonPayload.* REPLACE (
          (
            SELECT AS STRUCT
              -- The raw ids are dropped and their hashed versions are appended
              -- after the remaining fields (this fixes the struct field order).
              jsonPayload.fields.* EXCEPT (device_id, user_id),
              TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
              TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
          ) AS fields
        )
    ) AS jsonPayload
  ),
FROM
  `moz-fx-fxa-nonprod.gke_fxa_stage_log.stdout`
WHERE
  jsonPayload.type = 'amplitudeEvent'
  AND jsonPayload.logger IS NOT NULL
  AND jsonPayload.fields.event_type IS NOT NULL
  -- Keep only rows whose event timestamp falls on the target date.
  AND DATE(`timestamp`) = @submission_date

Просмотреть файл

@ -5,9 +5,10 @@ SELECT
jsonPayload.* REPLACE (
(
SELECT AS STRUCT
jsonPayload.fields.* EXCEPT (device_id, user_id),
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
jsonPayload.fields.* REPLACE (
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
)
) AS fields
)
) AS jsonPayload