# feat(): updated fxa nonprod queries to be in line with production queries (#4297)
* updated fxa nonprod/staging queries to be in line with what production queries look like * Apply suggestions from code review provided by srose Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com> * Apply suggestions from code review Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com> * tweaks made as suggested by srose in PR#4297 --------- Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com>
This commit is contained in:
Родитель
2356bfeca7
Коммит
69592dab81
|
@ -65,7 +65,8 @@ dry_run:
|
|||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_auth_events_v1/query.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_content_events_v1/query.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_stdout_events_v1/query.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_server_events_v1/query.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_gcp_stdout_events_v1/query.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_gcp_stderr_events_v1/query.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_admin_server_sanitized_v1/init.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_admin_server_sanitized_v1/query.sql
|
||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_customs_sanitized_v1/init.sql
|
||||
|
|
|
@ -435,8 +435,8 @@ with DAG(
|
|||
destination_table="nonprod_fxa_auth_events_v1",
|
||||
dataset_id="firefox_accounts_derived",
|
||||
project_id="moz-fx-data-shared-prod",
|
||||
owner="dthorn@mozilla.com",
|
||||
email=["dthorn@mozilla.com", "telemetry-alerts@mozilla.com"],
|
||||
owner="kik@mozilla.com",
|
||||
email=["dthorn@mozilla.com", "kik@mozilla.com", "telemetry-alerts@mozilla.com"],
|
||||
date_partition_parameter="submission_date",
|
||||
depends_on_past=False,
|
||||
arguments=["--schema_update_option=ALLOW_FIELD_ADDITION"],
|
||||
|
@ -454,9 +454,22 @@ with DAG(
|
|||
arguments=["--schema_update_option=ALLOW_FIELD_ADDITION"],
|
||||
)
|
||||
|
||||
firefox_accounts_derived__nonprod_fxa_server_events__v1 = bigquery_etl_query(
|
||||
task_id="firefox_accounts_derived__nonprod_fxa_server_events__v1",
|
||||
destination_table="nonprod_fxa_server_events_v1",
|
||||
firefox_accounts_derived__nonprod_fxa_gcp_stderr_events__v1 = bigquery_etl_query(
|
||||
task_id="firefox_accounts_derived__nonprod_fxa_gcp_stderr_events__v1",
|
||||
destination_table="nonprod_fxa_gcp_stderr_events_v1",
|
||||
dataset_id="firefox_accounts_derived",
|
||||
project_id="moz-fx-data-shared-prod",
|
||||
owner="kik@mozilla.com",
|
||||
email=["dthorn@mozilla.com", "kik@mozilla.com", "telemetry-alerts@mozilla.com"],
|
||||
start_date=datetime.datetime(2023, 5, 26, 0, 0),
|
||||
date_partition_parameter="submission_date",
|
||||
depends_on_past=False,
|
||||
arguments=["--schema_update_option=ALLOW_FIELD_ADDITION"],
|
||||
)
|
||||
|
||||
firefox_accounts_derived__nonprod_fxa_gcp_stdout_events__v1 = bigquery_etl_query(
|
||||
task_id="firefox_accounts_derived__nonprod_fxa_gcp_stdout_events__v1",
|
||||
destination_table="nonprod_fxa_gcp_stdout_events_v1",
|
||||
dataset_id="firefox_accounts_derived",
|
||||
project_id="moz-fx-data-shared-prod",
|
||||
owner="kik@mozilla.com",
|
||||
|
|
|
@ -69,13 +69,12 @@ stdout_events AS (
|
|||
FROM
|
||||
`moz-fx-data-shared-prod.firefox_accounts_derived.nonprod_fxa_stdout_events_v1`
|
||||
),
|
||||
-- New fxa event table (nonprod); includes content and auth events
|
||||
server_events AS (
|
||||
gcp_stdout_events AS (
|
||||
SELECT
|
||||
fxa_server,
|
||||
`timestamp`,
|
||||
receiveTimestamp,
|
||||
TIMESTAMP_MILLIS(CAST(jsonPayload.fields.time AS INT64)) AS event_time,
|
||||
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(jsonPayload.fields.time AS INT64)) AS event_time,
|
||||
jsonPayload.fields.user_id,
|
||||
jsonPayload.fields.country,
|
||||
JSON_VALUE(jsonPayload.fields.event_properties, "$.country_code") AS country_code,
|
||||
|
@ -89,9 +88,28 @@ server_events AS (
|
|||
jsonPayload.fields.event_properties,
|
||||
jsonPayload.fields.device_id,
|
||||
FROM
|
||||
`moz-fx-data-shared-prod.firefox_accounts_derived.nonprod_fxa_server_events_v1`
|
||||
WHERE
|
||||
DATE(`timestamp`) >= "2023-05-26"
|
||||
`moz-fx-data-shared-prod.firefox_accounts_derived.nonprod_fxa_gcp_stdout_events_v1`
|
||||
),
|
||||
gcp_stderr_events AS (
|
||||
SELECT
|
||||
fxa_server,
|
||||
`timestamp`,
|
||||
receiveTimestamp,
|
||||
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(jsonPayload.fields.time AS INT64)) AS event_time,
|
||||
jsonPayload.fields.user_id,
|
||||
jsonPayload.fields.country,
|
||||
JSON_VALUE(jsonPayload.fields.event_properties, "$.country_code") AS country_code,
|
||||
jsonPayload.fields.language,
|
||||
jsonPayload.fields.app_version,
|
||||
jsonPayload.fields.os_name,
|
||||
jsonPayload.fields.os_version,
|
||||
jsonPayload.fields.event_type,
|
||||
jsonPayload.logger,
|
||||
jsonPayload.fields.user_properties,
|
||||
jsonPayload.fields.event_properties,
|
||||
jsonPayload.fields.device_id,
|
||||
FROM
|
||||
`moz-fx-data-shared-prod.firefox_accounts_derived.nonprod_fxa_gcp_stderr_events_v1`
|
||||
),
|
||||
unioned AS (
|
||||
SELECT
|
||||
|
@ -112,7 +130,12 @@ unioned AS (
|
|||
SELECT
|
||||
*
|
||||
FROM
|
||||
server_events
|
||||
gcp_stdout_events
|
||||
UNION ALL
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
gcp_stderr_events
|
||||
)
|
||||
SELECT
|
||||
fxa_server AS fxa_log,
|
||||
|
|
|
@ -3,7 +3,7 @@ friendly_name: Non-Prod FxA Auth Events
|
|||
description:
|
||||
Selected Amplitude events extracted from Non-Prod FxA auth server logs
|
||||
owners:
|
||||
- dthorn@mozilla.com
|
||||
- kik@mozilla.com
|
||||
labels:
|
||||
application: fxa
|
||||
incremental: true
|
||||
|
|
|
@ -5,7 +5,11 @@ SELECT
|
|||
jsonPayload.* REPLACE (
|
||||
(
|
||||
SELECT AS STRUCT
|
||||
jsonPayload.fields.* EXCEPT (user_id, device_id, deviceid) REPLACE(
|
||||
jsonPayload.fields.* EXCEPT (deviceid) REPLACE(
|
||||
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
||||
TO_HEX(
|
||||
SHA256(COALESCE(jsonPayload.fields.device_id, jsonPayload.fields.deviceid))
|
||||
) AS device_id,
|
||||
-- See https://bugzilla.mozilla.org/show_bug.cgi?id=1707571
|
||||
CAST(NULL AS FLOAT64) AS emailverified,
|
||||
CAST(NULL AS FLOAT64) AS isprimary,
|
||||
|
@ -13,10 +17,6 @@ SELECT
|
|||
-- cast id to STRING because its field type is inconsistent across the source tables
|
||||
CAST(jsonPayload.fields.id AS STRING) AS id
|
||||
),
|
||||
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
||||
TO_HEX(
|
||||
SHA256(COALESCE(jsonPayload.fields.device_id, jsonPayload.fields.deviceid))
|
||||
) AS device_id
|
||||
) AS fields
|
||||
)
|
||||
) AS jsonPayload
|
||||
|
|
|
@ -5,9 +5,10 @@ SELECT
|
|||
jsonPayload.* REPLACE (
|
||||
(
|
||||
SELECT AS STRUCT
|
||||
jsonPayload.fields.* EXCEPT (device_id, user_id),
|
||||
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
||||
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
|
||||
jsonPayload.fields.* REPLACE (
|
||||
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
||||
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
|
||||
)
|
||||
) AS fields
|
||||
)
|
||||
) AS jsonPayload
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
friendly_name: Non-Prod FxA GCP Stderr Events
|
||||
description: |-
|
||||
Staging table containing stderr fxa logs
|
||||
owners:
|
||||
- kik@mozilla.com
|
||||
labels:
|
||||
application: fxa
|
||||
incremental: true
|
||||
schedule: daily
|
||||
dag: bqetl_fxa_events
|
||||
owner1: kik
|
||||
scheduling:
|
||||
dag_name: bqetl_fxa_events
|
||||
start_date: "2023-05-26"
|
||||
arguments:
|
||||
- --schema_update_option=ALLOW_FIELD_ADDITION
|
||||
bigquery:
|
||||
time_partitioning:
|
||||
type: day
|
||||
field: timestamp
|
||||
require_partition_filter: true
|
||||
expiration_days: null
|
||||
clustering:
|
||||
fields:
|
||||
- fxa_server
|
||||
references: {}
|
||||
deprecated: false
|
|
@ -0,0 +1,29 @@
|
|||
-- Selects non-prod FxA stderr log rows for a single @submission_date from
-- `moz-fx-fxa-nonprod.gke_fxa_stage_log.stderr`.
-- Only 'amplitudeEvent' records logged by fxa-auth-server or fxa-content-server
-- that carry a non-null event_type are kept.
-- Output: a derived fxa_server column followed by every source column, with
-- jsonPayload.fields.user_id and jsonPayload.fields.device_id replaced by the
-- hex-encoded SHA-256 digest of their original values.
SELECT
|
||||
-- Example of an expected logger value: "fxa-auth-server". OFFSET is zero-based,
-- so element 1 of the "-" split ("auth" in this example) becomes fxa_server.
|
||||
SPLIT(jsonPayload.logger, "-")[OFFSET(1)] AS fxa_server,
|
||||
-- Rebuild the jsonPayload struct in place so that only the two identifier
-- fields inside jsonPayload.fields change; all other fields pass through as-is.
* REPLACE (
|
||||
(
|
||||
SELECT AS STRUCT
|
||||
jsonPayload.* REPLACE (
|
||||
(
|
||||
SELECT AS STRUCT
|
||||
jsonPayload.fields.* REPLACE (
|
||||
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
||||
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
|
||||
)
|
||||
) AS fields
|
||||
)
|
||||
) AS jsonPayload
|
||||
)
|
||||
FROM
|
||||
`moz-fx-fxa-nonprod.gke_fxa_stage_log.stderr`
|
||||
WHERE
|
||||
-- Restrict the scan to partitions within one day on either side of
-- @submission_date; the exact day is then selected by the `timestamp` filter below.
-- NOTE(review): presumably the window exists because a row's partition time can
-- differ from the date of its `timestamp` -- confirm.
(
|
||||
DATE(_PARTITIONTIME)
|
||||
BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY)
|
||||
AND DATE_ADD(@submission_date, INTERVAL 1 DAY)
|
||||
)
|
||||
AND DATE(`timestamp`) = @submission_date
|
||||
AND jsonPayload.type = 'amplitudeEvent'
|
||||
-- Both accepted logger values contain at least one "-", so the OFFSET(1)
-- lookup in the SELECT list has an element to read for every kept row.
AND jsonPayload.logger IN ("fxa-auth-server", "fxa-content-server")
|
||||
AND jsonPayload.fields.event_type IS NOT NULL
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,27 @@
|
|||
friendly_name: Non-Prod FxA GCP Stdout Events
|
||||
description: |-
|
||||
Staging table containing stdout fxa logs
|
||||
owners:
|
||||
- kik@mozilla.com
|
||||
labels:
|
||||
application: fxa
|
||||
incremental: true
|
||||
schedule: daily
|
||||
dag: bqetl_fxa_events
|
||||
owner1: kik
|
||||
scheduling:
|
||||
dag_name: bqetl_fxa_events
|
||||
start_date: "2023-05-26"
|
||||
arguments:
|
||||
- --schema_update_option=ALLOW_FIELD_ADDITION
|
||||
bigquery:
|
||||
time_partitioning:
|
||||
type: day
|
||||
field: timestamp
|
||||
require_partition_filter: true
|
||||
expiration_days: null
|
||||
clustering:
|
||||
fields:
|
||||
- fxa_server
|
||||
references: {}
|
||||
deprecated: false
|
|
@ -0,0 +1,29 @@
|
|||
-- Selects non-prod FxA stdout log rows for a single @submission_date from
-- `moz-fx-fxa-nonprod.gke_fxa_stage_log.stdout`.
-- Only 'amplitudeEvent' records logged by fxa-auth-server or fxa-content-server
-- that carry a non-null event_type are kept.
-- Output: a derived fxa_server column followed by every source column, with
-- jsonPayload.fields.user_id and jsonPayload.fields.device_id replaced by the
-- hex-encoded SHA-256 digest of their original values.
SELECT
|
||||
-- Example of an expected logger value: "fxa-auth-server". OFFSET is zero-based,
-- so element 1 of the "-" split ("auth" in this example) becomes fxa_server.
|
||||
SPLIT(jsonPayload.logger, "-")[OFFSET(1)] AS fxa_server,
|
||||
-- Rebuild the jsonPayload struct in place so that only the two identifier
-- fields inside jsonPayload.fields change; all other fields pass through as-is.
* REPLACE (
|
||||
(
|
||||
SELECT AS STRUCT
|
||||
jsonPayload.* REPLACE (
|
||||
(
|
||||
SELECT AS STRUCT
|
||||
jsonPayload.fields.* REPLACE (
|
||||
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
||||
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
|
||||
)
|
||||
) AS fields
|
||||
)
|
||||
) AS jsonPayload
|
||||
)
|
||||
FROM
|
||||
`moz-fx-fxa-nonprod.gke_fxa_stage_log.stdout`
|
||||
WHERE
|
||||
-- Restrict the scan to partitions within one day on either side of
-- @submission_date; the exact day is then selected by the `timestamp` filter below.
-- NOTE(review): presumably the window exists because a row's partition time can
-- differ from the date of its `timestamp` -- confirm.
(
|
||||
DATE(_PARTITIONTIME)
|
||||
BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY)
|
||||
AND DATE_ADD(@submission_date, INTERVAL 1 DAY)
|
||||
)
|
||||
AND DATE(`timestamp`) = @submission_date
|
||||
AND jsonPayload.type = 'amplitudeEvent'
|
||||
-- Both accepted logger values contain at least one "-", so the OFFSET(1)
-- lookup in the SELECT list has an element to read for every kept row.
AND jsonPayload.logger IN ("fxa-auth-server", "fxa-content-server")
|
||||
AND jsonPayload.fields.event_type IS NOT NULL
|
|
@ -297,6 +297,27 @@ fields:
|
|||
- name: assertion
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: document_type
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: ip_address
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: user_agent
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: document_id
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: payload
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: document_namespace
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: document_version
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
- name: user_id
|
||||
type: STRING
|
||||
mode: NULLABLE
|
|
@ -1,35 +0,0 @@
|
|||
friendly_name: FxA Server Events (nonprod)
|
||||
description: |
|
||||
FxA server events extracted from accounts server stdout logs (auth, content servers)
|
||||
|
||||
This new table is the direct result of FxA migration from AWS to GCP
|
||||
(see: OPST-296 for more context).
|
||||
|
||||
Effective 2023-05-26 the events from the following servers land in this table
|
||||
(existing data will remain in those tables):
|
||||
- auth (previous table: `nonprod_fxa_auth_events_v1`)
|
||||
- content (previous table: `nonprod_fxa_content_events_v1`)
|
||||
|
||||
Payment server events will continue landing inside: `nonprod_fxa_stdout_events_v1`
|
||||
owners:
|
||||
- kik@mozilla.com
|
||||
labels:
|
||||
application: fxa
|
||||
incremental: true
|
||||
schedule: daily
|
||||
owner1: kik
|
||||
scheduling:
|
||||
dag_name: bqetl_fxa_events
|
||||
start_date: '2023-05-26'
|
||||
arguments:
|
||||
- --schema_update_option=ALLOW_FIELD_ADDITION
|
||||
bigquery:
|
||||
time_partitioning:
|
||||
type: day
|
||||
field: timestamp
|
||||
require_partition_filter: true
|
||||
expiration_days: null
|
||||
clustering:
|
||||
fields:
|
||||
- fxa_server
|
||||
references: {}
|
|
@ -1,22 +0,0 @@
|
|||
SELECT
|
||||
SPLIT(jsonPayload.logger, "-")[OFFSET(1)] AS fxa_server,
|
||||
* REPLACE (
|
||||
(
|
||||
SELECT AS STRUCT
|
||||
jsonPayload.* REPLACE (
|
||||
(
|
||||
SELECT AS STRUCT
|
||||
jsonPayload.fields.* EXCEPT (device_id, user_id),
|
||||
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
||||
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
|
||||
) AS fields
|
||||
)
|
||||
) AS jsonPayload
|
||||
),
|
||||
FROM
|
||||
`moz-fx-fxa-nonprod.gke_fxa_stage_log.stdout`
|
||||
WHERE
|
||||
jsonPayload.type = 'amplitudeEvent'
|
||||
AND jsonPayload.logger IS NOT NULL
|
||||
AND jsonPayload.fields.event_type IS NOT NULL
|
||||
AND DATE(`timestamp`) = @submission_date
|
|
@ -5,9 +5,10 @@ SELECT
|
|||
jsonPayload.* REPLACE (
|
||||
(
|
||||
SELECT AS STRUCT
|
||||
jsonPayload.fields.* EXCEPT (device_id, user_id),
|
||||
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
||||
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
|
||||
jsonPayload.fields.* REPLACE (
|
||||
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
||||
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
|
||||
)
|
||||
) AS fields
|
||||
)
|
||||
) AS jsonPayload
|
||||
|
|
Загрузка…
Ссылка в новой задаче