# feat(): updated fxa nonprod queries to be in line with production queries (#4297)
* updated fxa nonprod/staging queries to be in line with what production queries look like
* Apply suggestions from code review provided by srose

  Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com>
* Apply suggestions from code review

  Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com>
* tweaks made as suggested by srose in PR#4297

---------

Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com>
This commit is contained in:
Родитель
2356bfeca7
Коммит
69592dab81
|
@ -65,7 +65,8 @@ dry_run:
|
||||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_auth_events_v1/query.sql
|
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_auth_events_v1/query.sql
|
||||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_content_events_v1/query.sql
|
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_content_events_v1/query.sql
|
||||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_stdout_events_v1/query.sql
|
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_stdout_events_v1/query.sql
|
||||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_server_events_v1/query.sql
|
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_gcp_stdout_events_v1/query.sql
|
||||||
|
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_gcp_stderr_events_v1/query.sql
|
||||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_admin_server_sanitized_v1/init.sql
|
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_admin_server_sanitized_v1/init.sql
|
||||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_admin_server_sanitized_v1/query.sql
|
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_admin_server_sanitized_v1/query.sql
|
||||||
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_customs_sanitized_v1/init.sql
|
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_customs_sanitized_v1/init.sql
|
||||||
|
|
|
@ -435,8 +435,8 @@ with DAG(
|
||||||
destination_table="nonprod_fxa_auth_events_v1",
|
destination_table="nonprod_fxa_auth_events_v1",
|
||||||
dataset_id="firefox_accounts_derived",
|
dataset_id="firefox_accounts_derived",
|
||||||
project_id="moz-fx-data-shared-prod",
|
project_id="moz-fx-data-shared-prod",
|
||||||
owner="dthorn@mozilla.com",
|
owner="kik@mozilla.com",
|
||||||
email=["dthorn@mozilla.com", "telemetry-alerts@mozilla.com"],
|
email=["dthorn@mozilla.com", "kik@mozilla.com", "telemetry-alerts@mozilla.com"],
|
||||||
date_partition_parameter="submission_date",
|
date_partition_parameter="submission_date",
|
||||||
depends_on_past=False,
|
depends_on_past=False,
|
||||||
arguments=["--schema_update_option=ALLOW_FIELD_ADDITION"],
|
arguments=["--schema_update_option=ALLOW_FIELD_ADDITION"],
|
||||||
|
@ -454,9 +454,22 @@ with DAG(
|
||||||
arguments=["--schema_update_option=ALLOW_FIELD_ADDITION"],
|
arguments=["--schema_update_option=ALLOW_FIELD_ADDITION"],
|
||||||
)
|
)
|
||||||
|
|
||||||
firefox_accounts_derived__nonprod_fxa_server_events__v1 = bigquery_etl_query(
|
firefox_accounts_derived__nonprod_fxa_gcp_stderr_events__v1 = bigquery_etl_query(
|
||||||
task_id="firefox_accounts_derived__nonprod_fxa_server_events__v1",
|
task_id="firefox_accounts_derived__nonprod_fxa_gcp_stderr_events__v1",
|
||||||
destination_table="nonprod_fxa_server_events_v1",
|
destination_table="nonprod_fxa_gcp_stderr_events_v1",
|
||||||
|
dataset_id="firefox_accounts_derived",
|
||||||
|
project_id="moz-fx-data-shared-prod",
|
||||||
|
owner="kik@mozilla.com",
|
||||||
|
email=["dthorn@mozilla.com", "kik@mozilla.com", "telemetry-alerts@mozilla.com"],
|
||||||
|
start_date=datetime.datetime(2023, 5, 26, 0, 0),
|
||||||
|
date_partition_parameter="submission_date",
|
||||||
|
depends_on_past=False,
|
||||||
|
arguments=["--schema_update_option=ALLOW_FIELD_ADDITION"],
|
||||||
|
)
|
||||||
|
|
||||||
|
firefox_accounts_derived__nonprod_fxa_gcp_stdout_events__v1 = bigquery_etl_query(
|
||||||
|
task_id="firefox_accounts_derived__nonprod_fxa_gcp_stdout_events__v1",
|
||||||
|
destination_table="nonprod_fxa_gcp_stdout_events_v1",
|
||||||
dataset_id="firefox_accounts_derived",
|
dataset_id="firefox_accounts_derived",
|
||||||
project_id="moz-fx-data-shared-prod",
|
project_id="moz-fx-data-shared-prod",
|
||||||
owner="kik@mozilla.com",
|
owner="kik@mozilla.com",
|
||||||
|
|
|
@ -69,13 +69,12 @@ stdout_events AS (
|
||||||
FROM
|
FROM
|
||||||
`moz-fx-data-shared-prod.firefox_accounts_derived.nonprod_fxa_stdout_events_v1`
|
`moz-fx-data-shared-prod.firefox_accounts_derived.nonprod_fxa_stdout_events_v1`
|
||||||
),
|
),
|
||||||
-- New fxa event table (nonprod) includes, content and auth events
|
gcp_stdout_events AS (
|
||||||
server_events AS (
|
|
||||||
SELECT
|
SELECT
|
||||||
fxa_server,
|
fxa_server,
|
||||||
`timestamp`,
|
`timestamp`,
|
||||||
receiveTimestamp,
|
receiveTimestamp,
|
||||||
TIMESTAMP_MILLIS(CAST(jsonPayload.fields.time AS INT64)) AS event_time,
|
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(jsonPayload.fields.time AS INT64)) AS event_time,
|
||||||
jsonPayload.fields.user_id,
|
jsonPayload.fields.user_id,
|
||||||
jsonPayload.fields.country,
|
jsonPayload.fields.country,
|
||||||
JSON_VALUE(jsonPayload.fields.event_properties, "$.country_code") AS country_code,
|
JSON_VALUE(jsonPayload.fields.event_properties, "$.country_code") AS country_code,
|
||||||
|
@ -89,9 +88,28 @@ server_events AS (
|
||||||
jsonPayload.fields.event_properties,
|
jsonPayload.fields.event_properties,
|
||||||
jsonPayload.fields.device_id,
|
jsonPayload.fields.device_id,
|
||||||
FROM
|
FROM
|
||||||
`moz-fx-data-shared-prod.firefox_accounts_derived.nonprod_fxa_server_events_v1`
|
`moz-fx-data-shared-prod.firefox_accounts_derived.nonprod_fxa_gcp_stdout_events_v1`
|
||||||
WHERE
|
),
|
||||||
DATE(`timestamp`) >= "2023-05-26"
|
gcp_stderr_events AS (
|
||||||
|
SELECT
|
||||||
|
fxa_server,
|
||||||
|
`timestamp`,
|
||||||
|
receiveTimestamp,
|
||||||
|
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(jsonPayload.fields.time AS INT64)) AS event_time,
|
||||||
|
jsonPayload.fields.user_id,
|
||||||
|
jsonPayload.fields.country,
|
||||||
|
JSON_VALUE(jsonPayload.fields.event_properties, "$.country_code") AS country_code,
|
||||||
|
jsonPayload.fields.language,
|
||||||
|
jsonPayload.fields.app_version,
|
||||||
|
jsonPayload.fields.os_name,
|
||||||
|
jsonPayload.fields.os_version,
|
||||||
|
jsonPayload.fields.event_type,
|
||||||
|
jsonPayload.logger,
|
||||||
|
jsonPayload.fields.user_properties,
|
||||||
|
jsonPayload.fields.event_properties,
|
||||||
|
jsonPayload.fields.device_id,
|
||||||
|
FROM
|
||||||
|
`moz-fx-data-shared-prod.firefox_accounts_derived.nonprod_fxa_gcp_stderr_events_v1`
|
||||||
),
|
),
|
||||||
unioned AS (
|
unioned AS (
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -112,7 +130,12 @@ unioned AS (
|
||||||
SELECT
|
SELECT
|
||||||
*
|
*
|
||||||
FROM
|
FROM
|
||||||
server_events
|
gcp_stdout_events
|
||||||
|
UNION ALL
|
||||||
|
SELECT
|
||||||
|
*
|
||||||
|
FROM
|
||||||
|
gcp_stderr_events
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
fxa_server AS fxa_log,
|
fxa_server AS fxa_log,
|
||||||
|
|
|
@ -3,7 +3,7 @@ friendly_name: Non-Prod FxA Auth Events
|
||||||
description:
|
description:
|
||||||
Selected Amplitude events extracted from Non-Prod FxA auth server logs
|
Selected Amplitude events extracted from Non-Prod FxA auth server logs
|
||||||
owners:
|
owners:
|
||||||
- dthorn@mozilla.com
|
- kik@mozilla.com
|
||||||
labels:
|
labels:
|
||||||
application: fxa
|
application: fxa
|
||||||
incremental: true
|
incremental: true
|
||||||
|
|
|
@ -5,7 +5,11 @@ SELECT
|
||||||
jsonPayload.* REPLACE (
|
jsonPayload.* REPLACE (
|
||||||
(
|
(
|
||||||
SELECT AS STRUCT
|
SELECT AS STRUCT
|
||||||
jsonPayload.fields.* EXCEPT (user_id, device_id, deviceid) REPLACE(
|
jsonPayload.fields.* EXCEPT (deviceid) REPLACE(
|
||||||
|
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
||||||
|
TO_HEX(
|
||||||
|
SHA256(COALESCE(jsonPayload.fields.device_id, jsonPayload.fields.deviceid))
|
||||||
|
) AS device_id,
|
||||||
-- See https://bugzilla.mozilla.org/show_bug.cgi?id=1707571
|
-- See https://bugzilla.mozilla.org/show_bug.cgi?id=1707571
|
||||||
CAST(NULL AS FLOAT64) AS emailverified,
|
CAST(NULL AS FLOAT64) AS emailverified,
|
||||||
CAST(NULL AS FLOAT64) AS isprimary,
|
CAST(NULL AS FLOAT64) AS isprimary,
|
||||||
|
@ -13,10 +17,6 @@ SELECT
|
||||||
-- casting id as field type in source tables inconsistent
|
-- casting id as field type in source tables inconsistent
|
||||||
CAST(jsonPayload.fields.id AS STRING) AS id
|
CAST(jsonPayload.fields.id AS STRING) AS id
|
||||||
),
|
),
|
||||||
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
|
||||||
TO_HEX(
|
|
||||||
SHA256(COALESCE(jsonPayload.fields.device_id, jsonPayload.fields.deviceid))
|
|
||||||
) AS device_id
|
|
||||||
) AS fields
|
) AS fields
|
||||||
)
|
)
|
||||||
) AS jsonPayload
|
) AS jsonPayload
|
||||||
|
|
|
@ -5,9 +5,10 @@ SELECT
|
||||||
jsonPayload.* REPLACE (
|
jsonPayload.* REPLACE (
|
||||||
(
|
(
|
||||||
SELECT AS STRUCT
|
SELECT AS STRUCT
|
||||||
jsonPayload.fields.* EXCEPT (device_id, user_id),
|
jsonPayload.fields.* REPLACE (
|
||||||
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
||||||
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
|
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
|
||||||
|
)
|
||||||
) AS fields
|
) AS fields
|
||||||
)
|
)
|
||||||
) AS jsonPayload
|
) AS jsonPayload
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
friendly_name: Non-Prod FxA GCP Stderr Events
|
||||||
|
description: |-
|
||||||
|
Staging table containing stderr fxa logs
|
||||||
|
owners:
|
||||||
|
- kik@mozilla.com
|
||||||
|
labels:
|
||||||
|
application: fxa
|
||||||
|
incremental: true
|
||||||
|
schedule: daily
|
||||||
|
dag: bqetl_fxa_events
|
||||||
|
owner1: kik
|
||||||
|
scheduling:
|
||||||
|
dag_name: bqetl_fxa_events
|
||||||
|
start_date: "2023-05-26"
|
||||||
|
arguments:
|
||||||
|
- --schema_update_option=ALLOW_FIELD_ADDITION
|
||||||
|
bigquery:
|
||||||
|
time_partitioning:
|
||||||
|
type: day
|
||||||
|
field: timestamp
|
||||||
|
require_partition_filter: true
|
||||||
|
expiration_days: null
|
||||||
|
clustering:
|
||||||
|
fields:
|
||||||
|
- fxa_server
|
||||||
|
references: {}
|
||||||
|
deprecated: false
|
|
@ -0,0 +1,29 @@
|
||||||
|
SELECT
|
||||||
|
-- example logger expected input: fxa-auth-server
|
||||||
|
SPLIT(jsonPayload.logger, "-")[OFFSET(1)] AS fxa_server,
|
||||||
|
* REPLACE (
|
||||||
|
(
|
||||||
|
SELECT AS STRUCT
|
||||||
|
jsonPayload.* REPLACE (
|
||||||
|
(
|
||||||
|
SELECT AS STRUCT
|
||||||
|
jsonPayload.fields.* REPLACE (
|
||||||
|
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
||||||
|
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
|
||||||
|
)
|
||||||
|
) AS fields
|
||||||
|
)
|
||||||
|
) AS jsonPayload
|
||||||
|
)
|
||||||
|
FROM
|
||||||
|
`moz-fx-fxa-nonprod.gke_fxa_stage_log.stderr`
|
||||||
|
WHERE
|
||||||
|
(
|
||||||
|
DATE(_PARTITIONTIME)
|
||||||
|
BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY)
|
||||||
|
AND DATE_ADD(@submission_date, INTERVAL 1 DAY)
|
||||||
|
)
|
||||||
|
AND DATE(`timestamp`) = @submission_date
|
||||||
|
AND jsonPayload.type = 'amplitudeEvent'
|
||||||
|
AND jsonPayload.logger IN ("fxa-auth-server", "fxa-content-server")
|
||||||
|
AND jsonPayload.fields.event_type IS NOT NULL
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,27 @@
|
||||||
|
friendly_name: Non-Prod FxA GCP Stdout Events
|
||||||
|
description: |-
|
||||||
|
Staging table containing stdout fxa logs
|
||||||
|
owners:
|
||||||
|
- kik@mozilla.com
|
||||||
|
labels:
|
||||||
|
application: fxa
|
||||||
|
incremental: true
|
||||||
|
schedule: daily
|
||||||
|
dag: bqetl_fxa_events
|
||||||
|
owner1: kik
|
||||||
|
scheduling:
|
||||||
|
dag_name: bqetl_fxa_events
|
||||||
|
start_date: "2023-05-26"
|
||||||
|
arguments:
|
||||||
|
- --schema_update_option=ALLOW_FIELD_ADDITION
|
||||||
|
bigquery:
|
||||||
|
time_partitioning:
|
||||||
|
type: day
|
||||||
|
field: timestamp
|
||||||
|
require_partition_filter: true
|
||||||
|
expiration_days: null
|
||||||
|
clustering:
|
||||||
|
fields:
|
||||||
|
- fxa_server
|
||||||
|
references: {}
|
||||||
|
deprecated: false
|
|
@ -0,0 +1,29 @@
|
||||||
|
SELECT
|
||||||
|
-- example logger expected input: fxa-auth-server
|
||||||
|
SPLIT(jsonPayload.logger, "-")[OFFSET(1)] AS fxa_server,
|
||||||
|
* REPLACE (
|
||||||
|
(
|
||||||
|
SELECT AS STRUCT
|
||||||
|
jsonPayload.* REPLACE (
|
||||||
|
(
|
||||||
|
SELECT AS STRUCT
|
||||||
|
jsonPayload.fields.* REPLACE (
|
||||||
|
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
||||||
|
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
|
||||||
|
)
|
||||||
|
) AS fields
|
||||||
|
)
|
||||||
|
) AS jsonPayload
|
||||||
|
)
|
||||||
|
FROM
|
||||||
|
`moz-fx-fxa-nonprod.gke_fxa_stage_log.stdout`
|
||||||
|
WHERE
|
||||||
|
(
|
||||||
|
DATE(_PARTITIONTIME)
|
||||||
|
BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY)
|
||||||
|
AND DATE_ADD(@submission_date, INTERVAL 1 DAY)
|
||||||
|
)
|
||||||
|
AND DATE(`timestamp`) = @submission_date
|
||||||
|
AND jsonPayload.type = 'amplitudeEvent'
|
||||||
|
AND jsonPayload.logger IN ("fxa-auth-server", "fxa-content-server")
|
||||||
|
AND jsonPayload.fields.event_type IS NOT NULL
|
|
@ -297,6 +297,27 @@ fields:
|
||||||
- name: assertion
|
- name: assertion
|
||||||
type: STRING
|
type: STRING
|
||||||
mode: NULLABLE
|
mode: NULLABLE
|
||||||
|
- name: document_type
|
||||||
|
type: STRING
|
||||||
|
mode: NULLABLE
|
||||||
|
- name: ip_address
|
||||||
|
type: STRING
|
||||||
|
mode: NULLABLE
|
||||||
|
- name: user_agent
|
||||||
|
type: STRING
|
||||||
|
mode: NULLABLE
|
||||||
|
- name: document_id
|
||||||
|
type: STRING
|
||||||
|
mode: NULLABLE
|
||||||
|
- name: payload
|
||||||
|
type: STRING
|
||||||
|
mode: NULLABLE
|
||||||
|
- name: document_namespace
|
||||||
|
type: STRING
|
||||||
|
mode: NULLABLE
|
||||||
|
- name: document_version
|
||||||
|
type: STRING
|
||||||
|
mode: NULLABLE
|
||||||
- name: user_id
|
- name: user_id
|
||||||
type: STRING
|
type: STRING
|
||||||
mode: NULLABLE
|
mode: NULLABLE
|
|
@ -1,35 +0,0 @@
|
||||||
friendly_name: FxA Server Events (nonprod)
|
|
||||||
description: |
|
|
||||||
FxA server events extracted from accounts server stdout logs (auth, content servers)
|
|
||||||
|
|
||||||
This new table is the direct result of FxA migration from AWS to GCP
|
|
||||||
(see: OPST-296 for more context).
|
|
||||||
|
|
||||||
Effective 2023-05-26 the events from the following servers land in this table
|
|
||||||
(existing data will remain in those tables):
|
|
||||||
- auth (previous table: `nonprod_fxa_auth_events_v1`)
|
|
||||||
- content (previous table: `nonprod_fxa_content_events_v1`)
|
|
||||||
|
|
||||||
Payment server events will continue landing inside: `nonprod_fxa_stdout_events_v1`
|
|
||||||
owners:
|
|
||||||
- kik@mozilla.com
|
|
||||||
labels:
|
|
||||||
application: fxa
|
|
||||||
incremental: true
|
|
||||||
schedule: daily
|
|
||||||
owner1: kik
|
|
||||||
scheduling:
|
|
||||||
dag_name: bqetl_fxa_events
|
|
||||||
start_date: '2023-05-26'
|
|
||||||
arguments:
|
|
||||||
- --schema_update_option=ALLOW_FIELD_ADDITION
|
|
||||||
bigquery:
|
|
||||||
time_partitioning:
|
|
||||||
type: day
|
|
||||||
field: timestamp
|
|
||||||
require_partition_filter: true
|
|
||||||
expiration_days: null
|
|
||||||
clustering:
|
|
||||||
fields:
|
|
||||||
- fxa_server
|
|
||||||
references: {}
|
|
|
@ -1,22 +0,0 @@
|
||||||
SELECT
|
|
||||||
SPLIT(jsonPayload.logger, "-")[OFFSET(1)] AS fxa_server,
|
|
||||||
* REPLACE (
|
|
||||||
(
|
|
||||||
SELECT AS STRUCT
|
|
||||||
jsonPayload.* REPLACE (
|
|
||||||
(
|
|
||||||
SELECT AS STRUCT
|
|
||||||
jsonPayload.fields.* EXCEPT (device_id, user_id),
|
|
||||||
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
|
||||||
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
|
|
||||||
) AS fields
|
|
||||||
)
|
|
||||||
) AS jsonPayload
|
|
||||||
),
|
|
||||||
FROM
|
|
||||||
`moz-fx-fxa-nonprod.gke_fxa_stage_log.stdout`
|
|
||||||
WHERE
|
|
||||||
jsonPayload.type = 'amplitudeEvent'
|
|
||||||
AND jsonPayload.logger IS NOT NULL
|
|
||||||
AND jsonPayload.fields.event_type IS NOT NULL
|
|
||||||
AND DATE(`timestamp`) = @submission_date
|
|
|
@ -5,9 +5,10 @@ SELECT
|
||||||
jsonPayload.* REPLACE (
|
jsonPayload.* REPLACE (
|
||||||
(
|
(
|
||||||
SELECT AS STRUCT
|
SELECT AS STRUCT
|
||||||
jsonPayload.fields.* EXCEPT (device_id, user_id),
|
jsonPayload.fields.* REPLACE (
|
||||||
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
|
||||||
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
|
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
|
||||||
|
)
|
||||||
) AS fields
|
) AS fields
|
||||||
)
|
)
|
||||||
) AS jsonPayload
|
) AS jsonPayload
|
||||||
|
|
Загрузка…
Ссылка в новой задаче