DENG-722 Handle events from FxA services migrated to new GCP environment (#4288)

Co-authored-by: kik-kik <kignasiak@mozilla.com>
Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com>
This commit is contained in:
akkomar 2023-09-13 20:49:29 +02:00 коммит произвёл GitHub
Родитель 3f79cc5151
Коммит b64aea8d1a
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
12 изменённых файлов: 2080 добавлений и 22 удалений

Просмотреть файл

@ -59,6 +59,8 @@ dry_run:
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_amplitude_user_ids_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_amplitude_user_ids_v1/init.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_stdout_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_gcp_stderr_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_gcp_stdout_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_auth_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_content_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_stdout_events_v1/query.sql

Просмотреть файл

@ -204,6 +204,36 @@ with DAG(
funnel_events_source__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_content_events__v1
)
# Cross-DAG dependency: sensor on task
# `firefox_accounts_derived__fxa_gcp_stderr_events__v1` of the `bqetl_fxa_events` DAG.
wait_for_firefox_accounts_derived__fxa_gcp_stderr_events__v1 = ExternalTaskSensor(
task_id="wait_for_firefox_accounts_derived__fxa_gcp_stderr_events__v1",
external_dag_id="bqetl_fxa_events",
external_task_id="firefox_accounts_derived__fxa_gcp_stderr_events__v1",
# 5400 s = 90 minutes: offset between this DAG's run and the upstream run to check.
# NOTE(review): the matching ExternalTaskMarker in bqetl_fxa_events must use the same offset.
execution_delta=datetime.timedelta(seconds=5400),
check_existence=True,
mode="reschedule",
allowed_states=ALLOWED_STATES,
failed_states=FAILED_STATES,
pool="DATA_ENG_EXTERNALTASKSENSOR",
)
# funnel_events_source__v1 only runs after the sensor above has succeeded.
funnel_events_source__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_gcp_stderr_events__v1
)
# Cross-DAG dependency: sensor on task
# `firefox_accounts_derived__fxa_gcp_stdout_events__v1` of the `bqetl_fxa_events` DAG.
wait_for_firefox_accounts_derived__fxa_gcp_stdout_events__v1 = ExternalTaskSensor(
task_id="wait_for_firefox_accounts_derived__fxa_gcp_stdout_events__v1",
external_dag_id="bqetl_fxa_events",
external_task_id="firefox_accounts_derived__fxa_gcp_stdout_events__v1",
# 5400 s = 90 minutes: offset between this DAG's run and the upstream run to check.
# NOTE(review): the matching ExternalTaskMarker in bqetl_fxa_events must use the same offset.
execution_delta=datetime.timedelta(seconds=5400),
check_existence=True,
mode="reschedule",
allowed_states=ALLOWED_STATES,
failed_states=FAILED_STATES,
pool="DATA_ENG_EXTERNALTASKSENSOR",
)
# funnel_events_source__v1 only runs after the sensor above has succeeded.
funnel_events_source__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_gcp_stdout_events__v1
)
wait_for_firefox_accounts_derived__fxa_stdout_events__v1 = ExternalTaskSensor(
task_id="wait_for_firefox_accounts_derived__fxa_stdout_events__v1",
external_dag_id="bqetl_fxa_events",

Просмотреть файл

@ -174,6 +174,74 @@ with DAG(
depends_on_past=False,
)
# Daily query task writing to
# `moz-fx-data-shared-prod.firefox_accounts_derived.fxa_gcp_stderr_events_v1`.
firefox_accounts_derived__fxa_gcp_stderr_events__v1 = bigquery_etl_query(
task_id="firefox_accounts_derived__fxa_gcp_stderr_events__v1",
destination_table="fxa_gcp_stderr_events_v1",
dataset_id="firefox_accounts_derived",
project_id="moz-fx-data-shared-prod",
owner="kik@mozilla.com",
email=["dthorn@mozilla.com", "kik@mozilla.com", "telemetry-alerts@mozilla.com"],
# Same start date as `scheduling.start_date` in the table's metadata.yaml (2023-09-07).
start_date=datetime.datetime(2023, 9, 7, 0, 0),
# The run date is handed to the query as the `submission_date` parameter.
date_partition_parameter="submission_date",
depends_on_past=False,
# Presumably forwarded to the BigQuery query job so that new upstream log fields
# can be added to the destination schema -- TODO confirm against bigquery_etl_query.
arguments=["--schema_update_option=ALLOW_FIELD_ADDITION"],
)
# Markers that point at the sensors in downstream DAGs which wait on
# `firefox_accounts_derived__fxa_gcp_stderr_events__v1`.
with TaskGroup(
"firefox_accounts_derived__fxa_gcp_stderr_events__v1_external"
) as firefox_accounts_derived__fxa_gcp_stderr_events__v1_external:
ExternalTaskMarker(
task_id="bqetl_subplat__wait_for_firefox_accounts_derived__fxa_gcp_stderr_events__v1",
external_dag_id="bqetl_subplat",
external_task_id="wait_for_firefox_accounts_derived__fxa_gcp_stderr_events__v1",
# timedelta(days=-1, seconds=85500) is -900 s, so subtracting it yields
# execution_date + 15 minutes.
execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=85500)).isoformat() }}",
)
ExternalTaskMarker(
task_id="bqetl_event_rollup__wait_for_firefox_accounts_derived__fxa_gcp_stderr_events__v1",
external_dag_id="bqetl_event_rollup",
external_task_id="wait_for_firefox_accounts_derived__fxa_gcp_stderr_events__v1",
# timedelta(days=-1, seconds=81000) is -5400 s, so subtracting it yields
# execution_date + 90 minutes.
execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=81000)).isoformat() }}",
)
# The markers run only after the query task itself has finished.
firefox_accounts_derived__fxa_gcp_stderr_events__v1_external.set_upstream(
firefox_accounts_derived__fxa_gcp_stderr_events__v1
)
# Daily query task writing to
# `moz-fx-data-shared-prod.firefox_accounts_derived.fxa_gcp_stdout_events_v1`.
firefox_accounts_derived__fxa_gcp_stdout_events__v1 = bigquery_etl_query(
task_id="firefox_accounts_derived__fxa_gcp_stdout_events__v1",
destination_table="fxa_gcp_stdout_events_v1",
dataset_id="firefox_accounts_derived",
project_id="moz-fx-data-shared-prod",
owner="kik@mozilla.com",
email=["dthorn@mozilla.com", "kik@mozilla.com", "telemetry-alerts@mozilla.com"],
# Same start date as `scheduling.start_date` in the table's metadata.yaml (2023-09-07).
start_date=datetime.datetime(2023, 9, 7, 0, 0),
# The run date is handed to the query as the `submission_date` parameter.
date_partition_parameter="submission_date",
depends_on_past=False,
# Presumably forwarded to the BigQuery query job so that new upstream log fields
# can be added to the destination schema -- TODO confirm against bigquery_etl_query.
arguments=["--schema_update_option=ALLOW_FIELD_ADDITION"],
)
# Markers that point at the sensors in downstream DAGs which wait on
# `firefox_accounts_derived__fxa_gcp_stdout_events__v1`.
with TaskGroup(
"firefox_accounts_derived__fxa_gcp_stdout_events__v1_external"
) as firefox_accounts_derived__fxa_gcp_stdout_events__v1_external:
ExternalTaskMarker(
task_id="bqetl_subplat__wait_for_firefox_accounts_derived__fxa_gcp_stdout_events__v1",
external_dag_id="bqetl_subplat",
external_task_id="wait_for_firefox_accounts_derived__fxa_gcp_stdout_events__v1",
# timedelta(days=-1, seconds=85500) is -900 s, so subtracting it yields
# execution_date + 15 minutes.
execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=85500)).isoformat() }}",
)
ExternalTaskMarker(
task_id="bqetl_event_rollup__wait_for_firefox_accounts_derived__fxa_gcp_stdout_events__v1",
external_dag_id="bqetl_event_rollup",
external_task_id="wait_for_firefox_accounts_derived__fxa_gcp_stdout_events__v1",
# timedelta(days=-1, seconds=81000) is -5400 s, so subtracting it yields
# execution_date + 90 minutes.
execution_date="{{ (execution_date - macros.timedelta(days=-1, seconds=81000)).isoformat() }}",
)
# The markers run only after the query task itself has finished.
firefox_accounts_derived__fxa_gcp_stdout_events__v1_external.set_upstream(
firefox_accounts_derived__fxa_gcp_stdout_events__v1
)
firefox_accounts_derived__fxa_log_auth_events__v1 = bigquery_etl_query(
task_id="firefox_accounts_derived__fxa_log_auth_events__v1",
destination_table="fxa_log_auth_events_v1",
@ -422,6 +490,14 @@ with DAG(
firefox_accounts_derived__fxa_content_events__v1
)
firefox_accounts_derived__fxa_users_daily__v1.set_upstream(
firefox_accounts_derived__fxa_gcp_stderr_events__v1
)
firefox_accounts_derived__fxa_users_daily__v1.set_upstream(
firefox_accounts_derived__fxa_gcp_stdout_events__v1
)
firefox_accounts_derived__fxa_users_daily__v1.set_upstream(
firefox_accounts_derived__fxa_stdout_events__v1
)
@ -434,6 +510,14 @@ with DAG(
firefox_accounts_derived__fxa_content_events__v1
)
firefox_accounts_derived__fxa_users_first_seen__v1.set_upstream(
firefox_accounts_derived__fxa_gcp_stderr_events__v1
)
firefox_accounts_derived__fxa_users_first_seen__v1.set_upstream(
firefox_accounts_derived__fxa_gcp_stdout_events__v1
)
firefox_accounts_derived__fxa_users_first_seen__v1.set_upstream(
firefox_accounts_derived__fxa_stdout_events__v1
)
@ -450,6 +534,14 @@ with DAG(
firefox_accounts_derived__fxa_content_events__v1
)
firefox_accounts_derived__fxa_users_services_daily__v1.set_upstream(
firefox_accounts_derived__fxa_gcp_stderr_events__v1
)
firefox_accounts_derived__fxa_users_services_daily__v1.set_upstream(
firefox_accounts_derived__fxa_gcp_stdout_events__v1
)
firefox_accounts_derived__fxa_users_services_daily__v1.set_upstream(
firefox_accounts_derived__fxa_stdout_events__v1
)
@ -462,6 +554,14 @@ with DAG(
firefox_accounts_derived__fxa_content_events__v1
)
firefox_accounts_derived__fxa_users_services_daily__v2.set_upstream(
firefox_accounts_derived__fxa_gcp_stderr_events__v1
)
firefox_accounts_derived__fxa_users_services_daily__v2.set_upstream(
firefox_accounts_derived__fxa_gcp_stdout_events__v1
)
firefox_accounts_derived__fxa_users_services_daily__v2.set_upstream(
firefox_accounts_derived__fxa_stdout_events__v1
)
@ -474,6 +574,14 @@ with DAG(
firefox_accounts_derived__fxa_content_events__v1
)
firefox_accounts_derived__fxa_users_services_devices_daily__v1.set_upstream(
firefox_accounts_derived__fxa_gcp_stderr_events__v1
)
firefox_accounts_derived__fxa_users_services_devices_daily__v1.set_upstream(
firefox_accounts_derived__fxa_gcp_stdout_events__v1
)
firefox_accounts_derived__fxa_users_services_devices_daily__v1.set_upstream(
firefox_accounts_derived__fxa_stdout_events__v1
)

Просмотреть файл

@ -1224,6 +1224,36 @@ with DAG(
cjms_bigquery__flows__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_content_events__v1
)
# Cross-DAG dependency: sensor on task
# `firefox_accounts_derived__fxa_gcp_stderr_events__v1` of the `bqetl_fxa_events` DAG.
wait_for_firefox_accounts_derived__fxa_gcp_stderr_events__v1 = ExternalTaskSensor(
task_id="wait_for_firefox_accounts_derived__fxa_gcp_stderr_events__v1",
external_dag_id="bqetl_fxa_events",
external_task_id="firefox_accounts_derived__fxa_gcp_stderr_events__v1",
# 900 s = 15 minutes: offset between this DAG's run and the upstream run to check.
# NOTE(review): the matching ExternalTaskMarker in bqetl_fxa_events must use the same offset.
execution_delta=datetime.timedelta(seconds=900),
check_existence=True,
mode="reschedule",
allowed_states=ALLOWED_STATES,
failed_states=FAILED_STATES,
pool="DATA_ENG_EXTERNALTASKSENSOR",
)
# cjms_bigquery__flows__v1 only runs after the sensor above has succeeded.
cjms_bigquery__flows__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_gcp_stderr_events__v1
)
# Cross-DAG dependency: sensor on task
# `firefox_accounts_derived__fxa_gcp_stdout_events__v1` of the `bqetl_fxa_events` DAG.
wait_for_firefox_accounts_derived__fxa_gcp_stdout_events__v1 = ExternalTaskSensor(
task_id="wait_for_firefox_accounts_derived__fxa_gcp_stdout_events__v1",
external_dag_id="bqetl_fxa_events",
external_task_id="firefox_accounts_derived__fxa_gcp_stdout_events__v1",
# 900 s = 15 minutes: offset between this DAG's run and the upstream run to check.
# NOTE(review): the matching ExternalTaskMarker in bqetl_fxa_events must use the same offset.
execution_delta=datetime.timedelta(seconds=900),
check_existence=True,
mode="reschedule",
allowed_states=ALLOWED_STATES,
failed_states=FAILED_STATES,
pool="DATA_ENG_EXTERNALTASKSENSOR",
)
# cjms_bigquery__flows__v1 only runs after the sensor above has succeeded.
cjms_bigquery__flows__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_gcp_stdout_events__v1
)
wait_for_firefox_accounts_derived__fxa_stdout_events__v1 = ExternalTaskSensor(
task_id="wait_for_firefox_accounts_derived__fxa_stdout_events__v1",
external_dag_id="bqetl_fxa_events",
@ -1352,6 +1382,12 @@ with DAG(
mozilla_vpn_derived__funnel_product_page_to_subscribed__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_content_events__v1
)
mozilla_vpn_derived__funnel_product_page_to_subscribed__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_gcp_stderr_events__v1
)
mozilla_vpn_derived__funnel_product_page_to_subscribed__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_gcp_stdout_events__v1
)
mozilla_vpn_derived__funnel_product_page_to_subscribed__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_stdout_events__v1
)
@ -1370,6 +1406,12 @@ with DAG(
mozilla_vpn_derived__fxa_attribution__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_content_events__v1
)
mozilla_vpn_derived__fxa_attribution__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_gcp_stderr_events__v1
)
mozilla_vpn_derived__fxa_attribution__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_gcp_stdout_events__v1
)
mozilla_vpn_derived__fxa_attribution__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_stdout_events__v1
)
@ -1384,6 +1426,12 @@ with DAG(
mozilla_vpn_derived__login_flows__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_content_events__v1
)
mozilla_vpn_derived__login_flows__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_gcp_stderr_events__v1
)
mozilla_vpn_derived__login_flows__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_gcp_stdout_events__v1
)
mozilla_vpn_derived__login_flows__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_stdout_events__v1
)
@ -1720,6 +1768,12 @@ with DAG(
subscription_platform_derived__subplat_flow_events__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_content_events__v1
)
subscription_platform_derived__subplat_flow_events__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_gcp_stderr_events__v1
)
subscription_platform_derived__subplat_flow_events__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_gcp_stdout_events__v1
)
subscription_platform_derived__subplat_flow_events__v1.set_upstream(
wait_for_firefox_accounts_derived__fxa_stdout_events__v1
)

Просмотреть файл

@ -6,8 +6,9 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.firefox_accounts.fxa_all_events`
AS
WITH fxa_auth_events AS (
WITH auth_events AS (
SELECT
"auth" AS fxa_server,
`timestamp`,
receiveTimestamp,
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(jsonPayload.fields.time AS INT64)) AS event_time,
@ -25,11 +26,15 @@ WITH fxa_auth_events AS (
jsonPayload.fields.device_id,
FROM
`moz-fx-data-shared-prod.firefox_accounts_derived.fxa_auth_events_v1`
-- TODO: add a cut off date once AWS to GCP migration is complete.
),
-- This table doesn't include any user events that are considered "active",
-- but should always be included for a complete raw event log.
fxa_auth_bounce_events AS (
auth_bounce_events AS (
SELECT
-- TODO: once no longer aliasing to fxa_log in the final part of the query,
-- we should change this label to "auth"
"auth_bounce" AS fxa_server,
`timestamp`,
receiveTimestamp,
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(jsonPayload.fields.time AS INT64)) AS event_time,
@ -50,8 +55,9 @@ fxa_auth_bounce_events AS (
FROM
`moz-fx-data-shared-prod.firefox_accounts_derived.fxa_auth_bounce_events_v1`
),
fxa_content_events AS (
content_events AS (
SELECT
"content" AS fxa_server,
`timestamp`,
receiveTimestamp,
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(jsonPayload.fields.time AS INT64)) AS event_time,
@ -69,10 +75,12 @@ fxa_content_events AS (
jsonPayload.fields.device_id,
FROM
`moz-fx-data-shared-prod.firefox_accounts_derived.fxa_content_events_v1`
-- TODO: add a cut off date once AWS to GCP migration is complete.
),
-- oauth events, see the note on top
fxa_oauth_events AS (
oauth_events AS (
SELECT
"oauth" AS fxa_server,
`timestamp`,
receiveTimestamp,
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(jsonPayload.fields.time AS INT64)) AS event_time,
@ -90,9 +98,13 @@ fxa_oauth_events AS (
CAST(NULL AS STRING) AS device_id,
FROM
`moz-fx-data-shared-prod.firefox_accounts_derived.fxa_oauth_events_v1`
-- TODO: add a cut off date once AWS to GCP migration is complete.
),
fxa_stdout_events AS (
stdout_events AS (
SELECT
-- TODO: once no longer aliasing to fxa_log in the final part of the query,
-- we should change this label to "payments"
"stdout" AS fxa_server,
`timestamp`,
receiveTimestamp,
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(jsonPayload.fields.time AS INT64)) AS event_time,
@ -110,45 +122,104 @@ fxa_stdout_events AS (
jsonPayload.fields.device_id,
FROM
`moz-fx-data-shared-prod.firefox_accounts_derived.fxa_stdout_events_v1`
-- TODO: add a cut off date once AWS to GCP migration is complete.
),
-- stdout table that contains events from services migrated to new GCP environment
gcp_stdout_events AS (
SELECT
fxa_server,
`timestamp`,
receiveTimestamp,
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(jsonPayload.fields.time AS INT64)) AS event_time,
jsonPayload.fields.user_id,
jsonPayload.fields.country,
JSON_VALUE(jsonPayload.fields.event_properties, "$.country_code") AS country_code,
jsonPayload.fields.language,
jsonPayload.fields.app_version,
jsonPayload.fields.os_name,
jsonPayload.fields.os_version,
jsonPayload.fields.event_type,
jsonPayload.logger,
jsonPayload.fields.user_properties,
jsonPayload.fields.event_properties,
jsonPayload.fields.device_id,
FROM
`moz-fx-data-shared-prod.firefox_accounts_derived.fxa_gcp_stdout_events_v1`
WHERE
-- this is when traffic switch over started, all prior dates contain test data.
-- see: DENG-1035 for more info.
DATE(`timestamp`) >= "2023-09-07"
),
-- stderr table that contains events from services migrated to new GCP environment
gcp_stderr_events AS (
SELECT
fxa_server,
`timestamp`,
receiveTimestamp,
SAFE.TIMESTAMP_MILLIS(SAFE_CAST(jsonPayload.fields.time AS INT64)) AS event_time,
jsonPayload.fields.user_id,
jsonPayload.fields.country,
JSON_VALUE(jsonPayload.fields.event_properties, "$.country_code") AS country_code,
jsonPayload.fields.language,
jsonPayload.fields.app_version,
jsonPayload.fields.os_name,
jsonPayload.fields.os_version,
jsonPayload.fields.event_type,
jsonPayload.logger,
jsonPayload.fields.user_properties,
jsonPayload.fields.event_properties,
jsonPayload.fields.device_id,
FROM
`moz-fx-data-shared-prod.firefox_accounts_derived.fxa_gcp_stderr_events_v1`
WHERE
-- this is when traffic switch over started, all prior dates contain test data.
-- see: DENG-1035 for more info.
DATE(`timestamp`) >= "2023-09-07"
),
unioned AS (
SELECT
*,
'auth' AS fxa_log,
*
FROM
fxa_auth_events
auth_events
UNION ALL
SELECT
*,
'auth_bounce' AS fxa_log,
*
FROM
fxa_auth_bounce_events
auth_bounce_events
UNION ALL
SELECT
*,
'content' AS fxa_log,
*
FROM
fxa_content_events
content_events
UNION ALL
-- oauth events, see the note on top
SELECT
*,
'oauth' AS fxa_log,
*
FROM
fxa_oauth_events
oauth_events
UNION ALL
SELECT
*,
'stdout' AS fxa_log,
*
FROM
fxa_stdout_events
stdout_events
UNION ALL
SELECT
*
FROM
gcp_stdout_events
UNION ALL
SELECT
*
FROM
gcp_stderr_events
)
SELECT
-- TODO: remove this aliasing. Doing so requires downstream changes, which is why the work is split across multiple changes / PRs.
fxa_server AS fxa_log,
`timestamp`,
receiveTimestamp,
event_time,
logger,
fxa_log,
event_type,
user_id,
device_id,

Просмотреть файл

@ -0,0 +1,37 @@
friendly_name: FxA GCP stderr events (prod)
description: |
FxA server events extracted from stderr logs.
This new table is the direct result of FxA migration from AWS to GCP
(see: OPST-296 for more context).
Traffic shifting from AWS to GCP started on 2023-09-07 and finished on 2023-09-12, so this table contains only partial
event data for those days. Once the migration is complete, all events from the fxa auth server will land in this table
(previous table: `fxa_auth_events_v1`).
Note that at the time of writing there is an open ticket (https://mozilla-hub.atlassian.net/browse/FXA-8315)
to standardize logging across fxa packages. If resolved, auth server events might land in `fxa_gcp_stdout_events_v1` instead.
This should be transparent to users of this data since it should be consumed via `fxa_all_events` view.
Payment server events will continue landing inside: `fxa_stdout_events_v1`
owners:
- kik@mozilla.com
labels:
application: fxa
incremental: true
schedule: daily
owner1: kik
dag: bqetl_fxa_events
scheduling:
dag_name: bqetl_fxa_events
start_date: '2023-09-07'
arguments:
- --schema_update_option=ALLOW_FIELD_ADDITION
bigquery:
time_partitioning:
type: day
field: timestamp
require_partition_filter: true
expiration_days: null
clustering:
fields:
- fxa_server
references: {}

Просмотреть файл

@ -0,0 +1,62 @@
-- Selects one day (@submission_date) of FxA amplitude events from the GCP `stderr` log table.
-- Output is every source column plus a derived `fxa_server`, with `jsonPayload.fields.user_id`
-- and `jsonPayload.fields.device_id` replaced by the hex-encoded SHA-256 of their original values.
SELECT
-- example logger expected input: fxa-auth-server
-- OFFSET(1) takes the second "-"-separated token; with the logger filter below that is
-- either "auth" or "content".
SPLIT(jsonPayload.logger, "-")[OFFSET(1)] AS fxa_server,
-- Rebuild the nested jsonPayload.fields struct so that only the two ID fields change;
-- every other column and nested field is passed through as-is.
* REPLACE (
(
SELECT AS STRUCT
jsonPayload.* REPLACE (
(
SELECT AS STRUCT
jsonPayload.fields.* REPLACE (
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
)
) AS fields
)
) AS jsonPayload
),
FROM
`moz-fx-fxa-prod.gke_fxa_prod_log.stderr`
WHERE
-- Scan the ingestion-time partitions from one day before to one day after @submission_date,
-- then keep only rows whose event `timestamp` falls on @submission_date.
-- NOTE(review): presumably the +/- 1 day window covers rows ingested into a neighbouring
-- partition -- confirm.
(
DATE(_PARTITIONTIME)
BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY)
AND DATE_ADD(@submission_date, INTERVAL 1 DAY)
)
AND DATE(`timestamp`) = @submission_date
AND jsonPayload.type = 'amplitudeEvent'
-- We expect to only see events from fxa-auth-server and fxa-content-server here
-- Although at time of writing they are split across `stdout` and `stderr` logs, there is an open issue to standardize
-- this: https://mozilla-hub.atlassian.net/browse/FXA-8315
-- Filtering for both here will ensure we don't miss any events if logger output is changed in the future
AND jsonPayload.logger IN ("fxa-auth-server", "fxa-content-server")
AND jsonPayload.fields.event_type IS NOT NULL
-- The following condition lets through all non-auth-server events, and auth-server events that are not
-- coming from high-volume oauth client IDs that are redundant. It is copied for compatibility from:
-- https://github.com/mozilla/bigquery-etl/blob/8a97f747949dc87d9b5425d82776b2c1626aca2e/sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_auth_events_v1/query.sql#L43-L62
AND (
jsonPayload.logger != 'fxa-auth-server'
OR (
-- We filter out events associated with high-volume oauth client IDs that
-- are redundant with cert_signed events;
-- see https://github.com/mozilla/bigquery-etl/issues/348
JSON_VALUE(jsonPayload.fields.event_properties, '$.oauth_client_id') NOT IN (
'3332a18d142636cb', -- fennec sync
'5882386c6d801776', -- desktop sync
'1b1a3e44c54fbb58' -- ios sync
)
-- We do want to let through some desktop and iOS sync events (every event type except
-- the two access_token ones listed below)
-- see https://github.com/mozilla/bigquery-etl/issues/573
OR (
JSON_VALUE(jsonPayload.fields.event_properties, '$.oauth_client_id') IN (
'5882386c6d801776',
'1b1a3e44c54fbb58'
)
AND jsonPayload.fields.event_type NOT IN (
'fxa_activity - access_token_checked',
'fxa_activity - access_token_created'
)
)
-- `NOT IN` evaluates to NULL when the client ID is NULL, so events without an
-- oauth_client_id have to be let through explicitly.
OR JSON_VALUE(jsonPayload.fields.event_properties, '$.oauth_client_id') IS NULL
)
)

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,35 @@
friendly_name: FxA GCP stdout events (prod)
description: |
FxA server events extracted from stdout logs.
This new table is the direct result of FxA migration from AWS to GCP
(see: OPST-296 for more context).
Traffic shifting from AWS to GCP started on 2023-09-07 and this table contains partial event data starting from this date.
Once the migration is complete all events from the fxa content server will land in this table
(previous table: `fxa_content_events_v1`).
See also `fxa_gcp_stderr_events_v1`.
Payment server events will continue landing inside: `fxa_stdout_events_v1`
owners:
- kik@mozilla.com
labels:
application: fxa
incremental: true
schedule: daily
owner1: kik
dag: bqetl_fxa_events
scheduling:
dag_name: bqetl_fxa_events
start_date: '2023-09-07'
arguments:
- --schema_update_option=ALLOW_FIELD_ADDITION
bigquery:
time_partitioning:
type: day
field: timestamp
require_partition_filter: true
expiration_days: null
clustering:
fields:
- fxa_server
references: {}

Просмотреть файл

@ -0,0 +1,62 @@
-- Selects one day (@submission_date) of FxA amplitude events from the GCP `stdout` log table.
-- Output is every source column plus a derived `fxa_server`, with `jsonPayload.fields.user_id`
-- and `jsonPayload.fields.device_id` replaced by the hex-encoded SHA-256 of their original values.
SELECT
-- example logger expected input: fxa-auth-server
-- OFFSET(1) takes the second "-"-separated token; with the logger filter below that is
-- either "auth" or "content".
SPLIT(jsonPayload.logger, "-")[OFFSET(1)] AS fxa_server,
-- Rebuild the nested jsonPayload.fields struct so that only the two ID fields change;
-- every other column and nested field is passed through as-is.
* REPLACE (
(
SELECT AS STRUCT
jsonPayload.* REPLACE (
(
SELECT AS STRUCT
jsonPayload.fields.* REPLACE (
TO_HEX(SHA256(jsonPayload.fields.user_id)) AS user_id,
TO_HEX(SHA256(jsonPayload.fields.device_id)) AS device_id
)
) AS fields
)
) AS jsonPayload
),
FROM
`moz-fx-fxa-prod.gke_fxa_prod_log.stdout`
WHERE
-- Scan the ingestion-time partitions from one day before to one day after @submission_date,
-- then keep only rows whose event `timestamp` falls on @submission_date.
-- NOTE(review): presumably the +/- 1 day window covers rows ingested into a neighbouring
-- partition -- confirm.
(
DATE(_PARTITIONTIME)
BETWEEN DATE_SUB(@submission_date, INTERVAL 1 DAY)
AND DATE_ADD(@submission_date, INTERVAL 1 DAY)
)
AND DATE(`timestamp`) = @submission_date
AND jsonPayload.type = 'amplitudeEvent'
-- We expect to only see events from fxa-auth-server and fxa-content-server here
-- Although at time of writing they are split across `stdout` and `stderr` logs, there is an open issue to standardize
-- this: https://mozilla-hub.atlassian.net/browse/FXA-8315
-- Filtering for both here will ensure we don't miss any events if logger output is changed in the future
AND jsonPayload.logger IN ("fxa-auth-server", "fxa-content-server")
AND jsonPayload.fields.event_type IS NOT NULL
-- The following condition lets through all non-auth-server events, and auth-server events that are not
-- coming from high-volume oauth client IDs that are redundant. It is copied for compatibility from:
-- https://github.com/mozilla/bigquery-etl/blob/8a97f747949dc87d9b5425d82776b2c1626aca2e/sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_auth_events_v1/query.sql#L43-L62
AND (
jsonPayload.logger != 'fxa-auth-server'
OR (
-- We filter out events associated with high-volume oauth client IDs that
-- are redundant with cert_signed events;
-- see https://github.com/mozilla/bigquery-etl/issues/348
JSON_VALUE(jsonPayload.fields.event_properties, '$.oauth_client_id') NOT IN (
'3332a18d142636cb', -- fennec sync
'5882386c6d801776', -- desktop sync
'1b1a3e44c54fbb58' -- ios sync
)
-- We do want to let through some desktop and iOS sync events (every event type except
-- the two access_token ones listed below)
-- see https://github.com/mozilla/bigquery-etl/issues/573
OR (
JSON_VALUE(jsonPayload.fields.event_properties, '$.oauth_client_id') IN (
'5882386c6d801776',
'1b1a3e44c54fbb58'
)
AND jsonPayload.fields.event_type NOT IN (
'fxa_activity - access_token_checked',
'fxa_activity - access_token_created'
)
)
-- `NOT IN` evaluates to NULL when the client ID is NULL, so events without an
-- oauth_client_id have to be let through explicitly.
OR JSON_VALUE(jsonPayload.fields.event_properties, '$.oauth_client_id') IS NULL
)
)

Просмотреть файл

@ -0,0 +1,463 @@
fields:
- name: fxa_server
type: STRING
mode: NULLABLE
- name: logName
type: STRING
mode: NULLABLE
- name: resource
type: RECORD
mode: NULLABLE
fields:
- name: type
type: STRING
mode: NULLABLE
- name: labels
type: RECORD
mode: NULLABLE
fields:
- name: project_id
type: STRING
mode: NULLABLE
- name: pod_name
type: STRING
mode: NULLABLE
- name: container_name
type: STRING
mode: NULLABLE
- name: cluster_name
type: STRING
mode: NULLABLE
- name: namespace_name
type: STRING
mode: NULLABLE
- name: location
type: STRING
mode: NULLABLE
- name: textPayload
type: STRING
mode: NULLABLE
- name: jsonPayload
type: RECORD
mode: NULLABLE
fields:
- name: log_type
type: STRING
mode: NULLABLE
- name: request
type: STRING
mode: NULLABLE
- name: request_time
type: FLOAT
mode: NULLABLE
- name: x_forwarded_for
type: STRING
mode: NULLABLE
- name: bytes_sent
type: FLOAT
mode: NULLABLE
- name: user_agent
type: STRING
mode: NULLABLE
- name: x_forwarded_proto
type: STRING
mode: NULLABLE
- name: trace
type: STRING
mode: NULLABLE
- name: remote_user
type: STRING
mode: NULLABLE
- name: remote_addr
type: STRING
mode: NULLABLE
- name: referrer
type: STRING
mode: NULLABLE
- name: status
type: STRING
mode: NULLABLE
- name: logger
type: STRING
mode: NULLABLE
- name: pid
type: FLOAT
mode: NULLABLE
- name: timestamp
type: FLOAT
mode: NULLABLE
- name: fields
type: RECORD
mode: NULLABLE
fields:
- name: remoteaddresschain
type: STRING
mode: NULLABLE
- name: t
type: STRING
mode: NULLABLE
- name: method
type: STRING
mode: NULLABLE
- name: status
type: STRING
mode: NULLABLE
- name: useragent
type: STRING
mode: NULLABLE
- name: contentlength
type: STRING
mode: NULLABLE
- name: clientaddress
type: STRING
mode: NULLABLE
- name: path
type: STRING
mode: NULLABLE
- name: errno
type: FLOAT
mode: NULLABLE
- name: code
type: FLOAT
mode: NULLABLE
- name: agent
type: STRING
mode: NULLABLE
- name: client_id
type: STRING
mode: NULLABLE
- name: uid
type: STRING
mode: NULLABLE
- name: referer
type: STRING
mode: NULLABLE
- name: message
type: STRING
mode: NULLABLE
- name: event
type: STRING
mode: NULLABLE
- name: time
type: FLOAT
mode: NULLABLE
- name: event_type
type: STRING
mode: NULLABLE
- name: region
type: STRING
mode: NULLABLE
- name: country
type: STRING
mode: NULLABLE
- name: session_id
type: FLOAT
mode: NULLABLE
- name: language
type: STRING
mode: NULLABLE
- name: app_version
type: STRING
mode: NULLABLE
- name: user_properties
type: STRING
mode: NULLABLE
- name: event_properties
type: STRING
mode: NULLABLE
- name: device_id
type: STRING
mode: NULLABLE
- name: op
type: STRING
mode: NULLABLE
- name: os_name
type: STRING
mode: NULLABLE
- name: err
type: STRING
mode: NULLABLE
- name: joierrors
type: STRING
mode: NULLABLE
- name: stack
type: STRING
mode: NULLABLE
- name: reason
type: STRING
mode: NULLABLE
- name: result
type: STRING
mode: NULLABLE
- name: api
type: FLOAT
mode: NULLABLE
- name: os_version
type: STRING
mode: NULLABLE
- name: user_id
type: STRING
mode: NULLABLE
- name: msg
type: STRING
mode: NULLABLE
- name: error
type: STRING
mode: NULLABLE
- name: source
type: STRING
mode: NULLABLE
- name: blocked
type: STRING
mode: NULLABLE
- name: column
type: FLOAT
mode: NULLABLE
- name: line
type: FLOAT
mode: NULLABLE
- name: referrer
type: STRING
mode: NULLABLE
- name: violated
type: STRING
mode: NULLABLE
- name: url
type: STRING
mode: NULLABLE
- name: version
type: STRING
mode: NULLABLE
- name: directory
type: STRING
mode: NULLABLE
- name: port
type: FLOAT
mode: NULLABLE
- name: poolstats
type: STRING
mode: NULLABLE
- name: signal
type: STRING
mode: NULLABLE
- name: reqtime
type: FLOAT
mode: NULLABLE
- name: rp
type: STRING
mode: NULLABLE
- name: assertion_verification_time
type: FLOAT
mode: NULLABLE
- name: trustedissuers
type: STRING
mode: NULLABLE
- name: assertion
type: STRING
mode: NULLABLE
- name: host
type: STRING
mode: NULLABLE
- name: header
type: STRING
mode: NULLABLE
- name: usergroupheader
type: STRING
mode: NULLABLE
- name: usergroupheader_notnull
type: STRING
mode: NULLABLE
- name: user
type: STRING
mode: NULLABLE
- name: email
type: STRING
mode: NULLABLE
- name: search_type
type: STRING
mode: NULLABLE
- name: auto_completed
type: BOOLEAN
mode: NULLABLE
- name: payload
type: STRING
mode: NULLABLE
- name: user_agent
type: STRING
mode: NULLABLE
- name: ip_address
type: STRING
mode: NULLABLE
- name: document_version
type: STRING
mode: NULLABLE
- name: document_namespace
type: STRING
mode: NULLABLE
- name: document_type
type: STRING
mode: NULLABLE
- name: document_id
type: STRING
mode: NULLABLE
- name: originaltransactionid
type: STRING
mode: NULLABLE
- name: config
type: STRING
mode: NULLABLE
- name: duration
type: FLOAT
mode: NULLABLE
- name: info
type: STRING
mode: NULLABLE
- name: detail
type: STRING
mode: NULLABLE
- name: cause
type: STRING
mode: NULLABLE
- name: success
type: FLOAT
mode: NULLABLE
- name: envversion
type: STRING
mode: NULLABLE
- name: type
type: STRING
mode: NULLABLE
- name: severity
type: FLOAT
mode: NULLABLE
- name: timestamp
type: TIMESTAMP
mode: NULLABLE
- name: receiveTimestamp
type: TIMESTAMP
mode: NULLABLE
- name: severity
type: STRING
mode: NULLABLE
- name: insertId
type: STRING
mode: NULLABLE
- name: httpRequest
type: RECORD
mode: NULLABLE
fields:
- name: requestMethod
type: STRING
mode: NULLABLE
- name: requestUrl
type: STRING
mode: NULLABLE
- name: requestSize
type: INTEGER
mode: NULLABLE
- name: status
type: INTEGER
mode: NULLABLE
- name: responseSize
type: INTEGER
mode: NULLABLE
- name: userAgent
type: STRING
mode: NULLABLE
- name: remoteIp
type: STRING
mode: NULLABLE
- name: serverIp
type: STRING
mode: NULLABLE
- name: referer
type: STRING
mode: NULLABLE
- name: cacheLookup
type: BOOLEAN
mode: NULLABLE
- name: cacheHit
type: BOOLEAN
mode: NULLABLE
- name: cacheValidatedWithOriginServer
type: BOOLEAN
mode: NULLABLE
- name: cacheFillBytes
type: INTEGER
mode: NULLABLE
- name: protocol
type: STRING
mode: NULLABLE
- name: labels
type: RECORD
mode: NULLABLE
fields:
- name: k8s_pod_app_kubernetes_io_component
type: STRING
mode: NULLABLE
- name: k8s_pod_deployment
type: STRING
mode: NULLABLE
- name: compute_googleapis_com_resource_name
type: STRING
mode: NULLABLE
- name: k8s_pod_env_code
type: STRING
mode: NULLABLE
- name: k8s_pod_pod_template_hash
type: STRING
mode: NULLABLE
- name: k8s_pod_app_kubernetes_io_name
type: STRING
mode: NULLABLE
- name: operation
type: RECORD
mode: NULLABLE
fields:
- name: id
type: STRING
mode: NULLABLE
- name: producer
type: STRING
mode: NULLABLE
- name: first
type: BOOLEAN
mode: NULLABLE
- name: last
type: BOOLEAN
mode: NULLABLE
- name: trace
type: STRING
mode: NULLABLE
- name: spanId
type: STRING
mode: NULLABLE
- name: traceSampled
type: BOOLEAN
mode: NULLABLE
- name: sourceLocation
type: RECORD
mode: NULLABLE
fields:
- name: file
type: STRING
mode: NULLABLE
- name: line
type: INTEGER
mode: NULLABLE
- name: function
type: STRING
mode: NULLABLE
- name: split
type: RECORD
mode: NULLABLE
fields:
- name: uid
type: STRING
mode: NULLABLE
- name: index
type: INTEGER
mode: NULLABLE
- name: totalSplits
type: INTEGER
mode: NULLABLE

Просмотреть файл

@ -49,7 +49,7 @@ WITH windowed AS (
NOT (event_type = 'fxa_rp - engage' AND service = 'fx-monitor')
) OVER w1 = 0 AS monitor_only
FROM
`moz-fx-data-shared-prod.firefox_accounts.fxa_all_events`
firefox_accounts.fxa_all_events
WHERE
fxa_log IN ('auth', 'auth_bounce', 'content', 'oauth')
AND user_id IS NOT NULL