feat(DENG-1576): introducing fxa_log_device_command_events_v2 to include GCP based logs (#4308)

* introducing fxa_log_device_command_events_v2 to pull relevant logs from GCP log table

* updated the bqetl_fxa_events DAG

* correcting the source table

* added fxa_log_device_command_events_v2 to dry run skip list due to the source table permissions issue and added date filters to indicate timeframes for which events are included in both tables

* Update sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_log_device_command_events_v2/query.sql

Co-authored-by: akkomar <akkomar@users.noreply.github.com>

* made changes as suggested by srose in PR#4308

---------

Co-authored-by: akkomar <akkomar@users.noreply.github.com>
This commit is contained in:
kik-kik 2023-09-21 12:06:14 +02:00 коммит произвёл GitHub
Родитель 1cd7c09842
Коммит 15b277c3d5
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
8 изменённых файлов: 161 добавлений и 7 удалений

Просмотреть файл

@ -53,6 +53,7 @@ dry_run:
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_log_auth_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_log_content_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_log_device_command_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_log_device_command_events_v2/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_users_services_first_seen_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_users_services_last_seen_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/fxa_amplitude_export_v1/query.sql

Просмотреть файл

@ -264,13 +264,14 @@ with DAG(
depends_on_past=False,
)
firefox_accounts_derived__fxa_log_device_command_events__v1 = bigquery_etl_query(
task_id="firefox_accounts_derived__fxa_log_device_command_events__v1",
destination_table="fxa_log_device_command_events_v1",
firefox_accounts_derived__fxa_log_device_command_events__v2 = bigquery_etl_query(
task_id="firefox_accounts_derived__fxa_log_device_command_events__v2",
destination_table="fxa_log_device_command_events_v2",
dataset_id="firefox_accounts_derived",
project_id="moz-fx-data-shared-prod",
owner="dthorn@mozilla.com",
email=["dthorn@mozilla.com", "telemetry-alerts@mozilla.com"],
owner="kik@mozilla.com",
email=["dthorn@mozilla.com", "kik@mozilla.com", "telemetry-alerts@mozilla.com"],
start_date=datetime.datetime(2023, 9, 7, 0, 0),
date_partition_parameter="submission_date",
depends_on_past=False,
)

Просмотреть файл

@ -5,3 +5,8 @@ SELECT
*
FROM
`moz-fx-data-shared-prod.firefox_accounts_derived.fxa_log_device_command_events_v1`
UNION ALL
SELECT
*
FROM
`moz-fx-data-shared-prod.firefox_accounts_derived.fxa_log_device_command_events_v2`

Просмотреть файл

@ -2,8 +2,11 @@
friendly_name: FxA Log Device Command Events
description: |-
A subset of FxA auth server logs related to "send tab" activity.
See https://bugzilla.mozilla.org/show_bug.cgi?id=1649074
This table contains event data from the FxA service when it was still running in AWS.
All new events come from GCP deployment of FxA and are available in the v2 of this table.
The migration started on 2023-09-07 and was concluded by 2023-09-15.
owners:
- dthorn@mozilla.com
labels:
@ -11,7 +14,8 @@ labels:
incremental: true
schedule: daily
scheduling:
dag_name: bqetl_fxa_events
# descheduled due to source table change. v2 of the query retrieves the data from the new source.
# dag_name: bqetl_fxa_events
# This query references secret keys that are not available for dry runs,
# so we must explicitly write out dependencies. In this case, the query
# depends only on fxa logs produced via Stackdriver integration, so no other

Просмотреть файл

@ -0,0 +1,34 @@
fields:
- name: timestamp
type: TIMESTAMP
mode: NULLABLE
- name: type
type: STRING
mode: NULLABLE
- name: user_id
type: STRING
mode: NULLABLE
- name: index
type: STRING
mode: NULLABLE
- name: command
type: STRING
mode: NULLABLE
- name: target
type: STRING
mode: NULLABLE
- name: target_os
type: STRING
mode: NULLABLE
- name: target_type
type: STRING
mode: NULLABLE
- name: sender
type: STRING
mode: NULLABLE
- name: sender_os
type: STRING
mode: NULLABLE
- name: sender_type
type: STRING
mode: NULLABLE

Просмотреть файл

@ -0,0 +1,33 @@
---
friendly_name: FxA Log Device Command Events
description: |-
A subset of FxA auth server logs related to "send tab" activity.
See https://bugzilla.mozilla.org/show_bug.cgi?id=1649074
This table contains FxA log device command events from the GCP based FxA deployment.
The migration from AWS to GCP started on 2023-09-07 and concluded by 2023-09-15.
The v1 version of this table contains event data from before the migration concluded.
owners:
- kik@mozilla.com
labels:
application: fxa
incremental: true
schedule: daily
scheduling:
dag_name: bqetl_fxa_events
# v2 created as the result of the source table changing post AWS to GCP migration.
# The start date represents when the migration started.
start_date: "2023-09-07"
# This query references secret keys that are not available for dry runs,
# so we must explicitly write out dependencies. In this case, the query
# depends only on fxa logs produced via Stackdriver integration, so no other
# scheduled tasks are involved and the referenced_tables list is empty.
referenced_tables: []
bigquery:
time_partitioning:
type: day
field: timestamp
require_partition_filter: false
clustering:
fields:
- command

Просмотреть файл

@ -0,0 +1,42 @@
-- Selects FxA "device.command.*" log events for a single day (@submission_date)
-- from the GKE stderr log table. The uid, index, target and sender fields are
-- replaced with hex-encoded HMAC-SHA256 digests computed with a decrypted key.
WITH hmac_key AS (
  -- Decrypt the stored 'fxa_hmac_prod' ciphertext with the Airflow query keyset;
  -- the resulting bytes are the key passed to udf.hmac_sha256 below.
  SELECT
    AEAD.DECRYPT_BYTES(
      (SELECT keyset FROM `moz-fx-dataops-secrets.airflow_query_keys.fxa_prod`),
      ciphertext,
      CAST(key_id AS BYTES)
    ) AS value
  FROM
    `moz-fx-data-shared-prod.firefox_accounts_derived.encrypted_keys_v1`
  WHERE
    key_id = 'fxa_hmac_prod'
)
SELECT
  `timestamp`,
  jsonPayload.type AS type,
  -- Identifier fields are emitted only as keyed hashes, never as raw values.
  TO_HEX(
    udf.hmac_sha256((SELECT value FROM hmac_key), CAST(jsonPayload.fields.uid AS BYTES))
  ) AS user_id,
  -- The index is cast to INT64 and rendered as a decimal string before hashing.
  TO_HEX(
    udf.hmac_sha256(
      (SELECT value FROM hmac_key),
      CAST(FORMAT('%d', CAST(jsonPayload.fields.index AS INT64)) AS BYTES)
    )
  ) AS index,
  jsonPayload.fields.command AS command,
  TO_HEX(
    udf.hmac_sha256((SELECT value FROM hmac_key), CAST(jsonPayload.fields.target AS BYTES))
  ) AS target,
  jsonPayload.fields.targetOS AS target_os,
  jsonPayload.fields.targetType AS target_type,
  TO_HEX(
    udf.hmac_sha256((SELECT value FROM hmac_key), CAST(jsonPayload.fields.sender AS BYTES))
  ) AS sender,
  jsonPayload.fields.senderOS AS sender_os,
  jsonPayload.fields.senderType AS sender_type
FROM
  `moz-fx-fxa-prod.gke_fxa_prod_log.stderr`
WHERE
  DATE(`timestamp`) = @submission_date
  -- Only include events after the AWS to GCP migration started.
  AND DATE(`timestamp`) >= "2023-09-07"
  AND jsonPayload.type LIKE 'device.command.%'

Просмотреть файл

@ -0,0 +1,34 @@
fields:
- name: timestamp
type: TIMESTAMP
mode: NULLABLE
- name: type
type: STRING
mode: NULLABLE
- name: user_id
type: STRING
mode: NULLABLE
- name: index
type: STRING
mode: NULLABLE
- name: command
type: STRING
mode: NULLABLE
- name: target
type: STRING
mode: NULLABLE
- name: target_os
type: STRING
mode: NULLABLE
- name: target_type
type: STRING
mode: NULLABLE
- name: sender
type: STRING
mode: NULLABLE
- name: sender_os
type: STRING
mode: NULLABLE
- name: sender_type
type: STRING
mode: NULLABLE