feat(DENG-1696): docker fxa admin server sanitized updating after gcp migration (#4400)

* added v2 of docker_fxa_admin_server_events

* updated the view to include the fields needed

* added schema for firefox_accounts_derived/docker_fxa_admin_server_sanitized_v1

* Update sql/moz-fx-data-shared-prod/firefox_accounts/docker_fxa_admin_server_sanitized/view.sql

Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com>

* added docker_fxa_admin_server_sanitized_v2 to dry_run skip list

---------

Co-authored-by: Sean Rose <1994030+sean-rose@users.noreply.github.com>
This commit is contained in:
kik-kik 2023-10-10 21:23:49 +02:00 коммит произвёл GitHub
Родитель f0b6089b86
Коммит 79de048842
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
9 изменённых файлов: 871 добавлений и 15 удалений

Просмотреть файл

@ -70,6 +70,7 @@ dry_run:
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/nonprod_fxa_gcp_stderr_events_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_admin_server_sanitized_v1/init.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_admin_server_sanitized_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_admin_server_sanitized_v2/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_customs_sanitized_v1/init.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_customs_sanitized_v1/query.sql
- sql/moz-fx-data-shared-prod/firefox_accounts_derived/docker_fxa_customs_sanitized_v2/query.sql

Просмотреть файл

@ -54,17 +54,13 @@ with DAG(
doc_md=docs,
tags=tags,
) as dag:
docker_fxa_admin_server_v1 = bigquery_etl_query(
task_id="docker_fxa_admin_server_v1",
destination_table="docker_fxa_admin_server_sanitized_v1",
docker_fxa_admin_server_v2 = bigquery_etl_query(
task_id="docker_fxa_admin_server_v2",
destination_table="docker_fxa_admin_server_sanitized_v2",
dataset_id="firefox_accounts_derived",
project_id="moz-fx-data-shared-prod",
owner="frank@mozilla.com",
email=[
"dthorn@mozilla.com",
"frank@mozilla.com",
"telemetry-alerts@mozilla.com",
],
owner="kik@mozilla.com",
email=["dthorn@mozilla.com", "kik@mozilla.com", "telemetry-alerts@mozilla.com"],
date_partition_parameter="submission_date",
depends_on_past=False,
arguments=["--schema_update_option=ALLOW_FIELD_ADDITION"],

Просмотреть файл

@ -2,6 +2,83 @@ CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.firefox_accounts.docker_fxa_admin_server_sanitized`
AS
SELECT
*
* REPLACE(
STRUCT(
resource.type,
STRUCT(
resource.labels.project_id,
CAST(NULL AS STRING) AS pod_name,
CAST(NULL AS STRING) AS container_name,
CAST(NULL AS STRING) AS cluster_name,
CAST(NULL AS STRING) AS namespace_name,
CAST(NULL AS STRING) AS location,
resource.labels.zone,
resource.labels.instance_id
) AS labels
) AS resource,
STRUCT(
CAST(NULL AS STRING) AS k8s_pod_app_kubernetes_io_component,
CAST(NULL AS STRING) AS k8s_pod_deployment,
labels.compute_googleapis_com_resource_name,
CAST(NULL AS STRING) AS k8s_pod_env_code,
CAST(NULL AS STRING) AS k8s_pod_pod_template_hash,
CAST(NULL AS STRING) AS k8s_pod_app_kubernetes_io_name,
CAST(NULL AS STRING) AS k8s_pod_job_name,
CAST(NULL AS STRING) AS k8s_pod_controller_uid,
CAST(NULL AS STRING) AS k8s_pod_redis,
labels.stack,
labels.application,
labels.env,
labels.type
) AS labels,
STRUCT(
jsonPayload.timestamp,
STRUCT(
jsonPayload.fields.event,
jsonPayload.fields.search_type
) AS fields
) AS jsonPayload
)
FROM
`moz-fx-data-shared-prod.firefox_accounts_derived.docker_fxa_admin_server_sanitized_v1`
UNION ALL
SELECT
* REPLACE(
STRUCT(
resource.type,
STRUCT(
resource.labels.project_id,
resource.labels.pod_name,
resource.labels.container_name,
resource.labels.cluster_name,
resource.labels.namespace_name,
resource.labels.location,
CAST(NULL AS STRING) AS zone,
CAST(NULL AS STRING) AS instance_id
) AS labels
) AS resource,
STRUCT(
labels.k8s_pod_app_kubernetes_io_component,
labels.k8s_pod_deployment,
labels.compute_googleapis_com_resource_name,
labels.k8s_pod_env_code,
labels.k8s_pod_pod_template_hash,
labels.k8s_pod_app_kubernetes_io_name,
labels.k8s_pod_job_name,
labels.k8s_pod_controller_uid,
labels.k8s_pod_redis,
CAST(NULL AS STRING) AS stack,
CAST(NULL AS STRING) AS application,
CAST(NULL AS STRING) AS env,
CAST(NULL AS STRING) AS type
) AS labels,
STRUCT(
jsonPayload.timestamp,
STRUCT(
jsonPayload.fields.event,
jsonPayload.fields.search_type
) AS fields
) AS jsonPayload
)
FROM
`moz-fx-data-shared-prod.firefox_accounts_derived.docker_fxa_admin_server_sanitized_v2`

Просмотреть файл

@ -2,16 +2,22 @@ friendly_name: Docker Fxa Admin Server Sanitized
description: |-
Sanitized version of docker_fxa_admin_server from FxA.
PII is hashed.
This table contains FxA admin server (sanitized) events up until the migration from AWS to GCP was concluded.
There is a time window between 2023-09-07 and 2023-09-27 when we were receiving relevant events from both deployments.
GCP-based data is available in v2 of this table.
owners:
- frank@mozilla.com
labels:
application: fxa
incremental: true
schedule: daily
scheduling:
dag_name: bqetl_fxa_events
task_name: docker_fxa_admin_server_v1
arguments: ['--schema_update_option=ALLOW_FIELD_ADDITION']
# Query descheduled as a direct result of the migration from AWS to GCP:
# the last AWS instances were spun down on 27th September 2023.
# scheduling:
# dag_name: bqetl_fxa_events
# task_name: docker_fxa_admin_server_v1
# arguments: ['--schema_update_option=ALLOW_FIELD_ADDITION']
bigquery:
time_partitioning:
type: day

Просмотреть файл

@ -0,0 +1,232 @@
fields:
- name: date
type: DATE
mode: NULLABLE
- name: logName
type: STRING
mode: NULLABLE
- name: resource
type: RECORD
mode: NULLABLE
fields:
- name: type
type: STRING
mode: NULLABLE
- name: labels
type: RECORD
mode: NULLABLE
fields:
- name: zone
type: STRING
mode: NULLABLE
- name: project_id
type: STRING
mode: NULLABLE
- name: instance_id
type: STRING
mode: NULLABLE
- name: textPayload
type: STRING
mode: NULLABLE
- name: jsonPayload
type: RECORD
mode: NULLABLE
fields:
- name: severity
type: FLOAT
mode: NULLABLE
- name: timestamp
type: FLOAT
mode: NULLABLE
- name: type
type: STRING
mode: NULLABLE
- name: pid
type: FLOAT
mode: NULLABLE
- name: logger
type: STRING
mode: NULLABLE
- name: fields
type: RECORD
mode: NULLABLE
fields:
- name: usergroupheader
type: STRING
mode: NULLABLE
- name: search_type
type: STRING
mode: NULLABLE
- name: event
type: STRING
mode: NULLABLE
- name: auto_completed
type: BOOLEAN
mode: NULLABLE
- name: email
type: BYTES
mode: NULLABLE
- name: user
type: BYTES
mode: NULLABLE
- name: uid
type: STRING
mode: NULLABLE
- name: success
type: FLOAT
mode: NULLABLE
- name: msg
type: STRING
mode: NULLABLE
- name: poolstats
type: STRING
mode: NULLABLE
- name: originaltransactionid
type: STRING
mode: NULLABLE
- name: err
type: STRING
mode: NULLABLE
- name: originaltransactionids
type: STRING
mode: NULLABLE
- name: message
type: STRING
mode: NULLABLE
- name: purchasetoken
type: STRING
mode: NULLABLE
- name: stack
type: STRING
mode: NULLABLE
- name: envversion
type: STRING
mode: NULLABLE
- name: timestamp
type: TIMESTAMP
mode: NULLABLE
- name: receiveTimestamp
type: TIMESTAMP
mode: NULLABLE
- name: severity
type: STRING
mode: NULLABLE
- name: insertId
type: STRING
mode: NULLABLE
- name: httpRequest
type: RECORD
mode: NULLABLE
fields:
- name: requestMethod
type: STRING
mode: NULLABLE
- name: requestUrl
type: STRING
mode: NULLABLE
- name: requestSize
type: INTEGER
mode: NULLABLE
- name: status
type: INTEGER
mode: NULLABLE
- name: responseSize
type: INTEGER
mode: NULLABLE
- name: userAgent
type: STRING
mode: NULLABLE
- name: remoteIp
type: STRING
mode: NULLABLE
- name: serverIp
type: STRING
mode: NULLABLE
- name: referer
type: STRING
mode: NULLABLE
- name: cacheLookup
type: BOOLEAN
mode: NULLABLE
- name: cacheHit
type: BOOLEAN
mode: NULLABLE
- name: cacheValidatedWithOriginServer
type: BOOLEAN
mode: NULLABLE
- name: cacheFillBytes
type: INTEGER
mode: NULLABLE
- name: protocol
type: STRING
mode: NULLABLE
- name: labels
type: RECORD
mode: NULLABLE
fields:
- name: stack
type: STRING
mode: NULLABLE
- name: application
type: STRING
mode: NULLABLE
- name: env
type: STRING
mode: NULLABLE
- name: compute_googleapis_com_resource_name
type: STRING
mode: NULLABLE
- name: type
type: STRING
mode: NULLABLE
- name: operation
type: RECORD
mode: NULLABLE
fields:
- name: id
type: STRING
mode: NULLABLE
- name: producer
type: STRING
mode: NULLABLE
- name: first
type: BOOLEAN
mode: NULLABLE
- name: last
type: BOOLEAN
mode: NULLABLE
- name: trace
type: STRING
mode: NULLABLE
- name: spanId
type: STRING
mode: NULLABLE
- name: traceSampled
type: BOOLEAN
mode: NULLABLE
- name: sourceLocation
type: RECORD
mode: NULLABLE
fields:
- name: file
type: STRING
mode: NULLABLE
- name: line
type: INTEGER
mode: NULLABLE
- name: function
type: STRING
mode: NULLABLE
- name: split
type: RECORD
mode: NULLABLE
fields:
- name: uid
type: STRING
mode: NULLABLE
- name: index
type: INTEGER
mode: NULLABLE
- name: totalSplits
type: INTEGER
mode: NULLABLE

Просмотреть файл

@ -0,0 +1,28 @@
friendly_name: Docker Fxa Admin Server Sanitized
description: |-
Sanitized version of docker_fxa_admin_server from FxA.
PII is hashed.
(contains events coming from the GCP deployment of this service)
owners:
- kik@mozilla.com
labels:
application: fxa
incremental: true
schedule: daily
dag: bqetl_fxa_events
owner1: kik
scheduling:
dag_name: bqetl_fxa_events
task_name: docker_fxa_admin_server_v2
arguments:
- --schema_update_option=ALLOW_FIELD_ADDITION
bigquery:
time_partitioning:
type: day
field: date
require_partition_filter: false
expiration_days: null
clustering: null
references: {}
deprecated: false

Просмотреть файл

@ -0,0 +1,30 @@
-- Sanitized extract of the FxA admin-backend logs for a single
-- @submission_date, read from the GCP (GKE) stdout log table.
-- Every source column is passed through unchanged except
-- jsonPayload.fields.user and jsonPayload.fields.email, which are replaced
-- by their SHA256 digests. @submission_date is prepended as `date`.
WITH admin_backend_logs AS (
  SELECT
    *
  FROM
    `moz-fx-fxa-prod.gke_fxa_prod_log.stdout`
  WHERE
    -- Keep only rows emitted by the admin-backend deployment and container.
    resource.labels.container_name = "admin-backend"
    AND labels.k8s_pod_deployment = "admin-backend"
    -- Look at the partitions from one day before to one day after the target
    -- date (both bounds inclusive).
    -- NOTE(review): presumably because a row's ingestion partition can differ
    -- from its event timestamp by up to a day; confirm.
    AND DATE(_PARTITIONTIME) >= DATE_SUB(@submission_date, INTERVAL 1 DAY)
    AND DATE(_PARTITIONTIME) <= DATE_ADD(@submission_date, INTERVAL 1 DAY)
    -- The event itself must have happened on the target date.
    AND DATE(`timestamp`) = @submission_date
    -- v2 only consumes events from the GCP-based FxA service;
    -- 2023-09-07 is the date the migration to GCP started.
    AND DATE(`timestamp`) >= "2023-09-07"
)
SELECT
  @submission_date AS date,
  * REPLACE (
    (
      SELECT AS STRUCT
        jsonPayload.* REPLACE (
          (
            -- Rebuild the nested `fields` struct with the two PII fields hashed.
            SELECT AS STRUCT
              jsonPayload.fields.* REPLACE (
                SHA256(jsonPayload.fields.email) AS email,
                SHA256(jsonPayload.fields.user) AS user
              )
          ) AS fields
        )
    ) AS jsonPayload
  )
FROM
  admin_backend_logs

Просмотреть файл

@ -0,0 +1,487 @@
fields:
- name: date
type: DATE
mode: NULLABLE
- name: logName
type: STRING
mode: NULLABLE
- name: resource
type: RECORD
mode: NULLABLE
fields:
- name: type
type: STRING
mode: NULLABLE
- name: labels
type: RECORD
mode: NULLABLE
fields:
- name: project_id
type: STRING
mode: NULLABLE
- name: pod_name
type: STRING
mode: NULLABLE
- name: container_name
type: STRING
mode: NULLABLE
- name: cluster_name
type: STRING
mode: NULLABLE
- name: namespace_name
type: STRING
mode: NULLABLE
- name: location
type: STRING
mode: NULLABLE
- name: textPayload
type: STRING
mode: NULLABLE
- name: jsonPayload
type: RECORD
mode: NULLABLE
fields:
- name: log_type
type: STRING
mode: NULLABLE
- name: request
type: STRING
mode: NULLABLE
- name: request_time
type: FLOAT
mode: NULLABLE
- name: x_forwarded_for
type: STRING
mode: NULLABLE
- name: bytes_sent
type: FLOAT
mode: NULLABLE
- name: user_agent
type: STRING
mode: NULLABLE
- name: x_forwarded_proto
type: STRING
mode: NULLABLE
- name: trace
type: STRING
mode: NULLABLE
- name: remote_user
type: STRING
mode: NULLABLE
- name: remote_addr
type: STRING
mode: NULLABLE
- name: referrer
type: STRING
mode: NULLABLE
- name: status
type: STRING
mode: NULLABLE
- name: logger
type: STRING
mode: NULLABLE
- name: pid
type: FLOAT
mode: NULLABLE
- name: timestamp
type: FLOAT
mode: NULLABLE
- name: fields
type: RECORD
mode: NULLABLE
fields:
- name: remoteaddresschain
type: STRING
mode: NULLABLE
- name: t
type: STRING
mode: NULLABLE
- name: method
type: STRING
mode: NULLABLE
- name: status
type: STRING
mode: NULLABLE
- name: useragent
type: STRING
mode: NULLABLE
- name: contentlength
type: STRING
mode: NULLABLE
- name: clientaddress
type: STRING
mode: NULLABLE
- name: path
type: STRING
mode: NULLABLE
- name: errno
type: FLOAT
mode: NULLABLE
- name: code
type: FLOAT
mode: NULLABLE
- name: agent
type: STRING
mode: NULLABLE
- name: client_id
type: STRING
mode: NULLABLE
- name: uid
type: STRING
mode: NULLABLE
- name: referer
type: STRING
mode: NULLABLE
- name: message
type: STRING
mode: NULLABLE
- name: event
type: STRING
mode: NULLABLE
- name: time
type: FLOAT
mode: NULLABLE
- name: event_type
type: STRING
mode: NULLABLE
- name: region
type: STRING
mode: NULLABLE
- name: country
type: STRING
mode: NULLABLE
- name: session_id
type: FLOAT
mode: NULLABLE
- name: language
type: STRING
mode: NULLABLE
- name: app_version
type: STRING
mode: NULLABLE
- name: user_properties
type: STRING
mode: NULLABLE
- name: event_properties
type: STRING
mode: NULLABLE
- name: device_id
type: STRING
mode: NULLABLE
- name: op
type: STRING
mode: NULLABLE
- name: os_name
type: STRING
mode: NULLABLE
- name: err
type: STRING
mode: NULLABLE
- name: joierrors
type: STRING
mode: NULLABLE
- name: stack
type: STRING
mode: NULLABLE
- name: reason
type: STRING
mode: NULLABLE
- name: result
type: STRING
mode: NULLABLE
- name: api
type: FLOAT
mode: NULLABLE
- name: os_version
type: STRING
mode: NULLABLE
- name: user_id
type: STRING
mode: NULLABLE
- name: msg
type: STRING
mode: NULLABLE
- name: error
type: STRING
mode: NULLABLE
- name: source
type: STRING
mode: NULLABLE
- name: blocked
type: STRING
mode: NULLABLE
- name: column
type: FLOAT
mode: NULLABLE
- name: line
type: FLOAT
mode: NULLABLE
- name: referrer
type: STRING
mode: NULLABLE
- name: violated
type: STRING
mode: NULLABLE
- name: url
type: STRING
mode: NULLABLE
- name: version
type: STRING
mode: NULLABLE
- name: directory
type: STRING
mode: NULLABLE
- name: port
type: FLOAT
mode: NULLABLE
- name: poolstats
type: STRING
mode: NULLABLE
- name: signal
type: STRING
mode: NULLABLE
- name: reqtime
type: FLOAT
mode: NULLABLE
- name: rp
type: STRING
mode: NULLABLE
- name: assertion_verification_time
type: FLOAT
mode: NULLABLE
- name: trustedissuers
type: STRING
mode: NULLABLE
- name: assertion
type: STRING
mode: NULLABLE
- name: host
type: STRING
mode: NULLABLE
- name: header
type: STRING
mode: NULLABLE
- name: usergroupheader
type: STRING
mode: NULLABLE
- name: usergroupheader_notnull
type: STRING
mode: NULLABLE
- name: user
type: BYTES
mode: NULLABLE
- name: email
type: BYTES
mode: NULLABLE
- name: search_type
type: STRING
mode: NULLABLE
- name: auto_completed
type: BOOLEAN
mode: NULLABLE
- name: payload
type: STRING
mode: NULLABLE
- name: user_agent
type: STRING
mode: NULLABLE
- name: ip_address
type: STRING
mode: NULLABLE
- name: document_version
type: STRING
mode: NULLABLE
- name: document_namespace
type: STRING
mode: NULLABLE
- name: document_type
type: STRING
mode: NULLABLE
- name: document_id
type: STRING
mode: NULLABLE
- name: originaltransactionid
type: STRING
mode: NULLABLE
- name: config
type: STRING
mode: NULLABLE
- name: duration
type: FLOAT
mode: NULLABLE
- name: info
type: STRING
mode: NULLABLE
- name: detail
type: STRING
mode: NULLABLE
- name: cause
type: STRING
mode: NULLABLE
- name: success
type: FLOAT
mode: NULLABLE
- name: device_model
type: STRING
mode: NULLABLE
- name: sample
type: STRING
mode: NULLABLE
- name: request
type: STRING
mode: NULLABLE
- name: name
type: STRING
mode: NULLABLE
- name: closed
type: BOOLEAN
mode: NULLABLE
- name: envversion
type: STRING
mode: NULLABLE
- name: type
type: STRING
mode: NULLABLE
- name: severity
type: FLOAT
mode: NULLABLE
- name: timestamp
type: TIMESTAMP
mode: NULLABLE
- name: receiveTimestamp
type: TIMESTAMP
mode: NULLABLE
- name: severity
type: STRING
mode: NULLABLE
- name: insertId
type: STRING
mode: NULLABLE
- name: httpRequest
type: RECORD
mode: NULLABLE
fields:
- name: requestMethod
type: STRING
mode: NULLABLE
- name: requestUrl
type: STRING
mode: NULLABLE
- name: requestSize
type: INTEGER
mode: NULLABLE
- name: status
type: INTEGER
mode: NULLABLE
- name: responseSize
type: INTEGER
mode: NULLABLE
- name: userAgent
type: STRING
mode: NULLABLE
- name: remoteIp
type: STRING
mode: NULLABLE
- name: serverIp
type: STRING
mode: NULLABLE
- name: referer
type: STRING
mode: NULLABLE
- name: cacheLookup
type: BOOLEAN
mode: NULLABLE
- name: cacheHit
type: BOOLEAN
mode: NULLABLE
- name: cacheValidatedWithOriginServer
type: BOOLEAN
mode: NULLABLE
- name: cacheFillBytes
type: INTEGER
mode: NULLABLE
- name: protocol
type: STRING
mode: NULLABLE
- name: labels
type: RECORD
mode: NULLABLE
fields:
- name: k8s_pod_app_kubernetes_io_component
type: STRING
mode: NULLABLE
- name: k8s_pod_deployment
type: STRING
mode: NULLABLE
- name: compute_googleapis_com_resource_name
type: STRING
mode: NULLABLE
- name: k8s_pod_env_code
type: STRING
mode: NULLABLE
- name: k8s_pod_pod_template_hash
type: STRING
mode: NULLABLE
- name: k8s_pod_app_kubernetes_io_name
type: STRING
mode: NULLABLE
- name: k8s_pod_job_name
type: STRING
mode: NULLABLE
- name: k8s_pod_controller_uid
type: STRING
mode: NULLABLE
- name: k8s_pod_redis
type: STRING
mode: NULLABLE
- name: operation
type: RECORD
mode: NULLABLE
fields:
- name: id
type: STRING
mode: NULLABLE
- name: producer
type: STRING
mode: NULLABLE
- name: first
type: BOOLEAN
mode: NULLABLE
- name: last
type: BOOLEAN
mode: NULLABLE
- name: trace
type: STRING
mode: NULLABLE
- name: spanId
type: STRING
mode: NULLABLE
- name: traceSampled
type: BOOLEAN
mode: NULLABLE
- name: sourceLocation
type: RECORD
mode: NULLABLE
fields:
- name: file
type: STRING
mode: NULLABLE
- name: line
type: INTEGER
mode: NULLABLE
- name: function
type: STRING
mode: NULLABLE
- name: split
type: RECORD
mode: NULLABLE
fields:
- name: uid
type: STRING
mode: NULLABLE
- name: index
type: INTEGER
mode: NULLABLE
- name: totalSplits
type: INTEGER
mode: NULLABLE

Просмотреть файл

@ -16,7 +16,6 @@ labels:
# Query descheduled as a direct result of the migration from AWS to GCP:
# the last AWS instances were spun down on 27th September 2023.
# scheduling:
# scheduling:
# dag_name: bqetl_fxa_events
# arguments:
# - --schema_update_option=ALLOW_FIELD_ADDITION