Add dataset for monitoring schema errors over time (#442)

* Add query for last month of schema errors

* Add generated sql for schema error counts

* Move schemas into correct location

* Add document_version and named groups

* Skip schema error counts in dryrun
This commit is contained in:
Anthony Miyaguchi 2019-10-23 15:37:05 -07:00 коммит произвёл GitHub
Родитель e49dff0daa
Коммит d60c0fd842
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 113 добавлений и 0 удалений

Просмотреть файл

@ -29,6 +29,7 @@ DERIVED_DATASETS_DRY_RUN_URL = (
SKIP = {
# Access Denied
"sql/monitoring/schema_error_counts_v1/view.sql",
"sql/monitoring/structured_error_counts_v1/view.sql",
"sql/telemetry/fxa_content_events_v1/query.sql",
"sql/telemetry/fxa_auth_bounce_events_v1/query.sql",

Просмотреть файл

@ -0,0 +1,56 @@
-- Hourly counts of JSON schema validation errors over the 28 complete UTC days
-- preceding the current day, read from the payload_bytes_error tables.
-- Each output row is one (document, hour, job_name, path) group together with
-- its error count and its rank among the groups of the same document and hour.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.monitoring.schema_error_counts_v1`
AS
WITH extracted AS (
  SELECT
    TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS hour,
    job_name,
    document_namespace,
    document_type,
    document_version,
    -- `path` is the second colon-delimited segment of error_message.
    -- SAFE_OFFSET yields NULL instead of raising an out-of-bounds error when
    -- the message contains no colon, so one odd message cannot fail the view.
    SPLIT(error_message, ":")[SAFE_OFFSET(1)] AS path
  FROM
    `moz-fx-data-shared-prod.payload_bytes_error.*`
  WHERE
    -- Half-open window [start of today - 28 days, start of today): the current
    -- (incomplete) day is excluded and the first instant of the window is kept.
    submission_timestamp >= TIMESTAMP_SUB(
      TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), DAY),
      INTERVAL 28 DAY
    )
    AND submission_timestamp < TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), DAY)
    AND exception_class = 'org.everit.json.schema.ValidationException'
),
count_errors AS (
  SELECT
    document_namespace,
    document_type,
    document_version,
    hour,
    job_name,
    path,
    COUNT(*) AS error_count,
    -- Rank 1 is the most frequent (job_name, path) group for a document in a
    -- given hour; job_name and path break ties so the rank is deterministic.
    ROW_NUMBER() OVER (
      PARTITION BY
        hour,
        document_namespace,
        document_type,
        document_version
      ORDER BY
        COUNT(*) DESC,
        job_name,
        path
    ) AS error_rank
  FROM
    extracted
  GROUP BY
    document_namespace,
    document_type,
    document_version,
    hour,
    job_name,
    path
)
SELECT
  document_namespace,
  document_type,
  document_version,
  hour,
  job_name,
  path,
  error_count,
  error_rank
FROM
  count_errors
ORDER BY
  document_namespace,
  document_type,
  error_rank,
  hour

Просмотреть файл

@ -0,0 +1,56 @@
-- Hourly counts of JSON schema validation errors over the 28 complete UTC days
-- preceding the current day, read from the payload_bytes_error tables.
-- Each output row is one (document, hour, job_name, path) group together with
-- its error count and its rank among the groups of the same document and hour.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.monitoring.schema_error_counts_v1`
AS
WITH extracted AS (
  SELECT
    TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS hour,
    job_name,
    document_namespace,
    document_type,
    document_version,
    -- `path` is the second colon-delimited segment of error_message.
    -- SAFE_OFFSET yields NULL instead of raising an out-of-bounds error when
    -- the message contains no colon, so one odd message cannot fail the view.
    SPLIT(error_message, ":")[SAFE_OFFSET(1)] AS path
  FROM
    `moz-fx-data-shared-prod.payload_bytes_error.*`
  WHERE
    -- Half-open window [start of today - 28 days, start of today): the current
    -- (incomplete) day is excluded and the first instant of the window is kept.
    submission_timestamp >= TIMESTAMP_SUB(
      TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), DAY),
      INTERVAL 28 DAY
    )
    AND submission_timestamp < TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), DAY)
    AND exception_class = 'org.everit.json.schema.ValidationException'
),
count_errors AS (
  SELECT
    document_namespace,
    document_type,
    document_version,
    hour,
    job_name,
    path,
    COUNT(*) AS error_count,
    -- Rank 1 is the most frequent (job_name, path) group for a document in a
    -- given hour; job_name and path break ties so the rank is deterministic.
    ROW_NUMBER() OVER (
      PARTITION BY
        hour,
        document_namespace,
        document_type,
        document_version
      ORDER BY
        COUNT(*) DESC,
        job_name,
        path
    ) AS error_rank
  FROM
    extracted
  GROUP BY
    document_namespace,
    document_type,
    document_version,
    hour,
    job_name,
    path
)
SELECT
  document_namespace,
  document_type,
  document_version,
  hour,
  job_name,
  path,
  error_count,
  error_rank
FROM
  count_errors
ORDER BY
  document_namespace,
  document_type,
  error_rank,
  hour