Use live tables for structured error counts (#4598)
* Use live tables for structured error counts * Prevent old records from being deleted
This commit is contained in:
Родитель
16bdbcbcc8
Коммит
be60f5aa56
|
@ -2,6 +2,7 @@ CREATE OR REPLACE VIEW
|
|||
`moz-fx-data-shared-prod.monitoring.structured_error_counts`
|
||||
AS
|
||||
SELECT
|
||||
*
|
||||
*,
|
||||
CAST(submission_date AS TIMESTAMP) AS hour -- for backwards compatibility
|
||||
FROM
|
||||
`moz-fx-data-shared-prod.monitoring_derived.structured_error_counts_v1`
|
||||
`moz-fx-data-shared-prod.monitoring_derived.structured_error_counts_v2`
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
DECLARE dummy INT64; -- declare a dummy variable to indicate that this is a script to bigquery-etl
|
||||
|
||||
CREATE TEMP TABLE
|
||||
deletion_counts(submission_date DATE, dataset_id STRING, num_rows INT64);
|
||||
|
||||
|
@ -43,5 +42,6 @@ THEN
|
|||
VALUES
|
||||
(d.submission_date, d.dataset_id, num_rows)
|
||||
WHEN NOT MATCHED BY SOURCE
|
||||
AND r.submission_date = @submission_date
|
||||
THEN
|
||||
DELETE;
|
||||
|
|
|
@ -1,66 +0,0 @@
|
|||
-- Hourly error ratio per document (namespace, type, version) and error type,
-- computed over the trailing 28 * 24 hours of the two payload_bytes tables.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.monitoring_derived.structured_error_counts_v1`
AS
WITH
  -- Rows of the decoded-payload table, counted per hour and document.
  decoded AS (
    SELECT
      TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS hour,
      metadata.document_namespace,
      metadata.document_type,
      metadata.document_version,
      COUNT(*) AS ping_count
    FROM
      `moz-fx-data-shared-prod.monitoring.payload_bytes_decoded_structured`
    WHERE
      submission_timestamp >= TIMESTAMP_SUB(current_timestamp, INTERVAL(28 * 24) HOUR)
    GROUP BY
      hour,
      document_namespace,
      document_type,
      document_version
  ),
  -- Rows of the error table, counted per hour, document and error type.
  rejected AS (
    SELECT
      TIMESTAMP_TRUNC(submission_timestamp, HOUR) AS hour,
      document_namespace,
      document_type,
      document_version,
      error_type,
      COUNT(*) AS error_count
    FROM
      `moz-fx-data-shared-prod.monitoring.payload_bytes_error_structured`
    WHERE
      submission_timestamp >= TIMESTAMP_SUB(current_timestamp, INTERVAL(28 * 24) HOUR)
    GROUP BY
      hour,
      document_namespace,
      document_type,
      document_version,
      error_type
  )
SELECT
  hour,
  document_namespace,
  document_type,
  document_version,
  rejected.error_type,
  -- The reported ping_count is decoded rows plus errored rows.
  COALESCE(decoded.ping_count, 0) + COALESCE(rejected.error_count, 0) AS ping_count,
  COALESCE(rejected.error_count, 0) AS error_count,
  -- SAFE_DIVIDE yields NULL instead of failing when the denominator is 0.
  SAFE_DIVIDE(
    1.0 * COALESCE(rejected.error_count, 0),
    COALESCE(decoded.ping_count, 0) + COALESCE(rejected.error_count, 0)
  ) AS error_ratio
FROM
  decoded
FULL OUTER JOIN
  rejected
USING
  (hour, document_namespace, document_type, document_version)
|
|
@ -0,0 +1,19 @@
|
|||
---
friendly_name: Structured Error Counts
description: >
  A daily count of structured errors by document namespace
owners:
  - ascholtz@mozilla.com
labels:
  schedule: daily
scheduling:
  dag_name: bqetl_monitoring
  # Wildcard entry: project, dataset pattern, table pattern.
  referenced_tables:
    - ['moz-fx-data-shared-prod', '*_live', '*']
  # No partition parameter is set; the date is passed as an explicit
  # `submission_date` query parameter instead.
  # NOTE(review): `{{ds}}` is presumably the scheduler's run-date template - confirm.
  date_partition_parameter: null
  parameters: ["submission_date:DATE:{{ds}}"]
bigquery:
  time_partitioning:
    type: day
    field: submission_date
    require_partition_filter: false
|
|
@ -0,0 +1,25 @@
|
|||
# Column layout of the structured error counts table; every column is NULLABLE.
fields:
# Key columns.
- mode: NULLABLE
  name: submission_date
  type: DATE
- mode: NULLABLE
  name: document_namespace
  type: STRING
- mode: NULLABLE
  name: document_type
  type: STRING
- mode: NULLABLE
  name: document_version
  type: STRING
# Measures.
- mode: NULLABLE
  name: ping_count
  type: INTEGER
- mode: NULLABLE
  name: error_type
  type: STRING
- mode: NULLABLE
  name: error_count
  type: INTEGER
- mode: NULLABLE
  name: error_ratio
  type: FLOAT
|
|
@ -0,0 +1,127 @@
|
|||
-- A leading DECLARE marks this file as a script for bigquery-etl;
-- the variable itself is never read.
DECLARE dummy INT64;

-- Scratch table holding one row count per date and document
-- (namespace, type, version).
CREATE TEMP TABLE ping_counts(
  submission_date DATE,
  document_namespace STRING,
  document_type STRING,
  document_version STRING,
  ping_count INT64
);
|
||||
|
||||
-- Fill ping_counts with the per-partition row counts of every table in every
-- `*_live` dataset of moz-fx-data-shared-prod. One INSERT is issued per dataset
-- because INFORMATION_SCHEMA.PARTITIONS is scoped to a single dataset.
FOR record IN (
  SELECT
    schema_name AS dataset_id
  FROM
    `moz-fx-data-shared-prod.INFORMATION_SCHEMA.SCHEMATA`
  WHERE
    -- ENDS_WITH instead of LIKE "%_live%": in LIKE, `_` matches any single
    -- character, so the old pattern also accepted names such as "xlive_foo".
    ENDS_WITH(schema_name, "_live")
)
DO
  EXECUTE IMMEDIATE CONCAT(
    "INSERT ping_counts (submission_date, document_namespace, document_type, document_version, ping_count) ",
    -- SAFE.PARSE_DATE returns NULL (instead of failing the whole script) for
    -- partition ids that are not YYYYMMDD; those rows are then dropped by the
    -- `< CURRENT_DATE` filter below.
    "SELECT SAFE.PARSE_DATE('%Y%m%d', PARTITION_ID) AS submission_date, ",
    "REPLACE(TABLE_SCHEMA, '_live', '') AS document_namespace, ",
    "REGEXP_EXTRACT(TABLE_NAME, r'(.+)_v[0-9]+') AS document_type, ",
    "REGEXP_EXTRACT(TABLE_NAME, r'.+_v([0-9]+)') AS document_version, ",
    "TOTAL_ROWS AS ping_count ",
    -- Qualify with the project the dataset list was read from, so the lookup
    -- does not depend on the job's default project.
    "FROM `moz-fx-data-shared-prod.",
    record.dataset_id,
    ".INFORMATION_SCHEMA.PARTITIONS` ",
    "WHERE PARTITION_ID != '__NULL__' AND ",
    -- Only partitions of days before today are counted.
    "SAFE.PARSE_DATE('%Y%m%d', PARTITION_ID) < CURRENT_DATE AND ",
    -- The date is bound as a parameter rather than spliced into the text:
    -- a NULL date now really means "all dates" (previously CONCAT returned
    -- NULL and the quoted literal could never satisfy IS NULL).
    "(@submission_date IS NULL OR @submission_date = SAFE.PARSE_DATE('%Y%m%d', PARTITION_ID))"
  ) USING @submission_date AS submission_date;
END FOR;
|
||||
|
||||
-- Combine the per-document row counts in ping_counts with the error rows of
-- @submission_date into one row per (date, document, error_type).
-- The SELECT list must line up with the declared column list; the previous
-- version selected `ping_count` twice (raw and computed), producing nine
-- columns with a duplicate name for eight declared columns.
CREATE TEMP TABLE
  error_counts(
    submission_date DATE,
    document_namespace STRING,
    document_type STRING,
    document_version STRING,
    ping_count INTEGER,
    error_type STRING,
    error_count INTEGER,
    error_ratio FLOAT64
  )
AS
WITH errors AS (
  -- Error rows for the requested day, counted per document and error type.
  -- NOTE(review): unlike the ping_counts load, a NULL @submission_date selects
  -- no rows here - confirm whether an "all dates" mode is intended.
  SELECT
    DATE(submission_timestamp) AS submission_date,
    document_namespace,
    document_type,
    document_version,
    error_type,
    COUNT(*) AS error_count
  FROM
    `moz-fx-data-shared-prod.monitoring.payload_bytes_error_structured`
  WHERE
    DATE(submission_timestamp) = @submission_date
  GROUP BY
    submission_date,
    document_namespace,
    document_type,
    document_version,
    error_type
)
SELECT
  submission_date,
  document_namespace,
  document_type,
  document_version,
  -- Reported ping_count is the ping_counts row count plus the errored rows.
  COALESCE(ping_count, 0) + COALESCE(error_count, 0) AS ping_count,
  error_type,
  COALESCE(error_count, 0) AS error_count,
  -- SAFE_DIVIDE yields NULL instead of failing when the denominator is 0.
  SAFE_DIVIDE(
    1.0 * COALESCE(error_count, 0),
    COALESCE(ping_count, 0) + COALESCE(error_count, 0)
  ) AS error_ratio
FROM
  ping_counts
FULL OUTER JOIN
  errors
USING
  (submission_date, document_namespace, document_type, document_version);
|
||||
|
||||
-- Publish error_counts into the destination table.
--   * Source rows with no counterpart in the table are inserted.
--   * Table rows for @submission_date with no counterpart in the source are
--     deleted; rows of any other date are left untouched.
--   * Rows present on both sides are left as they are.
MERGE
  `moz-fx-data-shared-prod.monitoring_derived.structured_error_counts_v2` r
USING
  error_counts d
ON
  d.submission_date = r.submission_date
  AND r.document_namespace = d.document_namespace
  AND r.document_type = d.document_type
  AND r.document_version = d.document_version
  -- error_type is part of the row grain (one row per document and error type).
  -- Without it, a re-run could neither insert a newly seen error type nor
  -- delete one that disappeared. The comparison is NULL-safe because
  -- error_type is NULL for documents that had no errors.
  AND r.error_type IS NOT DISTINCT FROM d.error_type
WHEN NOT MATCHED
THEN
  INSERT
    (
      submission_date,
      document_namespace,
      document_type,
      document_version,
      ping_count,
      error_type,
      error_count,
      error_ratio
    )
  VALUES
    (
      d.submission_date,
      d.document_namespace,
      d.document_type,
      d.document_version,
      d.ping_count,
      d.error_type,
      d.error_count,
      d.error_ratio
    )
WHEN NOT MATCHED BY SOURCE
  AND r.submission_date = @submission_date
THEN
  DELETE;
|
Загрузка…
Ссылка в новой задаче