Bug 1844886: Add non-norm cols to glam scalar tbls (#4111)

* Add non-norm cols to scalar tbls

* Add missing schema to scalar_percentiles_v1

* Add missing schema to scalar_percentiles_v1
This commit is contained in:
Eduardo Filho 2023-07-25 11:13:20 -04:00 committed by GitHub
Parent 1f7f2facea
Commit 25d4ab4042
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 150 additions and 55 deletions

View File

@ -220,11 +220,13 @@ dry_run:
- sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_bucket_counts_v1/query.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/clients_non_norm_histogram_bucket_counts_v1/query.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/glam_client_probe_counts_extract_v1/query.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/client_probe_counts/view.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/glam_sample_counts_v1/query.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/glam_user_counts_v1/query.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/scalar_percentiles_v1/query.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/histogram_percentiles_v1/query.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/query.sql
- sql/moz-fx-data-shared-prod/telemetry/client_probe_counts/view.sql
- sql/moz-fx-data-shared-prod/monitoring_derived/bigquery_etl_sql_run_check_v1/query.sql
# Dataset sql/glam-fenix-dev:glam_etl was not found
- sql/glam-fenix-dev/glam_etl/**/*.sql
@ -295,6 +297,7 @@ format:
- sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_bucket_counts_v1/query.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/clients_non_norm_histogram_bucket_counts_v1/query.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/clients_scalar_probe_counts_v1/query.sql
- sql/moz-fx-data-shared-prod/telemetry/client_probe_counts/view.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/core_clients_daily_v1/query.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/core_clients_last_seen_v1/init.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/core_clients_last_seen_v1/query.sql

View File

@ -296,51 +296,58 @@ clients_scalar_bucket_counts AS (
process,
client_agg_type,
bucket
),
aggregated AS (
  -- One output row per (os, app_version, app_build_id, channel, metric,
  -- metric_type, key, process, client_agg_type, agg_type) combination,
  -- built from the per-bucket rows of clients_scalar_bucket_counts.
  SELECT
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    process,
    -- Placeholder columns: kept NULL so the column list lines up with the
    -- clients_histogram_probe_counts_v1 schema.
    NULL AS first_bucket,
    NULL AS last_bucket,
    NULL AS num_buckets,
    client_agg_type,
    agg_type,
    SUM(user_count) AS total_users,
    CASE
      -- Scalar probes: the bucket list comes from buckets_by_metric.
      WHEN metric_type IN ('scalar', 'keyed-scalar')
        THEN mozfun.glam.histogram_fill_buckets(
            ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, user_count)),
            ANY_VALUE(buckets)
          )
      -- Boolean probes: the bucket list is the fixed set of three labels.
      WHEN metric_type IN ('boolean', 'keyed-scalar-boolean')
        THEN mozfun.glam.histogram_fill_buckets(
            ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, user_count)),
            ['always', 'never', 'sometimes']
          )
      -- Any other metric_type yields no aggregates.
      ELSE NULL
    END AS aggregates
  FROM
    clients_scalar_bucket_counts
  LEFT JOIN
    buckets_by_metric
    USING (metric, key)
  GROUP BY
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    process,
    client_agg_type,
    agg_type
)
SELECT
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key,
process,
-- empty columns to match clients_histogram_probe_counts_v1 schema
NULL AS first_bucket,
NULL AS last_bucket,
NULL AS num_buckets,
client_agg_type,
agg_type,
SUM(user_count) AS total_users,
CASE
WHEN metric_type = 'scalar'
OR metric_type = 'keyed-scalar'
THEN mozfun.glam.histogram_fill_buckets(
ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, user_count)),
ANY_VALUE(buckets)
)
WHEN metric_type = 'boolean'
OR metric_type = 'keyed-scalar-boolean'
THEN mozfun.glam.histogram_fill_buckets(
ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, user_count)),
['always', 'never', 'sometimes']
)
END AS aggregates
*,
aggregates AS non_norm_aggregates
FROM
clients_scalar_bucket_counts
LEFT JOIN
buckets_by_metric
USING
(metric, key)
GROUP BY
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key,
process,
client_agg_type,
agg_type
aggregated

View File

@ -79,11 +79,18 @@ percentiles AS (
metric_type,
key,
process,
client_agg_type)
SELECT *
REPLACE(mozfun.glam.map_from_array_offsets_precise(
[0.1, 1.0, 5.0, 25.0, 50.0, 75.0, 95.0, 99.0, 99.9],
aggregates
) AS aggregates)
FROM percentiles
client_agg_type
),
aggregated AS (
  -- Keep every column of percentiles as-is, except that aggregates is
  -- replaced in place by the result of map_from_array_offsets_precise for
  -- the listed percentile points.
  SELECT
    * REPLACE (
      mozfun.glam.map_from_array_offsets_precise(
        [0.1, 1.0, 5.0, 25.0, 50.0, 75.0, 95.0, 99.0, 99.9],
        aggregates
      ) AS aggregates
    )
  FROM
    percentiles
)
-- Final output: all columns of aggregated, plus non_norm_aggregates, which is
-- a copy of the aggregates column.
SELECT
  aggregated.*,
  aggregated.aggregates AS non_norm_aggregates
FROM
  aggregated

View File

@ -0,0 +1,78 @@
# Table schema: scalar dimension columns, NULLABLE bucket-metadata columns,
# total_users, and two REPEATED key/value RECORD columns
# (aggregates and non_norm_aggregates) with identical sub-fields.
# NOTE(review): presumably the scalar_percentiles_v1 schema mentioned in the
# commit message -- confirm against the file path.
fields:
- mode: NULLABLE
  name: os
  type: STRING
- mode: NULLABLE
  name: app_version
  type: INTEGER
- mode: NULLABLE
  name: app_build_id
  type: STRING
- mode: NULLABLE
  name: channel
  type: STRING
- mode: NULLABLE
  name: metric
  type: STRING
- mode: NULLABLE
  name: metric_type
  type: STRING
- mode: NULLABLE
  name: key
  type: STRING
- mode: NULLABLE
  name: process
  type: STRING
- mode: NULLABLE
  name: first_bucket
  type: INTEGER
- mode: NULLABLE
  name: last_bucket
  type: INTEGER
- mode: NULLABLE
  name: num_buckets
  type: INTEGER
- mode: NULLABLE
  name: client_agg_type
  type: STRING
- mode: NULLABLE
  name: agg_type
  type: STRING
- mode: NULLABLE
  name: total_users
  type: INTEGER
- fields:
  - mode: NULLABLE
    name: key
    type: STRING
  - mode: NULLABLE
    name: value
    type: FLOAT
  mode: REPEATED
  name: aggregates
  type: RECORD
- fields:
  - mode: NULLABLE
    name: key
    type: STRING
  - mode: NULLABLE
    name: value
    type: FLOAT
  mode: REPEATED
  name: non_norm_aggregates
  type: RECORD

View File

@ -1,2 +1,2 @@
{"agg_type":"histogram","aggregates":[],"app_build_id":"first","app_version":75,"channel":"nightly","client_agg_type":"min","key":"","metric":"some_metric","metric_type":"scalar","os":"first","process":"parent","total_users":400}
{"agg_type":"histogram","aggregates":[],"app_build_id":"first","app_version":75,"channel":"nightly","client_agg_type":"min","key":"","metric":"some_metric","metric_type":"scalar","process":"parent","total_users":400}
{"agg_type":"histogram","aggregates":[],"non_norm_aggregates":[],"app_build_id":"first","app_version":75,"channel":"nightly","client_agg_type":"min","key":"","metric":"some_metric","metric_type":"scalar","os":"first","process":"parent","total_users":400}
{"agg_type":"histogram","aggregates":[],"non_norm_aggregates":[],"app_build_id":"first","app_version":75,"channel":"nightly","client_agg_type":"min","key":"","metric":"some_metric","metric_type":"scalar","process":"parent","total_users":400}