firefox_desktop_to_glam (#2485)
* firefox_desktop_to_glam: added SQL files, part 1 * update with the latest mozfun function
This commit is contained in:
Родитель
ca6b93a403
Коммит
80af7b96df
|
@ -173,6 +173,24 @@ def main():
|
|||
"num_versions_to_keep": 3,
|
||||
"total_users": 90000,
|
||||
},
|
||||
"firefox_desktop_glam_nightly": {
|
||||
"build_date_udf": "mozfun.glam.build_hour_to_datetime",
|
||||
"filter_version": True,
|
||||
"num_versions_to_keep": 3,
|
||||
"total_users": 10,
|
||||
},
|
||||
"firefox_desktop_glam_beta": {
|
||||
"build_date_udf": "mozfun.glam.build_hour_to_datetime",
|
||||
"filter_version": True,
|
||||
"num_versions_to_keep": 3,
|
||||
"total_users": 10,
|
||||
},
|
||||
"firefox_desktop_glam_release": {
|
||||
"build_date_udf": "mozfun.glam.build_hour_to_datetime",
|
||||
"filter_version": True,
|
||||
"num_versions_to_keep": 3,
|
||||
"total_users": 10,
|
||||
},
|
||||
}
|
||||
validate(instance=config, schema=config_schema)
|
||||
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
-- Beta-channel slice of the Firefox Desktop daily histogram aggregates view.
-- The build id is passed through mozfun.glam.build_seconds_to_hour and the
-- channel column is replaced by the literal "*".
CREATE OR REPLACE VIEW
  `{{ project }}`.glam_etl.firefox_desktop_glam_beta__view_clients_daily_histogram_aggregates_v1
AS
SELECT
  * EXCEPT (app_build_id, channel),
  `mozfun.glam.build_seconds_to_hour`(app_build_id) AS app_build_id,
  "*" AS channel
FROM
  `{{ project }}`.glam_etl.firefox_desktop__view_clients_daily_histogram_aggregates_v1
WHERE
  -- Filters on the source column; the "*" alias above is not visible in WHERE.
  channel = 'beta'
|
|
@ -0,0 +1,17 @@
|
|||
-- Beta-channel slice of the Firefox Desktop daily scalar aggregates view.
-- The build id is passed through mozfun.glam.build_seconds_to_hour and the
-- channel column is replaced by the literal "*".
CREATE OR REPLACE VIEW
  `{{ project }}`.glam_etl.firefox_desktop_glam_beta__view_clients_daily_scalar_aggregates_v1
AS
SELECT
  * EXCEPT (app_build_id, channel),
  `mozfun.glam.build_seconds_to_hour`(app_build_id) AS app_build_id,
  "*" AS channel
FROM
  `{{ project }}`.glam_etl.firefox_desktop__view_clients_daily_scalar_aggregates_v1
WHERE
  -- Filters on the source column; the "*" alias above is not visible in WHERE.
  channel = 'beta'
|
|
@ -0,0 +1,17 @@
|
|||
-- Nightly-channel slice of the Firefox Desktop daily histogram aggregates view.
-- The build id is passed through mozfun.glam.build_seconds_to_hour and the
-- channel column is replaced by the literal "*".
CREATE OR REPLACE VIEW
  `{{ project }}`.glam_etl.firefox_desktop_glam_nightly__view_clients_daily_histogram_aggregates_v1
AS
SELECT
  * EXCEPT (app_build_id, channel),
  `mozfun.glam.build_seconds_to_hour`(app_build_id) AS app_build_id,
  "*" AS channel
FROM
  `{{ project }}`.glam_etl.firefox_desktop__view_clients_daily_histogram_aggregates_v1
WHERE
  -- Filters on the source column; the "*" alias above is not visible in WHERE.
  channel = 'nightly'
|
|
@ -0,0 +1,17 @@
|
|||
-- Nightly-channel slice of the Firefox Desktop daily scalar aggregates view.
-- The build id is passed through mozfun.glam.build_seconds_to_hour and the
-- channel column is replaced by the literal "*".
CREATE OR REPLACE VIEW
  `{{ project }}`.glam_etl.firefox_desktop_glam_nightly__view_clients_daily_scalar_aggregates_v1
AS
SELECT
  * EXCEPT (app_build_id, channel),
  `mozfun.glam.build_seconds_to_hour`(app_build_id) AS app_build_id,
  "*" AS channel
FROM
  `{{ project }}`.glam_etl.firefox_desktop__view_clients_daily_scalar_aggregates_v1
WHERE
  -- Filters on the source column; the "*" alias above is not visible in WHERE.
  channel = 'nightly'
|
|
@ -0,0 +1,17 @@
|
|||
-- Release-channel slice of the Firefox Desktop daily histogram aggregates view.
--
-- This view must carry the "release" logical app id in its name: when it was
-- created under the nightly name, CREATE OR REPLACE silently overwrote the
-- nightly view with release data and left no release view at all.
CREATE OR REPLACE VIEW
  `{{ project }}`.glam_etl.firefox_desktop_glam_release__view_clients_daily_histogram_aggregates_v1
AS
WITH extracted AS (
  -- Keep only rows reported on the release channel.
  SELECT
    *
  FROM
    `{{ project }}`.glam_etl.firefox_desktop__view_clients_daily_histogram_aggregates_v1
  WHERE
    channel = 'release'
)
SELECT
  * EXCEPT (app_build_id, channel),
  -- NOTE(review): presumably coarsens the build id to hour granularity;
  -- confirm against the mozfun.glam.build_seconds_to_hour definition.
  `mozfun.glam.build_seconds_to_hour`(app_build_id) AS app_build_id,
  -- The channel is already fixed by the filter above, so it is emitted as "*".
  "*" AS channel
FROM
  extracted
|
|
@ -0,0 +1,17 @@
|
|||
-- Release-channel slice of the Firefox Desktop daily scalar aggregates view.
--
-- This view must carry the "release" logical app id in its name: when it was
-- created under the nightly name, CREATE OR REPLACE silently overwrote the
-- nightly view with release data and left no release view at all.
CREATE OR REPLACE VIEW
  `{{ project }}`.glam_etl.firefox_desktop_glam_release__view_clients_daily_scalar_aggregates_v1
AS
WITH extracted AS (
  -- Keep only rows reported on the release channel.
  SELECT
    *
  FROM
    `{{ project }}`.glam_etl.firefox_desktop__view_clients_daily_scalar_aggregates_v1
  WHERE
    channel = 'release'
)
SELECT
  * EXCEPT (app_build_id, channel),
  -- NOTE(review): presumably coarsens the build id to hour granularity;
  -- confirm against the mozfun.glam.build_seconds_to_hour definition.
  `mozfun.glam.build_seconds_to_hour`(app_build_id) AS app_build_id,
  -- The channel is already fixed by the filter above, so it is emitted as "*".
  "*" AS channel
FROM
  extracted
|
|
@ -0,0 +1,54 @@
|
|||
#!/bin/bash
# Generate the Firefox Desktop GLAM SQL that is checked into the repository and,
# unless GENERATE_ONLY is set, run it to test the workflow.
#
# Environment variables (a flag counts as "off" only when it is exactly "false"):
#   PROJECT        directory name under sql/ that holds glam_etl
#                  (default: glam-fenix-dev)
#   SKIP_GENERATE  skip regenerating the SQL
#   SKIP_DAILY     skip running the daily queries
#   GENERATE_ONLY  stop after generation and the schema update

set -e

project=${PROJECT:-glam-fenix-dev}
skip_generate=${SKIP_GENERATE:-false}
skip_daily=${SKIP_DAILY:-false}
generate_only=${GENERATE_ONLY:-false}

# NOTE: only the current "firefox desktop" app id is processed by this script.
# The cleanup loop below keeps every generated table for firefox_desktop and,
# for any other app id added to this list, only the tables whose name contains
# "metrics".
app_ids=(
    "firefox_desktop"
)
logical_app_id="firefox_desktop_glam_nightly"

dir="$(dirname "$0")/.."
sql_dir=$dir/../../sql/$project/glam_etl

if [[ $skip_generate == false ]]; then
    # Generate the daily queries for every app id in parallel.
    job_pids=()
    for app_id in "${app_ids[@]}"; do
        PRODUCT=$app_id STAGE=daily "$dir/generate_glean_sql" &
        job_pids+=("$!")
    done
    # A bare `wait` returns 0 even when a background job failed; waiting on
    # each pid lets `set -e` abort the script if any generator exits non-zero.
    for pid in "${job_pids[@]}"; do
        wait "$pid"
    done

    # remove tables to reduce noise of checked-in queries
    for app_id in "${app_ids[@]}"; do
        if [[ $app_id == "firefox_desktop" ]]; then
            continue
        fi
        for path in "${sql_dir}/${app_id}__clients"*; do
            # An unmatched glob is left as the literal pattern; skip it
            # instead of letting rm fail on a path that does not exist.
            if [[ ! -e $path ]]; then
                continue
            fi
            if [[ $path == "${sql_dir}/${app_id}__clients"*metrics* ]]; then
                continue
            fi
            rm -r -- "$path"
        done
    done

    PRODUCT=$logical_app_id STAGE=incremental "$dir/generate_glean_sql"
fi

if [[ $generate_only != false ]]; then
    bqetl glam glean update-schemas
    exit
fi

if [[ $skip_daily == false ]]; then
    for app_id in "${app_ids[@]}"; do
        PRODUCT=$app_id STAGE=daily "$dir/run_glam_sql"
    done
fi

PRODUCT=$logical_app_id STAGE=incremental "$dir/run_glam_sql"
bqetl glam glean update-schemas
|
|
@ -0,0 +1,148 @@
|
|||
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_histogram_aggregates --source-table firefox_desktop_stable.metrics_v1
--
-- Emits one row per (sample_id, client, ping type, day, OS, version, build,
-- channel) holding the client's distribution metrics from the metrics ping,
-- summed bucket-by-bucket into a histogram_aggregates array.
WITH pings AS (
  -- Metrics pings submitted on @submission_date that carry a client id.
  SELECT
    sample_id,
    metrics,
    client_info.client_id AS client_id,
    "metrics" AS ping_type,
    DATE(submission_timestamp) AS submission_date,
    client_info.os AS os,
    -- Text before the first '.' of the display version as an integer;
    -- 0 when that text is missing or not an integer.
    COALESCE(
      SAFE_CAST(SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS INT64),
      0
    ) AS app_version,
    client_info.app_build AS app_build_id,
    client_info.app_channel AS channel
  FROM
    `moz-fx-data-shared-prod.firefox_desktop_stable.metrics_v1`
  WHERE
    DATE(submission_timestamp) = @submission_date
    AND client_info.client_id IS NOT NULL
),
per_ping_histograms AS (
  -- Gather every distribution metric of a ping into a single array of
  -- (metric, metric_type, bucket values) entries.
  SELECT
    sample_id, client_id, ping_type, submission_date,
    os, app_version, app_build_id, channel,
    ARRAY<STRUCT<metric STRING, metric_type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
      ("fog_ipc_buffer_sizes", "memory_distribution", metrics.memory_distribution.fog_ipc_buffer_sizes.values),
      ("fog_ipc_flush_durations", "timing_distribution", metrics.timing_distribution.fog_ipc_flush_durations.values),
      ("glean_database_size", "memory_distribution", metrics.memory_distribution.glean_database_size.values),
      ("glean_upload_discarded_exceeding_pings_size", "memory_distribution", metrics.memory_distribution.glean_upload_discarded_exceeding_pings_size.values),
      ("glean_upload_pending_pings_directory_size", "memory_distribution", metrics.memory_distribution.glean_upload_pending_pings_directory_size.values),
      ("paint_build_displaylist_time", "timing_distribution", metrics.timing_distribution.paint_build_displaylist_time.values)
    ] AS entries
  FROM
    pings
),
histogram_rows AS (
  -- One row per ping and metric, dropping metrics the ping did not report.
  SELECT
    sample_id, client_id, ping_type, submission_date,
    os, app_version, app_build_id, channel,
    entry.metric,
    entry.metric_type,
    entry.value
  FROM
    per_ping_histograms
  CROSS JOIN
    UNNEST(entries) AS entry
  WHERE
    entry.value IS NOT NULL
),
-- ARRAY_CONCAT_AGG may fail if the array of records exceeds 20 MB when
-- serialized and shuffled. This could show up in a pathological case where a
-- single client sends *many* pings in a single day; that case has not been
-- observed. If it does occur, each histogram should be unnested and then
-- aggregated, which forces more shuffles and is inefficient. That cost may be
-- mitigated by removing the empty entries that are sent only to keep bucket
-- ranges contiguous.
--
-- Tested via org_mozilla_fenix.metrics_v1 for 2020-02-23, unnest vs concat
-- Slot consumed: 00:50:15 vs 00:06:45, Shuffled: 27.5GB vs 6.0 GB
summed AS (
  -- Concatenate a client's buckets for a metric and sum them per bucket key.
  SELECT
    sample_id, client_id, ping_type, submission_date,
    os, app_version, app_build_id, channel,
    metric,
    metric_type,
    mozfun.map.sum(ARRAY_CONCAT_AGG(value)) AS value
  FROM
    histogram_rows
  GROUP BY
    sample_id, client_id, ping_type, submission_date,
    os, app_version, app_build_id, channel,
    metric,
    metric_type
)
SELECT
  sample_id, client_id, ping_type, submission_date,
  os, app_version, app_build_id, channel,
  ARRAY_AGG(
    STRUCT<
      metric STRING,
      metric_type STRING,
      key STRING,
      agg_type STRING,
      value ARRAY<STRUCT<key STRING, value INT64>>
    >(metric, metric_type, '', 'summed_histogram', value)
  ) AS histogram_aggregates
FROM
  summed
GROUP BY
  sample_id, client_id, ping_type, submission_date,
  os, app_version, app_build_id, channel
|
|
@ -0,0 +1,239 @@
|
|||
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table firefox_desktop_stable.baseline_v1
--
-- Emits, per (client, ping type, day, OS, version, build, channel), one row
-- with aggregates of the unlabeled metrics and one row with aggregates of the
-- labeled counters of the baseline ping.
WITH pings AS (
  -- Baseline pings submitted on @submission_date that carry a client id.
  SELECT
    metrics,
    client_info.client_id AS client_id,
    "baseline" AS ping_type,
    DATE(submission_timestamp) AS submission_date,
    client_info.os AS os,
    -- Text before the first '.' of the display version as an integer;
    -- 0 when that text is missing or not an integer.
    COALESCE(
      SAFE_CAST(SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS INT64),
      0
    ) AS app_version,
    client_info.app_build AS app_build_id,
    client_info.app_channel AS channel
  FROM
    `moz-fx-data-shared-prod.firefox_desktop_stable.baseline_v1`
  WHERE
    DATE(submission_timestamp) = @submission_date
    AND client_info.client_id IS NOT NULL
),
unlabeled_metrics AS (
  -- avg/count/max/min/sum per unlabeled metric. 'count' is NULL when the
  -- metric was never reported in the group, otherwise the group's row count.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
      ('glean_baseline_duration', 'timespan', '', 'avg', AVG(CAST(metrics.timespan.glean_baseline_duration.value AS INT64))),
      ('glean_baseline_duration', 'timespan', '', 'count', IF(MIN(metrics.timespan.glean_baseline_duration.value) IS NULL, NULL, COUNT(*))),
      ('glean_baseline_duration', 'timespan', '', 'max', MAX(CAST(metrics.timespan.glean_baseline_duration.value AS INT64))),
      ('glean_baseline_duration', 'timespan', '', 'min', MIN(CAST(metrics.timespan.glean_baseline_duration.value AS INT64))),
      ('glean_baseline_duration', 'timespan', '', 'sum', SUM(CAST(metrics.timespan.glean_baseline_duration.value AS INT64))),
      ('glean_validation_metrics_ping_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_validation_metrics_ping_count AS INT64))),
      ('glean_validation_metrics_ping_count', 'counter', '', 'count', IF(MIN(metrics.counter.glean_validation_metrics_ping_count) IS NULL, NULL, COUNT(*))),
      ('glean_validation_metrics_ping_count', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_validation_metrics_ping_count AS INT64))),
      ('glean_validation_metrics_ping_count', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_validation_metrics_ping_count AS INT64))),
      ('glean_validation_metrics_ping_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_validation_metrics_ping_count AS INT64)))
    ] AS scalar_aggregates
  FROM
    pings
  GROUP BY
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel
),
per_ping_labeled AS (
  -- Gather the labeled counters of each ping into one array.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
      ('glean_error_invalid_label', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_label),
      ('glean_error_invalid_overflow', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_overflow),
      ('glean_error_invalid_state', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_state),
      ('glean_error_invalid_value', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_value),
      ('glean_validation_pings_submitted', 'labeled_counter', metrics.labeled_counter.glean_validation_pings_submitted)
    ] AS labeled
  FROM
    pings
),
labeled_rows AS (
  -- One row per ping, labeled metric and label.
  SELECT
    p.client_id, p.ping_type, p.submission_date, p.os, p.app_version, p.app_build_id, p.channel,
    m.name AS metric,
    m.type AS metric_type,
    kv.key AS key,
    kv.value AS value
  FROM
    per_ping_labeled AS p
  CROSS JOIN
    UNNEST(p.labeled) AS m
  CROSS JOIN
    UNNEST(m.value) AS kv
),
label_stats AS (
  -- Daily statistics for each (metric, label) of a client.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    metric,
    metric_type,
    key,
    MAX(value) AS value_max,
    MIN(value) AS value_min,
    AVG(value) AS value_avg,
    SUM(value) AS value_sum,
    IF(MIN(value) IS NULL, NULL, COUNT(*)) AS value_count
  FROM
    labeled_rows
  GROUP BY
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    metric,
    metric_type,
    key
),
labeled_metrics AS (
  -- Fold the per-label statistics back into one array per client row, using
  -- the same element type as unlabeled_metrics.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    ARRAY_CONCAT_AGG(
      ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
        (metric, metric_type, key, 'max', value_max),
        (metric, metric_type, key, 'min', value_min),
        (metric, metric_type, key, 'avg', value_avg),
        (metric, metric_type, key, 'sum', value_sum),
        (metric, metric_type, key, 'count', value_count)
      ]
    ) AS scalar_aggregates
  FROM
    label_stats
  GROUP BY
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel
)
SELECT
  client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
  scalar_aggregates
FROM
  unlabeled_metrics
UNION ALL
SELECT
  client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
  scalar_aggregates
FROM
  labeled_metrics
|
|
@ -0,0 +1,164 @@
|
|||
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table firefox_desktop_stable.deletion_request_v1
--
-- Emits, per (client, ping type, day, OS, version, build, channel), one row
-- with an empty unlabeled-metrics array (no unlabeled metrics are aggregated
-- for this ping) and one row with aggregates of the labeled counters of the
-- deletion-request ping.
WITH pings AS (
  -- Deletion-request pings submitted on @submission_date that carry a client id.
  SELECT
    metrics,
    client_info.client_id AS client_id,
    "deletion-request" AS ping_type,
    DATE(submission_timestamp) AS submission_date,
    client_info.os AS os,
    -- Text before the first '.' of the display version as an integer;
    -- 0 when that text is missing or not an integer.
    COALESCE(
      SAFE_CAST(SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS INT64),
      0
    ) AS app_version,
    client_info.app_build AS app_build_id,
    client_info.app_channel AS channel
  FROM
    `moz-fx-data-shared-prod.firefox_desktop_stable.deletion_request_v1`
  WHERE
    DATE(submission_timestamp) = @submission_date
    AND client_info.client_id IS NOT NULL
),
unlabeled_metrics AS (
  -- Still yields one row per client group, with an empty array.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
    ] AS scalar_aggregates
  FROM
    pings
  GROUP BY
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel
),
per_ping_labeled AS (
  -- Gather the labeled counters of each ping into one array.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
      ('glean_error_invalid_label', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_label),
      ('glean_error_invalid_overflow', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_overflow),
      ('glean_error_invalid_state', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_state),
      ('glean_error_invalid_value', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_value)
    ] AS labeled
  FROM
    pings
),
labeled_rows AS (
  -- One row per ping, labeled metric and label.
  SELECT
    p.client_id, p.ping_type, p.submission_date, p.os, p.app_version, p.app_build_id, p.channel,
    m.name AS metric,
    m.type AS metric_type,
    kv.key AS key,
    kv.value AS value
  FROM
    per_ping_labeled AS p
  CROSS JOIN
    UNNEST(p.labeled) AS m
  CROSS JOIN
    UNNEST(m.value) AS kv
),
label_stats AS (
  -- Daily statistics for each (metric, label) of a client. 'count' is NULL
  -- when no value was reported, otherwise the group's row count.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    metric,
    metric_type,
    key,
    MAX(value) AS value_max,
    MIN(value) AS value_min,
    AVG(value) AS value_avg,
    SUM(value) AS value_sum,
    IF(MIN(value) IS NULL, NULL, COUNT(*)) AS value_count
  FROM
    labeled_rows
  GROUP BY
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    metric,
    metric_type,
    key
),
labeled_metrics AS (
  -- Fold the per-label statistics back into one array per client row, using
  -- the same element type as unlabeled_metrics.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    ARRAY_CONCAT_AGG(
      ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
        (metric, metric_type, key, 'max', value_max),
        (metric, metric_type, key, 'min', value_min),
        (metric, metric_type, key, 'avg', value_avg),
        (metric, metric_type, key, 'sum', value_sum),
        (metric, metric_type, key, 'count', value_count)
      ]
    ) AS scalar_aggregates
  FROM
    label_stats
  GROUP BY
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel
)
SELECT
  client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
  scalar_aggregates
FROM
  unlabeled_metrics
UNION ALL
SELECT
  client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
  scalar_aggregates
FROM
  labeled_metrics
|
|
@ -0,0 +1,164 @@
|
|||
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table firefox_desktop_stable.events_v1
--
-- Emits, per (client, ping type, day, OS, version, build, channel), one row
-- with an empty unlabeled-metrics array (no unlabeled metrics are aggregated
-- for this ping) and one row with aggregates of the labeled counters of the
-- events ping.
WITH pings AS (
  -- Events pings submitted on @submission_date that carry a client id.
  SELECT
    metrics,
    client_info.client_id AS client_id,
    "events" AS ping_type,
    DATE(submission_timestamp) AS submission_date,
    client_info.os AS os,
    -- Text before the first '.' of the display version as an integer;
    -- 0 when that text is missing or not an integer.
    COALESCE(
      SAFE_CAST(SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS INT64),
      0
    ) AS app_version,
    client_info.app_build AS app_build_id,
    client_info.app_channel AS channel
  FROM
    `moz-fx-data-shared-prod.firefox_desktop_stable.events_v1`
  WHERE
    DATE(submission_timestamp) = @submission_date
    AND client_info.client_id IS NOT NULL
),
unlabeled_metrics AS (
  -- Still yields one row per client group, with an empty array.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
    ] AS scalar_aggregates
  FROM
    pings
  GROUP BY
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel
),
per_ping_labeled AS (
  -- Gather the labeled counters of each ping into one array.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
      ('glean_error_invalid_label', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_label),
      ('glean_error_invalid_overflow', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_overflow),
      ('glean_error_invalid_state', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_state),
      ('glean_error_invalid_value', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_value)
    ] AS labeled
  FROM
    pings
),
labeled_rows AS (
  -- One row per ping, labeled metric and label.
  SELECT
    p.client_id, p.ping_type, p.submission_date, p.os, p.app_version, p.app_build_id, p.channel,
    m.name AS metric,
    m.type AS metric_type,
    kv.key AS key,
    kv.value AS value
  FROM
    per_ping_labeled AS p
  CROSS JOIN
    UNNEST(p.labeled) AS m
  CROSS JOIN
    UNNEST(m.value) AS kv
),
label_stats AS (
  -- Daily statistics for each (metric, label) of a client. 'count' is NULL
  -- when no value was reported, otherwise the group's row count.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    metric,
    metric_type,
    key,
    MAX(value) AS value_max,
    MIN(value) AS value_min,
    AVG(value) AS value_avg,
    SUM(value) AS value_sum,
    IF(MIN(value) IS NULL, NULL, COUNT(*)) AS value_count
  FROM
    labeled_rows
  GROUP BY
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    metric,
    metric_type,
    key
),
labeled_metrics AS (
  -- Fold the per-label statistics back into one array per client row, using
  -- the same element type as unlabeled_metrics.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    ARRAY_CONCAT_AGG(
      ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
        (metric, metric_type, key, 'max', value_max),
        (metric, metric_type, key, 'min', value_min),
        (metric, metric_type, key, 'avg', value_avg),
        (metric, metric_type, key, 'sum', value_sum),
        (metric, metric_type, key, 'count', value_count)
      ]
    ) AS scalar_aggregates
  FROM
    label_stats
  GROUP BY
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel
)
SELECT
  client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
  scalar_aggregates
FROM
  unlabeled_metrics
UNION ALL
SELECT
  client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
  scalar_aggregates
FROM
  labeled_metrics
|
|
@ -0,0 +1,178 @@
|
|||
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table firefox_desktop_stable.fog_validation_v1
--
-- Emits, per (client, ping type, day, OS, version, build, channel), one row
-- with true/false tallies of the unlabeled boolean metric and one row with
-- aggregates of the labeled counters of the fog-validation ping.
WITH pings AS (
  -- Fog-validation pings submitted on @submission_date that carry a client id.
  SELECT
    metrics,
    client_info.client_id AS client_id,
    "fog-validation" AS ping_type,
    DATE(submission_timestamp) AS submission_date,
    client_info.os AS os,
    -- Text before the first '.' of the display version as an integer;
    -- 0 when that text is missing or not an integer.
    COALESCE(
      SAFE_CAST(SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS INT64),
      0
    ) AS app_version,
    client_info.app_build AS app_build_id,
    client_info.app_channel AS channel
  FROM
    `moz-fx-data-shared-prod.firefox_desktop_stable.fog_validation_v1`
  WHERE
    DATE(submission_timestamp) = @submission_date
    AND client_info.client_id IS NOT NULL
),
unlabeled_metrics AS (
  -- For the boolean metric, count how many pings reported false and how many
  -- reported true.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
      ('fog_validation_profile_disk_is_ssd', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.fog_validation_profile_disk_is_ssd AS INT64))),
      ('fog_validation_profile_disk_is_ssd', 'boolean', '', 'true', SUM(CAST(metrics.boolean.fog_validation_profile_disk_is_ssd AS INT64)))
    ] AS scalar_aggregates
  FROM
    pings
  GROUP BY
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel
),
per_ping_labeled AS (
  -- Gather the labeled counters of each ping into one array.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
      ('glean_error_invalid_label', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_label),
      ('glean_error_invalid_overflow', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_overflow),
      ('glean_error_invalid_state', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_state),
      ('glean_error_invalid_value', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_value)
    ] AS labeled
  FROM
    pings
),
labeled_rows AS (
  -- One row per ping, labeled metric and label.
  SELECT
    p.client_id, p.ping_type, p.submission_date, p.os, p.app_version, p.app_build_id, p.channel,
    m.name AS metric,
    m.type AS metric_type,
    kv.key AS key,
    kv.value AS value
  FROM
    per_ping_labeled AS p
  CROSS JOIN
    UNNEST(p.labeled) AS m
  CROSS JOIN
    UNNEST(m.value) AS kv
),
label_stats AS (
  -- Daily statistics for each (metric, label) of a client. 'count' is NULL
  -- when no value was reported, otherwise the group's row count.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    metric,
    metric_type,
    key,
    MAX(value) AS value_max,
    MIN(value) AS value_min,
    AVG(value) AS value_avg,
    SUM(value) AS value_sum,
    IF(MIN(value) IS NULL, NULL, COUNT(*)) AS value_count
  FROM
    labeled_rows
  GROUP BY
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    metric,
    metric_type,
    key
),
labeled_metrics AS (
  -- Fold the per-label statistics back into one array per client row, using
  -- the same element type as unlabeled_metrics.
  SELECT
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
    ARRAY_CONCAT_AGG(
      ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
        (metric, metric_type, key, 'max', value_max),
        (metric, metric_type, key, 'min', value_min),
        (metric, metric_type, key, 'avg', value_avg),
        (metric, metric_type, key, 'sum', value_sum),
        (metric, metric_type, key, 'count', value_count)
      ]
    ) AS scalar_aggregates
  FROM
    label_stats
  GROUP BY
    client_id, ping_type, submission_date, os, app_version, app_build_id, channel
)
SELECT
  client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
  scalar_aggregates
FROM
  unlabeled_metrics
UNION ALL
SELECT
  client_id, ping_type, submission_date, os, app_version, app_build_id, channel,
  scalar_aggregates
FROM
  labeled_metrics
|
|
@ -0,0 +1,591 @@
|
|||
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table firefox_desktop_stable.metrics_v1
|
||||
-- Per-client, per-day scalar aggregates for the firefox_desktop `metrics` ping.
-- Unlabeled metrics are aggregated directly per client; labeled counters are
-- first flattened to one row per (metric, label) and then aggregated. Both
-- halves produce the same scalar_aggregates array shape and are unioned.
WITH extracted AS (
  -- One row per ping of the requested day, with the grouping dimensions
  -- lifted out of client_info.
  SELECT
    *,
    DATE(submission_timestamp) AS submission_date,
    client_info.client_id,
    "metrics" AS ping_type,
    -- Major version: first dot-separated component of the display version,
    -- falling back to 0 when it cannot be cast to an integer.
    COALESCE(
      SAFE_CAST(SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS INT64),
      0
    ) AS app_version,
    client_info.os AS os,
    client_info.app_build AS app_build_id,
    client_info.app_channel AS channel
  FROM
    `moz-fx-data-shared-prod.firefox_desktop_stable.metrics_v1`
  WHERE
    DATE(submission_timestamp) = @submission_date
    AND client_info.client_id IS NOT NULL
),
unlabeled_metrics AS (
  -- Each tuple is (metric, metric_type, key, agg_type, value). Booleans emit
  -- 'false'/'true' counts; timespans and counters emit avg/count/max/min/sum.
  -- 'count' is the number of pings in the group, or NULL when the metric was
  -- NULL in every one of them.
  SELECT
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    --format:off
    ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
      ('browser_ui_proton_enabled', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.browser_ui_proton_enabled AS INT64))),
      ('browser_ui_proton_enabled', 'boolean', '', 'true', SUM(CAST(metrics.boolean.browser_ui_proton_enabled AS INT64))),
      ('fog_failed_idle_registration', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.fog_failed_idle_registration AS INT64))),
      ('fog_failed_idle_registration', 'boolean', '', 'true', SUM(CAST(metrics.boolean.fog_failed_idle_registration AS INT64))),
      ('fog_initialization', 'timespan', '', 'avg', AVG(CAST(metrics.timespan.fog_initialization.value AS INT64))),
      ('fog_initialization', 'timespan', '', 'count', IF(MIN(metrics.timespan.fog_initialization.value) IS NULL, NULL, COUNT(*))),
      ('fog_initialization', 'timespan', '', 'max', MAX(CAST(metrics.timespan.fog_initialization.value AS INT64))),
      ('fog_initialization', 'timespan', '', 'min', MIN(CAST(metrics.timespan.fog_initialization.value AS INT64))),
      ('fog_initialization', 'timespan', '', 'sum', SUM(CAST(metrics.timespan.fog_initialization.value AS INT64))),
      ('fog_ipc_replay_failures', 'counter', '', 'avg', AVG(CAST(metrics.counter.fog_ipc_replay_failures AS INT64))),
      ('fog_ipc_replay_failures', 'counter', '', 'count', IF(MIN(metrics.counter.fog_ipc_replay_failures) IS NULL, NULL, COUNT(*))),
      ('fog_ipc_replay_failures', 'counter', '', 'max', MAX(CAST(metrics.counter.fog_ipc_replay_failures AS INT64))),
      ('fog_ipc_replay_failures', 'counter', '', 'min', MIN(CAST(metrics.counter.fog_ipc_replay_failures AS INT64))),
      ('fog_ipc_replay_failures', 'counter', '', 'sum', SUM(CAST(metrics.counter.fog_ipc_replay_failures AS INT64))),
      ('glean_core_migration_successful', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.glean_core_migration_successful AS INT64))),
      ('glean_core_migration_successful', 'boolean', '', 'true', SUM(CAST(metrics.boolean.glean_core_migration_successful AS INT64))),
      ('glean_error_io', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_error_io AS INT64))),
      ('glean_error_io', 'counter', '', 'count', IF(MIN(metrics.counter.glean_error_io) IS NULL, NULL, COUNT(*))),
      ('glean_error_io', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_error_io AS INT64))),
      ('glean_error_io', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_error_io AS INT64))),
      ('glean_error_io', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_error_io AS INT64))),
      ('glean_error_preinit_tasks_overflow', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))),
      ('glean_error_preinit_tasks_overflow', 'counter', '', 'count', IF(MIN(metrics.counter.glean_error_preinit_tasks_overflow) IS NULL, NULL, COUNT(*))),
      ('glean_error_preinit_tasks_overflow', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))),
      ('glean_error_preinit_tasks_overflow', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))),
      ('glean_error_preinit_tasks_overflow', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))),
      ('glean_error_preinit_tasks_timeout', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.glean_error_preinit_tasks_timeout AS INT64))),
      ('glean_error_preinit_tasks_timeout', 'boolean', '', 'true', SUM(CAST(metrics.boolean.glean_error_preinit_tasks_timeout AS INT64))),
      ('glean_time_invalid_timezone_offset', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_time_invalid_timezone_offset AS INT64))),
      ('glean_time_invalid_timezone_offset', 'counter', '', 'count', IF(MIN(metrics.counter.glean_time_invalid_timezone_offset) IS NULL, NULL, COUNT(*))),
      ('glean_time_invalid_timezone_offset', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_time_invalid_timezone_offset AS INT64))),
      ('glean_time_invalid_timezone_offset', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_time_invalid_timezone_offset AS INT64))),
      ('glean_time_invalid_timezone_offset', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_time_invalid_timezone_offset AS INT64))),
      ('glean_upload_deleted_pings_after_quota_hit', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_upload_deleted_pings_after_quota_hit AS INT64))),
      ('glean_upload_deleted_pings_after_quota_hit', 'counter', '', 'count', IF(MIN(metrics.counter.glean_upload_deleted_pings_after_quota_hit) IS NULL, NULL, COUNT(*))),
      ('glean_upload_deleted_pings_after_quota_hit', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_upload_deleted_pings_after_quota_hit AS INT64))),
      ('glean_upload_deleted_pings_after_quota_hit', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_upload_deleted_pings_after_quota_hit AS INT64))),
      ('glean_upload_deleted_pings_after_quota_hit', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_upload_deleted_pings_after_quota_hit AS INT64))),
      ('glean_upload_pending_pings', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_upload_pending_pings AS INT64))),
      ('glean_upload_pending_pings', 'counter', '', 'count', IF(MIN(metrics.counter.glean_upload_pending_pings) IS NULL, NULL, COUNT(*))),
      ('glean_upload_pending_pings', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_upload_pending_pings AS INT64))),
      ('glean_upload_pending_pings', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_upload_pending_pings AS INT64))),
      ('glean_upload_pending_pings', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_upload_pending_pings AS INT64))),
      ('glean_validation_app_forceclosed_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))),
      ('glean_validation_app_forceclosed_count', 'counter', '', 'count', IF(MIN(metrics.counter.glean_validation_app_forceclosed_count) IS NULL, NULL, COUNT(*))),
      ('glean_validation_app_forceclosed_count', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))),
      ('glean_validation_app_forceclosed_count', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))),
      ('glean_validation_app_forceclosed_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))),
      ('glean_validation_baseline_ping_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))),
      ('glean_validation_baseline_ping_count', 'counter', '', 'count', IF(MIN(metrics.counter.glean_validation_baseline_ping_count) IS NULL, NULL, COUNT(*))),
      ('glean_validation_baseline_ping_count', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))),
      ('glean_validation_baseline_ping_count', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))),
      ('glean_validation_baseline_ping_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))),
      ('glean_validation_foreground_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_validation_foreground_count AS INT64))),
      ('glean_validation_foreground_count', 'counter', '', 'count', IF(MIN(metrics.counter.glean_validation_foreground_count) IS NULL, NULL, COUNT(*))),
      ('glean_validation_foreground_count', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_validation_foreground_count AS INT64))),
      ('glean_validation_foreground_count', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_validation_foreground_count AS INT64))),
      ('glean_validation_foreground_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_validation_foreground_count AS INT64))),
      ('power_total_cpu_time_ms', 'counter', '', 'avg', AVG(CAST(metrics.counter.power_total_cpu_time_ms AS INT64))),
      ('power_total_cpu_time_ms', 'counter', '', 'count', IF(MIN(metrics.counter.power_total_cpu_time_ms) IS NULL, NULL, COUNT(*))),
      ('power_total_cpu_time_ms', 'counter', '', 'max', MAX(CAST(metrics.counter.power_total_cpu_time_ms AS INT64))),
      ('power_total_cpu_time_ms', 'counter', '', 'min', MIN(CAST(metrics.counter.power_total_cpu_time_ms AS INT64))),
      ('power_total_cpu_time_ms', 'counter', '', 'sum', SUM(CAST(metrics.counter.power_total_cpu_time_ms AS INT64)))
    ] AS scalar_aggregates
    --format:on
  FROM
    extracted
  GROUP BY
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel
),
grouped_labeled_metrics AS (
  -- Per ping: gather every labeled counter into one array of
  -- (name, type, key/value pairs) so they can be unnested uniformly below.
  SELECT
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    --format:off
    ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
      ('glean_error_invalid_label', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_label),
      ('glean_error_invalid_overflow', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_overflow),
      ('glean_error_invalid_state', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_state),
      ('glean_error_invalid_value', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_value),
      ('glean_upload_ping_upload_failure', 'labeled_counter', metrics.labeled_counter.glean_upload_ping_upload_failure),
      ('glean_validation_pings_submitted', 'labeled_counter', metrics.labeled_counter.glean_validation_pings_submitted)
    ] AS metrics
    --format:on
  FROM
    extracted
),
flattened_labeled_metrics AS (
  -- One row per (ping, labeled metric, label).
  SELECT
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    metric_entry.name AS metric,
    metric_entry.type AS metric_type,
    label_entry.key AS key,
    label_entry.value AS value
  FROM
    grouped_labeled_metrics
  CROSS JOIN
    UNNEST(metrics) AS metric_entry
  CROSS JOIN
    UNNEST(metric_entry.value) AS label_entry
),
aggregated_labeled_metrics AS (
  -- Collapse the flattened rows to one row per client and (metric, label).
  SELECT
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    MAX(value) AS max,
    MIN(value) AS min,
    AVG(value) AS avg,
    SUM(value) AS sum,
    -- row count for the group, or NULL when every value in it was NULL
    IF(MIN(value) IS NULL, NULL, COUNT(*)) AS count
  FROM
    flattened_labeled_metrics
  GROUP BY
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key
),
labeled_metrics AS (
  -- Re-pack the per-label statistics into the scalar_aggregates array shape
  -- used by unlabeled_metrics.
  SELECT
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    ARRAY_CONCAT_AGG(
      ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
        (metric, metric_type, key, 'max', max),
        (metric, metric_type, key, 'min', min),
        (metric, metric_type, key, 'avg', avg),
        (metric, metric_type, key, 'sum', sum),
        (metric, metric_type, key, 'count', count)
      ]
    ) AS scalar_aggregates
  FROM
    aggregated_labeled_metrics
  GROUP BY
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel
)
SELECT
  *
FROM
  unlabeled_metrics
UNION ALL
SELECT
  *
FROM
  labeled_metrics
|
|
@ -0,0 +1,10 @@
|
|||
-- view for firefox_desktop__view_clients_daily_histogram_aggregates_v1;
|
||||
-- View for histogram aggregates that handles time-partitioning
|
||||
CREATE OR REPLACE VIEW
  `glam-fenix-dev.glam_etl.firefox_desktop__view_clients_daily_histogram_aggregates_v1`
AS
-- Reads every table matched by the wildcard below as one relation. The stored
-- submission_date column is dropped and re-derived from the partition time.
SELECT
  * EXCEPT (submission_date),
  DATE(_PARTITIONTIME) AS submission_date
FROM
  `glam-fenix-dev.glam_etl.firefox_desktop__clients_daily_histogram_aggregates*`
|
|
@ -0,0 +1,10 @@
|
|||
-- view for firefox_desktop__view_clients_daily_scalar_aggregates_v1;
|
||||
-- View to union daily scalar aggregates with date partitioning
|
||||
CREATE OR REPLACE VIEW
  `glam-fenix-dev.glam_etl.firefox_desktop__view_clients_daily_scalar_aggregates_v1`
AS
-- Reads every table matched by the wildcard below as one relation. The stored
-- submission_date column is dropped and re-derived from the partition time.
SELECT
  * EXCEPT (submission_date),
  DATE(_PARTITIONTIME) AS submission_date
FROM
  `glam-fenix-dev.glam_etl.firefox_desktop__clients_daily_scalar_aggregates*`
|
|
@ -0,0 +1,26 @@
|
|||
-- init for firefox_desktop_glam_nightly__clients_histogram_aggregates_v1;
|
||||
-- Accumulated per-client histogram aggregates. Created only if missing, so
-- re-running this statement never alters an existing table.
CREATE TABLE IF NOT EXISTS
  `glam-fenix-dev.glam_etl.firefox_desktop_glam_nightly__clients_histogram_aggregates_v1`(
    -- client dimensions
    sample_id INT64,
    client_id STRING,
    ping_type STRING,
    os STRING,
    app_version INT64,
    app_build_id STRING,
    channel STRING,
    -- one entry per (metric, metric_type, key, agg_type); value holds the
    -- histogram as key/value pairs
    histogram_aggregates ARRAY<
      STRUCT<
        metric STRING,
        metric_type STRING,
        key STRING,
        agg_type STRING,
        value ARRAY<STRUCT<key STRING, value INT64>>
      >
    >
  )
-- integer-range partitions of width 1 with boundaries 0, 1, ..., 100
PARTITION BY
  RANGE_BUCKET(sample_id, GENERATE_ARRAY(0, 100, 1))
CLUSTER BY
  app_version,
  channel,
  client_id
|
|
@ -0,0 +1,212 @@
|
|||
-- query for firefox_desktop_glam_nightly__clients_histogram_aggregates_v1;
|
||||
-- Merge a client's histogram aggregates: entries that share
-- (metric, metric_type, key, agg_type) are collapsed into a single entry whose
-- bucket arrays are concatenated and then combined with mozfun.map.sum
-- (presumably a per-key sum -- confirm against the mozfun definition).
CREATE TEMP FUNCTION udf_merged_user_data(aggs ANY TYPE)
RETURNS ARRAY<
  STRUCT<
    metric STRING,
    metric_type STRING,
    key STRING,
    agg_type STRING,
    value ARRAY<STRUCT<key STRING, value INT64>>
  >
> AS (
  (
    WITH merged AS (
      SELECT
        metric,
        metric_type,
        key,
        agg_type,
        mozfun.map.sum(ARRAY_CONCAT_AGG(value)) AS value
      FROM
        UNNEST(aggs)
      GROUP BY
        metric,
        metric_type,
        key,
        agg_type
    )
    SELECT
      ARRAY_AGG((metric, metric_type, key, agg_type, value))
    FROM
      merged
  )
);
|
||||
|
||||
-- Sum a histogram's bucket values per key, ignoring any bucket entry whose
-- value is implausibly large.
CREATE TEMP FUNCTION filter_values(aggs ARRAY<STRUCT<key STRING, value INT64>>)
RETURNS ARRAY<STRUCT<key STRING, value INT64>> AS (
  ARRAY(
    SELECT AS STRUCT
      bucket.key,
      SUM(bucket.value) AS value
    FROM
      UNNEST(aggs) AS bucket
    WHERE
      -- Prevent overflows by only keeping buckets whose value is at most 2^40,
      -- allowing 2^24 entries. This value was chosen somewhat arbitrarily;
      -- typically the max histogram value is on the order of ~20 bits.
      bucket.value <= POW(2, 40)
    GROUP BY
      bucket.key
  )
);
|
||||
|
||||
-- Incrementally update the per-client histogram aggregates: take the rows
-- already accumulated for the requested sample_id range, take the requested
-- day's daily aggregates, restrict both to recent builds/versions, and merge
-- them client by client.
WITH extracted_accumulated AS (
  -- previously accumulated state for the sample_id slice being processed
  SELECT
    *
  FROM
    glam_etl.firefox_desktop_glam_nightly__clients_histogram_aggregates_v1
  WHERE
    sample_id >= @min_sample_id
    AND sample_id <= @max_sample_id
),
filtered_accumulated AS (
  SELECT
    sample_id,
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    histogram_aggregates
  FROM
    extracted_accumulated
  LEFT JOIN
    glam_etl.firefox_desktop_glam_nightly__latest_versions_v1
  USING
    (channel)
  WHERE
    -- allow for builds to be slightly ahead of the current submission date, to
    -- account for a reasonable amount of clock skew
    mozfun.glam.build_hour_to_datetime(app_build_id) < DATE_ADD(@submission_date, INTERVAL 3 day)
    -- only keep builds from the last year
    AND mozfun.glam.build_hour_to_datetime(app_build_id) > DATE_SUB(
      @submission_date,
      INTERVAL 365 day
    )
    -- only keep the 3 most recent major versions for the channel
    AND app_version > (latest_version - 3)
),
-- unnest the daily data
extracted_daily AS (
  SELECT
    * EXCEPT (app_version, histogram_aggregates),
    CAST(app_version AS INT64) AS app_version,
    unnested_histogram_aggregates AS histogram_aggregates
  FROM
    glam_etl.firefox_desktop_glam_nightly__view_clients_daily_histogram_aggregates_v1,
    UNNEST(histogram_aggregates) unnested_histogram_aggregates
  WHERE
    submission_date = @submission_date
    -- skip aggregates that carry no histogram buckets
    AND value IS NOT NULL
    AND ARRAY_LENGTH(value) > 0
),
filtered_daily AS (
  SELECT
    sample_id,
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    histogram_aggregates.*
  FROM
    extracted_daily
  LEFT JOIN
    glam_etl.firefox_desktop_glam_nightly__latest_versions_v1
  USING
    (channel)
  WHERE
    -- allow for builds to be slightly ahead of the current submission date, to
    -- account for a reasonable amount of clock skew
    mozfun.glam.build_hour_to_datetime(app_build_id) < DATE_ADD(@submission_date, INTERVAL 3 day)
    -- only keep builds from the last year
    AND mozfun.glam.build_hour_to_datetime(app_build_id) > DATE_SUB(
      @submission_date,
      INTERVAL 365 day
    )
    -- only keep the 3 most recent major versions for the channel
    AND app_version > (latest_version - 3)
),
-- re-aggregate based on the latest version
aggregated_daily AS (
  SELECT
    sample_id,
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    agg_type,
    mozfun.map.sum(ARRAY_CONCAT_AGG(filter_values(value))) AS value
  FROM
    filtered_daily
  GROUP BY
    sample_id,
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    agg_type
),
-- note: this seems costly, if it's just going to be unnested again
transformed_daily AS (
  SELECT
    sample_id,
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    ARRAY_AGG(
      STRUCT<
        metric STRING,
        metric_type STRING,
        key STRING,
        agg_type STRING,
        aggregates ARRAY<STRUCT<key STRING, value INT64>>
      >(metric, metric_type, key, agg_type, value)
    ) AS histogram_aggregates
  FROM
    aggregated_daily
  GROUP BY
    sample_id,
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel
)
SELECT
  COALESCE(accumulated.sample_id, daily.sample_id) AS sample_id,
  COALESCE(accumulated.client_id, daily.client_id) AS client_id,
  COALESCE(accumulated.ping_type, daily.ping_type) AS ping_type,
  COALESCE(accumulated.os, daily.os) AS os,
  COALESCE(accumulated.app_version, daily.app_version) AS app_version,
  COALESCE(accumulated.app_build_id, daily.app_build_id) AS app_build_id,
  COALESCE(accumulated.channel, daily.channel) AS channel,
  -- After the FULL OUTER JOIN one side is NULL for any client that is only in
  -- the accumulated table or only in today's data. ARRAY_CONCAT returns NULL
  -- when any argument is NULL, which would discard that client's aggregates,
  -- so each side is defaulted to an empty array first.
  udf_merged_user_data(
    ARRAY_CONCAT(
      COALESCE(accumulated.histogram_aggregates, []),
      COALESCE(daily.histogram_aggregates, [])
    )
  ) AS histogram_aggregates
FROM
  filtered_accumulated AS accumulated
FULL OUTER JOIN
  transformed_daily AS daily
USING
  (sample_id, client_id, ping_type, os, app_version, app_build_id, channel)
|
|
@ -0,0 +1,19 @@
|
|||
-- init for firefox_desktop_glam_nightly__clients_scalar_aggregates_v1;
|
||||
-- Accumulated per-client scalar aggregates. Created only if missing, so
-- re-running this statement never alters an existing table.
CREATE TABLE IF NOT EXISTS
  `glam-fenix-dev.glam_etl.firefox_desktop_glam_nightly__clients_scalar_aggregates_v1`(
    -- client dimensions
    client_id STRING,
    ping_type STRING,
    os STRING,
    app_version INT64,
    app_build_id STRING,
    channel STRING,
    -- one entry per (metric, metric_type, key, agg_type)
    scalar_aggregates ARRAY<
      STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>
    >
  )
-- integer-range partitions of width 1 with boundaries 0, 1, ..., 100
-- NOTE(review): app_version values of 100 and above fall outside these
-- boundaries -- confirm that is acceptable for this product's version numbers.
PARTITION BY
  RANGE_BUCKET(app_version, GENERATE_ARRAY(0, 100, 1))
CLUSTER BY
  app_version,
  channel,
  client_id
|
|
@ -0,0 +1,258 @@
|
|||
-- query for firefox_desktop_glam_nightly__clients_scalar_aggregates_v1;
|
||||
-- Merge a client's scalar aggregates. Entries sharing
-- (metric, metric_type, key, agg_type) are combined according to agg_type;
-- incoming 'avg' entries are discarded and a fresh 'avg' is derived from the
-- merged 'sum' and 'count' entries.
CREATE TEMP FUNCTION udf_merged_user_data(
  aggs ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>
)
RETURNS ARRAY<
  STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>
> AS (
  (
    WITH aggregated AS (
      SELECT
        metric,
        metric_type,
        key,
        agg_type,
        --format:off
        CASE
          WHEN agg_type = 'max' THEN MAX(value)
          WHEN agg_type = 'min' THEN MIN(value)
          WHEN agg_type IN ('count', 'sum', 'false', 'true') THEN SUM(value)
        END AS value
        --format:on
      FROM
        UNNEST(aggs)
      WHERE
        -- averages cannot be merged directly; they are recomputed below
        agg_type != "avg"
        AND value IS NOT NULL
      GROUP BY
        metric,
        metric_type,
        key,
        agg_type
    ),
    -- put the merged count and sum side by side (0 for the one that does not
    -- apply to the row) so they can be totalled per metric/key
    scalar_count_and_sum AS (
      SELECT
        metric,
        metric_type,
        key,
        'avg' AS agg_type,
        IF(agg_type = 'count', value, 0) AS count,
        IF(agg_type = 'sum', value, 0) AS sum
      FROM
        aggregated
      WHERE
        agg_type IN ('sum', 'count')
    ),
    scalar_averages AS (
      SELECT
        metric,
        metric_type,
        key,
        agg_type,
        SUM(sum) / SUM(count) AS agg_value
      FROM
        scalar_count_and_sum
      GROUP BY
        metric,
        metric_type,
        key,
        agg_type
    ),
    merged_data AS (
      SELECT
        *
      FROM
        aggregated
      UNION ALL
      SELECT
        *
      FROM
        scalar_averages
    )
    SELECT
      ARRAY_AGG((metric, metric_type, key, agg_type, value))
    FROM
      merged_data
  )
);
|
||||
|
||||
-- Incrementally update the per-client scalar aggregates: aggregate the
-- requested day's data per client, restrict both the new and the previously
-- accumulated rows to recent builds/versions, and merge them client by client.
WITH filtered_aggregates AS (
  -- the requested day's daily aggregates, one row per non-NULL aggregate
  SELECT
    submission_date,
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    agg_type,
    value
  FROM
    glam_etl.firefox_desktop_glam_nightly__view_clients_daily_scalar_aggregates_v1
  CROSS JOIN
    UNNEST(scalar_aggregates)
  WHERE
    submission_date = @submission_date
    AND value IS NOT NULL
),
version_filtered_new AS (
  SELECT
    submission_date,
    scalar_aggs.client_id,
    scalar_aggs.ping_type,
    scalar_aggs.os,
    scalar_aggs.app_version,
    scalar_aggs.app_build_id,
    scalar_aggs.channel,
    metric,
    metric_type,
    key,
    agg_type,
    value
  FROM
    filtered_aggregates AS scalar_aggs
  LEFT JOIN
    glam_etl.firefox_desktop_glam_nightly__latest_versions_v1
  USING
    (channel)
  WHERE
    -- allow for builds to be slightly ahead of the current submission date, to
    -- account for a reasonable amount of clock skew
    mozfun.glam.build_hour_to_datetime(app_build_id) < DATE_ADD(@submission_date, INTERVAL 3 day)
    -- only keep builds from the last year
    AND mozfun.glam.build_hour_to_datetime(app_build_id) > DATE_SUB(
      @submission_date,
      INTERVAL 365 day
    )
    -- only keep the 3 most recent major versions for the channel
    AND app_version > (latest_version - 3)
),
scalar_aggregates_new AS (
  -- combine the day's rows per client and aggregate according to agg_type
  SELECT
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    agg_type,
    --format:off
    CASE
      WHEN agg_type = 'max' THEN MAX(value)
      WHEN agg_type = 'min' THEN MIN(value)
      WHEN agg_type IN ('count', 'sum', 'false', 'true') THEN SUM(value)
    END AS value
    --format:on
  FROM
    version_filtered_new
  WHERE
    -- avoid overflows from very large numbers that are typically anomalies
    value <= POW(2, 40)
  GROUP BY
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    agg_type
),
filtered_new AS (
  -- re-pack the day's aggregates into one array per client
  SELECT
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    ARRAY_AGG((metric, metric_type, key, agg_type, value)) AS scalar_aggregates
  FROM
    scalar_aggregates_new
  GROUP BY
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel
),
filtered_old AS (
  -- previously accumulated state, under the same build/version window
  SELECT
    scalar_aggs.client_id,
    scalar_aggs.ping_type,
    scalar_aggs.os,
    scalar_aggs.app_version,
    scalar_aggs.app_build_id,
    scalar_aggs.channel,
    scalar_aggregates
  FROM
    glam_etl.firefox_desktop_glam_nightly__clients_scalar_aggregates_v1 AS scalar_aggs
  LEFT JOIN
    glam_etl.firefox_desktop_glam_nightly__latest_versions_v1
  USING
    (channel)
  WHERE
    -- allow for builds to be slightly ahead of the current submission date, to
    -- account for a reasonable amount of clock skew
    mozfun.glam.build_hour_to_datetime(app_build_id) < DATE_ADD(@submission_date, INTERVAL 3 day)
    -- only keep builds from the last year
    AND mozfun.glam.build_hour_to_datetime(app_build_id) > DATE_SUB(
      @submission_date,
      INTERVAL 365 day
    )
    -- only keep the 3 most recent major versions for the channel
    AND app_version > (latest_version - 3)
),
joined_new_old AS (
  SELECT
    COALESCE(old_data.client_id, new_data.client_id) AS client_id,
    COALESCE(old_data.ping_type, new_data.ping_type) AS ping_type,
    COALESCE(old_data.os, new_data.os) AS os,
    COALESCE(old_data.app_version, new_data.app_version) AS app_version,
    COALESCE(old_data.app_build_id, new_data.app_build_id) AS app_build_id,
    COALESCE(old_data.channel, new_data.channel) AS channel,
    -- a client present on only one side gets an empty array for the other
    COALESCE(old_data.scalar_aggregates, []) AS old_aggs,
    COALESCE(new_data.scalar_aggregates, []) AS new_aggs
  FROM
    filtered_new AS new_data
  FULL OUTER JOIN
    filtered_old AS old_data
  USING
    (client_id, ping_type, os, app_version, app_build_id, channel)
)
SELECT
  client_id,
  ping_type,
  os,
  app_version,
  app_build_id,
  channel,
  udf_merged_user_data(ARRAY_CONCAT(old_aggs, new_aggs)) AS scalar_aggregates
FROM
  joined_new_old
|
|
@ -0,0 +1,38 @@
|
|||
-- query for firefox_desktop_glam_nightly__extract_probe_counts_v1;
|
||||
-- Flatten the probe counts view into one row per
-- (channel, version, ping_type, os, build, metric, key, client_agg_type), with
-- the histogram and percentiles aggregates pivoted into JSON columns.
SELECT
  channel,
  app_version AS version,
  ping_type,
  os,
  app_build_id AS build_id,
  -- the "*" wildcard build has no build date
  IF(
    app_build_id = "*",
    NULL,
    SAFE_CAST(mozfun.glam.build_hour_to_datetime(app_build_id) AS STRING)
  ) AS build_date,
  metric,
  metric_type,
  -- BigQuery has some null unicode characters which PostgreSQL doesn't like,
  -- so we remove those here. Also limit string length to 200 to match column
  -- length.
  -- NOTE(review): r"\x00" is a raw string literal, i.e. the four characters
  -- backslash-x-0-0 rather than a NUL character -- confirm this is the form
  -- the keys actually contain.
  SUBSTR(REPLACE(key, r"\x00", ""), 0, 200) AS metric_key,
  client_agg_type,
  MAX(total_users) AS total_users,
  MAX(
    IF(agg_type = "histogram", mozfun.glam.histogram_cast_json(aggregates), NULL)
  ) AS histogram,
  MAX(
    IF(agg_type = "percentiles", mozfun.glam.histogram_cast_json(aggregates), NULL)
  ) AS percentiles
FROM
  `glam_etl.firefox_desktop_glam_nightly__view_probe_counts_v1`
WHERE
  -- drop rows backed by 10 or fewer users
  total_users > 10
GROUP BY
  channel,
  app_version,
  ping_type,
  os,
  app_build_id,
  metric,
  metric_type,
  key,
  client_agg_type
|
|
@ -0,0 +1,17 @@
|
|||
-- query for firefox_desktop_glam_nightly__extract_sample_counts_v1;
-- Reads the sample-counts view and replaces NULL attribute values with the
-- "*" wildcard; also derives a build_date string from the build id.
SELECT
  channel,
  app_version,
  metric,
  key,
  COALESCE(ping_type, "*") AS ping_type,
  COALESCE(app_build_id, "*") AS app_build_id,
  -- The wildcard build has no build date.
  CASE
    WHEN app_build_id = "*"
      THEN NULL
    ELSE SAFE_CAST(mozfun.glam.build_hour_to_datetime(app_build_id) AS STRING)
  END AS build_date,
  COALESCE(os, "*") AS os,
  total_sample
FROM
  `glam_etl.firefox_desktop_glam_nightly__view_sample_counts_v1`
|
|
@ -0,0 +1,33 @@
|
|||
-- query for firefox_desktop_glam_nightly__extract_user_counts_v1;
-- Keeps, for every (channel, app_version, ping_type, app_build_id, os)
-- combination, the single row with the highest total_users.
WITH ranked AS (
  SELECT
    *,
    ROW_NUMBER() OVER (
      PARTITION BY channel, app_version, ping_type, app_build_id, os
      ORDER BY total_users DESC
    ) AS rank
  FROM
    `glam_etl.firefox_desktop_glam_nightly__view_user_counts_v1`
)
SELECT
  channel,
  app_version,
  COALESCE(ping_type, "*") AS ping_type,
  COALESCE(app_build_id, "*") AS app_build_id,
  -- The wildcard build has no build date.
  CASE
    WHEN app_build_id = "*"
      THEN NULL
    ELSE SAFE_CAST(mozfun.glam.build_hour_to_datetime(app_build_id) AS STRING)
  END AS build_date,
  COALESCE(os, "*") AS os,
  total_users
FROM
  ranked
WHERE
  rank = 1;
|
|
@ -0,0 +1,254 @@
|
|||
-- query for firefox_desktop_glam_nightly__histogram_bucket_counts_v1;
-- Normalizes every client histogram, explodes it into buckets, sums the
-- normalized bucket values per attribute combination and attaches bucket
-- range metadata for each metric.
WITH
-- Every row is cross joined with the 8 attribute combinations below, so a
-- table with n rows becomes n*8 rows. A "*" in a slot overrides the row's own
-- value and stands for "all values of that attribute"; NULL keeps the value.
static_combos AS (
  SELECT
    attr.*
  FROM
    UNNEST(
      ARRAY<STRUCT<ping_type STRING, os STRING, app_build_id STRING>>[
        (NULL, NULL, NULL),
        (NULL, NULL, "*"),
        (NULL, "*", NULL),
        ("*", NULL, NULL),
        (NULL, "*", "*"),
        ("*", NULL, "*"),
        ("*", "*", NULL),
        ("*", "*", "*")
      ]
    ) AS attr
),
all_combos AS (
  SELECT
    src.* EXCEPT (ping_type, os, app_build_id),
    COALESCE(sc.ping_type, src.ping_type) AS ping_type,
    COALESCE(sc.os, src.os) AS os,
    COALESCE(sc.app_build_id, src.app_build_id) AS app_build_id
  FROM
    glam_etl.firefox_desktop_glam_nightly__clients_histogram_aggregates_v1 AS src
  CROSS JOIN
    static_combos AS sc
),
-- Rescale each client histogram with histogram_normalized_sum(value, 1.0).
normalized_histograms AS (
  SELECT
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    ARRAY(
      SELECT AS STRUCT
        metric,
        metric_type,
        key,
        agg_type,
        mozfun.glam.histogram_normalized_sum(value, 1.0) AS aggregates
      FROM
        UNNEST(histogram_aggregates)
    ) AS histogram_aggregates
  FROM
    all_combos
),
-- One row per (attribute combination, metric, key, agg_type, bucket).
unnested AS (
  SELECT
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    hist.metric AS metric,
    hist.metric_type AS metric_type,
    hist.key AS key,
    hist.agg_type AS agg_type,
    bucket_entry.key AS bucket,
    bucket_entry.value
  FROM
    normalized_histograms
  CROSS JOIN
    UNNEST(histogram_aggregates) AS hist
  CROSS JOIN
    UNNEST(hist.aggregates) AS bucket_entry
),
-- Find information that can be used to construct the bucket range. Most of the
-- distributions follow a bucketing rule of 8*log2(n). This doesn't apply to the
-- custom distributions e.g. GeckoView, which needs to incorporate information
-- from the probe info service.
-- See: https://mozilla.github.io/glean/book/user/metrics/custom_distribution.html
distribution_metadata AS (
  -- Hard-coded ranges for the known custom distributions.
  SELECT
    *
  FROM
    UNNEST(
      ARRAY<
        STRUCT<
          metric_type STRING,
          metric STRING,
          range_min INT64,
          range_max INT64,
          bucket_count INT64,
          histogram_type STRING
        >
      >[
        ("custom_distribution", "geckoview_document_site_origins", 0, 100, 50, "exponential"),
        ("custom_distribution", "geckoview_per_document_site_origins", 0, 100, 50, "exponential"),
        ("custom_distribution", "gfx_checkerboard_peak_pixel_count", 1, 66355200, 50, "exponential"),
        ("custom_distribution", "gfx_checkerboard_severity", 1, 1073741824, 50, "exponential"),
        ("custom_distribution", "gfx_content_frame_time_from_paint", 1, 5000, 50, "exponential"),
        ("custom_distribution", "gfx_content_frame_time_from_vsync", 8, 792, 100, "linear"),
        ("custom_distribution", "gfx_content_frame_time_with_svg", 1, 5000, 50, "exponential"),
        (
          "custom_distribution",
          "gfx_content_frame_time_without_resource_upload",
          1,
          5000,
          50,
          "exponential"
        ),
        ("custom_distribution", "gfx_content_frame_time_without_upload", 1, 5000, 50, "exponential"),
        ("custom_distribution", "js_baseline_compile_percentage", 0, 100, 20, "linear"),
        ("custom_distribution", "js_delazification_percentage", 0, 100, 20, "linear"),
        ("custom_distribution", "js_execution_percentage", 0, 100, 20, "linear"),
        ("custom_distribution", "js_xdr_encode_percentage", 0, 100, 20, "linear"),
        (
          "custom_distribution",
          "performance_clone_deserialize_items",
          1,
          2147483646,
          50,
          "exponential"
        )
      ]
    )
  UNION ALL
  -- For all other metric types only range_max is known: the largest bucket
  -- observed in the data.
  SELECT
    metric_type,
    metric,
    NULL AS range_min,
    MAX(SAFE_CAST(bucket AS INT64)) AS range_max,
    NULL AS bucket_count,
    NULL AS histogram_type
  FROM
    unnested
  WHERE
    metric_type <> "custom_distribution"
  GROUP BY
    metric_type,
    metric
),
-- Sum the normalized bucket values over all clients in each group.
records AS (
  SELECT
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    agg_type,
    STRUCT<key STRING, value FLOAT64>(CAST(bucket AS STRING), 1.0 * SUM(value)) AS record
  FROM
    unnested
  GROUP BY
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    agg_type,
    bucket
)
SELECT
  * EXCEPT (metric_type, histogram_type),
  -- Suffix `custom_distribution` with bucketing type
  IF(
    histogram_type IS NOT NULL,
    CONCAT(metric_type, "_", histogram_type),
    metric_type
  ) AS metric_type
FROM
  records
LEFT OUTER JOIN
  distribution_metadata
USING
  (metric_type, metric)
|
|
@ -0,0 +1,14 @@
|
|||
-- query for firefox_desktop_glam_nightly__histogram_percentiles_v1;
-- Re-labels each histogram probe-count row as agg_type 'percentiles' and
-- replaces its aggregates with a fixed set of percentile key/value pairs
-- computed by mozfun.glam.percentile.
SELECT
  * EXCEPT (aggregates) REPLACE('percentiles' AS agg_type),
  ARRAY<STRUCT<key STRING, value FLOAT64>>[
    ('5', mozfun.glam.percentile(5, aggregates, metric_type)),
    ('25', mozfun.glam.percentile(25, aggregates, metric_type)),
    ('50', mozfun.glam.percentile(50, aggregates, metric_type)),
    ('75', mozfun.glam.percentile(75, aggregates, metric_type)),
    ('95', mozfun.glam.percentile(95, aggregates, metric_type)),
    ('99', mozfun.glam.percentile(99, aggregates, metric_type)),
    ('99.9', mozfun.glam.percentile(99.9, aggregates, metric_type))
  ] AS aggregates
FROM
  glam_etl.firefox_desktop_glam_nightly__histogram_probe_counts_v1
|
|
@ -0,0 +1,79 @@
|
|||
-- query for firefox_desktop_glam_nightly__histogram_probe_counts_v1;
-- Returns the bucket boundaries for a metric as INT64 values. The generator
-- is chosen by metric_type; an unrecognised type yields no buckets.
CREATE TEMP FUNCTION udf_get_buckets(
  metric_type STRING,
  range_min INT64,
  range_max INT64,
  bucket_count INT64
)
RETURNS ARRAY<INT64> AS (
  (
    SELECT
      ARRAY_AGG(CAST(item AS INT64))
    FROM
      UNNEST(
        CASE
          metric_type
          -- https://mozilla.github.io/glean/book/user/metrics/timing_distribution.html
          WHEN 'timing_distribution'
            THEN mozfun.glam.histogram_generate_functional_buckets(2, 8, range_max)
          -- https://mozilla.github.io/glean/book/user/metrics/memory_distribution.html
          WHEN 'memory_distribution'
            THEN mozfun.glam.histogram_generate_functional_buckets(2, 16, range_max)
          WHEN 'custom_distribution_exponential'
            THEN mozfun.glam.histogram_generate_exponential_buckets(
                range_min,
                range_max,
                bucket_count
              )
          WHEN 'custom_distribution_linear'
            THEN mozfun.glam.histogram_generate_linear_buckets(range_min, range_max, bucket_count)
          ELSE []
        END
      ) AS item
  )
);

-- Aggregates the per-bucket records of each group into one filled histogram.
-- total_users is the rounded sum of the bucket values in the group.
SELECT
  ping_type,
  os,
  app_version,
  app_build_id,
  channel,
  metric,
  metric_type,
  key,
  agg_type AS client_agg_type,
  'histogram' AS agg_type,
  CAST(ROUND(SUM(record.value)) AS INT64) AS total_users,
  mozfun.glam.histogram_fill_buckets_dirichlet(
    -- observed bucket sums
    mozfun.map.sum(ARRAY_AGG(record)),
    -- full list of expected bucket labels for this metric
    mozfun.glam.histogram_buckets_cast_string_array(
      udf_get_buckets(metric_type, range_min, range_max, bucket_count)
    ),
    -- same rounded total as total_users
    CAST(ROUND(SUM(record.value)) AS INT64)
  ) AS aggregates
FROM
  glam_etl.firefox_desktop_glam_nightly__histogram_bucket_counts_v1
GROUP BY
  ping_type,
  os,
  app_version,
  app_build_id,
  channel,
  range_min,
  range_max,
  bucket_count,
  metric,
  metric_type,
  key,
  client_agg_type,
  agg_type
|
|
@ -0,0 +1,36 @@
|
|||
-- query for firefox_desktop_glam_nightly__latest_versions_v1;
-- Finds, per channel, the highest app_version that was reported by more than
-- 5 distinct clients during the 28 days up to and including @submission_date.
--
-- Parameters:
--   @submission_date  last day (inclusive) of the 28-day lookback window.
WITH extracted AS (
  SELECT
    client_id,
    channel,
    app_version
  FROM
    glam_etl.firefox_desktop_glam_nightly__view_clients_daily_scalar_aggregates_v1
  WHERE
    submission_date
    BETWEEN DATE_SUB(@submission_date, INTERVAL 28 DAY)
    AND @submission_date
    AND channel IS NOT NULL
),
-- Versions with enough clients to be considered. No ORDER BY here: the outer
-- query only takes MAX(app_version) per channel, so row order in this CTE
-- cannot influence the result.
transformed AS (
  SELECT
    channel,
    app_version
  FROM
    extracted
  GROUP BY
    channel,
    app_version
  HAVING
    COUNT(DISTINCT client_id) > 5
)
SELECT
  channel,
  MAX(app_version) AS latest_version
FROM
  transformed
GROUP BY
  channel
|
|
@ -0,0 +1,273 @@
|
|||
-- query for firefox_desktop_glam_nightly__scalar_bucket_counts_v1;
-- Classifies each boolean metric of one client as "always", "never" or
-- "sometimes", based on the summed values of its 'true' and 'false'
-- aggregates. Metrics whose sums are both zero are dropped; the returned
-- agg_type is always the empty string.
CREATE TEMP FUNCTION udf_boolean_buckets(
  scalar_aggs ARRAY<
    STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>
  >
)
RETURNS ARRAY<
  STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, bucket STRING>
> AS (
  (
    WITH split_bools AS (
      -- Route each value into a true or a false column depending on agg_type.
      SELECT
        metric,
        metric_type,
        key,
        IF(agg_type = 'true', value, 0) AS bool_true,
        IF(agg_type = 'false', value, 0) AS bool_false
      FROM
        UNNEST(scalar_aggs)
      WHERE
        metric_type = "boolean"
    ),
    summed_bools AS (
      SELECT
        metric,
        metric_type,
        key,
        '' AS agg_type,
        SUM(bool_true) AS bool_true,
        SUM(bool_false) AS bool_false
      FROM
        split_bools
      GROUP BY
        metric,
        metric_type,
        key
    ),
    booleans AS (
      SELECT
        * EXCEPT (bool_true, bool_false),
        CASE
          WHEN bool_true > 0 AND bool_false > 0
            THEN "sometimes"
          WHEN bool_true > 0 AND bool_false = 0
            THEN "always"
          WHEN bool_true = 0 AND bool_false > 0
            THEN "never"
        END AS bucket
      FROM
        summed_bools
      WHERE
        bool_true > 0
        OR bool_false > 0
    )
    SELECT
      ARRAY_AGG((metric, metric_type, key, agg_type, bucket))
    FROM
      booleans
  )
);

WITH
-- Every row is cross joined with the 8 attribute combinations below, so a
-- table with n rows becomes n*8 rows. A "*" in a slot overrides the row's own
-- value and stands for "all values of that attribute"; NULL keeps the value.
static_combos AS (
  SELECT
    attr.*
  FROM
    UNNEST(
      ARRAY<STRUCT<ping_type STRING, os STRING, app_build_id STRING>>[
        (NULL, NULL, NULL),
        (NULL, NULL, "*"),
        (NULL, "*", NULL),
        ("*", NULL, NULL),
        (NULL, "*", "*"),
        ("*", NULL, "*"),
        ("*", "*", NULL),
        ("*", "*", "*")
      ]
    ) AS attr
),
all_combos AS (
  SELECT
    src.* EXCEPT (ping_type, os, app_build_id),
    COALESCE(sc.ping_type, src.ping_type) AS ping_type,
    COALESCE(sc.os, src.os) AS os,
    COALESCE(sc.app_build_id, src.app_build_id) AS app_build_id
  FROM
    glam_etl.firefox_desktop_glam_nightly__clients_scalar_aggregates_v1 AS src
  CROSS JOIN
    static_combos AS sc
),
-- Boolean metrics: bucket labels come from the UDF; no numeric range applies.
bucketed_booleans AS (
  SELECT
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    NULL AS range_min,
    NULL AS range_max,
    NULL AS bucket_count,
    udf_boolean_buckets(scalar_aggregates) AS scalar_aggregates
  FROM
    all_combos
),
-- Per (metric, key): log2 of the smallest and largest observed value, with
-- non-positive extremes replaced by 1, and a fixed bucket count of 100.
log_min_max AS (
  SELECT
    metric,
    key,
    LOG(IF(MIN(value) <= 0, 1, MIN(value)), 2) AS range_min,
    LOG(IF(MAX(value) <= 0, 1, MAX(value)), 2) AS range_max,
    100 AS bucket_count
  FROM
    all_combos
  CROSS JOIN
    UNNEST(scalar_aggregates)
  WHERE
    metric_type <> "boolean"
  GROUP BY
    metric,
    key
),
-- Bucket boundaries per (metric, key), formatted with two decimal places.
buckets_by_metric AS (
  SELECT
    *,
    ARRAY(
      SELECT
        FORMAT("%.*f", 2, bucket)
      FROM
        UNNEST(
          mozfun.glam.histogram_generate_scalar_buckets(range_min, range_max, bucket_count)
        ) AS bucket
    ) AS buckets
  FROM
    log_min_max
),
-- Numeric scalar metrics: assign every client value to its bucket.
bucketed_scalars AS (
  SELECT
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    agg_type,
    range_min,
    range_max,
    bucket_count,
    -- Keep two decimal places before converting bucket to a string
    SAFE_CAST(
      FORMAT("%.*f", 2, mozfun.glam.histogram_bucket_from_value(buckets, value) + 0.0001) AS STRING
    ) AS bucket
  FROM
    all_combos
  CROSS JOIN
    UNNEST(scalar_aggregates)
  LEFT JOIN
    buckets_by_metric
  USING
    (metric, key)
  WHERE
    metric_type IN ("counter", "quantity", "labeled_counter", "timespan")
),
booleans_and_scalars AS (
  SELECT
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    agg_type,
    range_min,
    range_max,
    bucket_count,
    bucket
  FROM
    bucketed_booleans
  CROSS JOIN
    UNNEST(scalar_aggregates)
  UNION ALL
  SELECT
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    agg_type,
    range_min,
    range_max,
    bucket_count,
    bucket
  FROM
    bucketed_scalars
)
-- Number of distinct clients per bucket and attribute combination.
SELECT
  ping_type,
  os,
  app_version,
  app_build_id,
  channel,
  metric,
  metric_type,
  key,
  agg_type AS client_agg_type,
  'histogram' AS agg_type,
  range_min,
  range_max,
  bucket_count,
  bucket,
  -- we could rely on count(*) because there is one row per client and bucket
  COUNT(DISTINCT client_id) AS count
FROM
  booleans_and_scalars
GROUP BY
  ping_type,
  os,
  app_version,
  app_build_id,
  channel,
  metric,
  metric_type,
  key,
  client_agg_type,
  range_min,
  range_max,
  bucket_count,
  bucket
|
|
@ -0,0 +1,78 @@
|
|||
-- query for firefox_desktop_glam_nightly__scalar_percentiles_v1;
-- Computes approximate percentiles of the client scalar values for every
-- attribute combination, metric, key and client aggregation type.
WITH
-- One row per client and scalar aggregate entry.
flat_clients_scalar_aggregates AS (
  SELECT
    * EXCEPT (scalar_aggregates)
  FROM
    glam_etl.firefox_desktop_glam_nightly__clients_scalar_aggregates_v1
  CROSS JOIN
    UNNEST(scalar_aggregates)
),
-- Every row is cross joined with the 8 attribute combinations below, so a
-- table with n rows becomes n*8 rows. A "*" in a slot overrides the row's own
-- value and stands for "all values of that attribute"; NULL keeps the value.
static_combos AS (
  SELECT
    attr.*
  FROM
    UNNEST(
      ARRAY<STRUCT<ping_type STRING, os STRING, app_build_id STRING>>[
        (NULL, NULL, NULL),
        (NULL, NULL, "*"),
        (NULL, "*", NULL),
        ("*", NULL, NULL),
        (NULL, "*", "*"),
        ("*", NULL, "*"),
        ("*", "*", NULL),
        ("*", "*", "*")
      ]
    ) AS attr
),
all_combos AS (
  SELECT
    src.* EXCEPT (ping_type, os, app_build_id),
    COALESCE(sc.ping_type, src.ping_type) AS ping_type,
    COALESCE(sc.os, src.os) AS os,
    COALESCE(sc.app_build_id, src.app_build_id) AS app_build_id
  FROM
    flat_clients_scalar_aggregates AS src
  CROSS JOIN
    static_combos AS sc
),
-- APPROX_QUANTILES(value, 1000) returns 1001 boundaries per group, from the
-- minimum to the maximum.
percentiles AS (
  SELECT
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    agg_type AS client_agg_type,
    'percentiles' AS agg_type,
    COUNT(DISTINCT(client_id)) AS total_users,
    APPROX_QUANTILES(value, 1000) AS aggregates
  FROM
    all_combos
  GROUP BY
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    client_agg_type
)
-- Replace the raw quantile array with the entries for the requested percentiles.
SELECT
  * REPLACE (
    mozfun.glam.map_from_array_offsets_precise(
      [5.0, 25.0, 50.0, 75.0, 95.0, 99.0, 99.9],
      aggregates
    ) AS aggregates
  )
FROM
  percentiles
|
|
@ -0,0 +1,53 @@
|
|||
-- query for firefox_desktop_glam_nightly__scalar_probe_counts_v1;
-- Collapses the per-bucket client counts of each group into a single filled
-- histogram. total_users is the sum of the bucket counts in the group.
SELECT
  ping_type,
  os,
  app_version,
  app_build_id,
  channel,
  metric,
  metric_type,
  key,
  client_agg_type,
  agg_type,
  SUM(count) AS total_users,
  mozfun.glam.histogram_fill_buckets_dirichlet(
    -- observed counts keyed by bucket label
    mozfun.map.sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
    -- complete list of bucket labels expected for this metric type; other
    -- metric types fall through the CASE and get NULL
    CASE
      WHEN metric_type IN ("counter", "quantity", "labeled_counter", "timespan")
        THEN ARRAY(
            SELECT
              FORMAT("%.*f", 2, bucket)
            FROM
              UNNEST(
                mozfun.glam.histogram_generate_scalar_buckets(range_min, range_max, bucket_count)
              ) AS bucket
            ORDER BY
              bucket
          )
      WHEN metric_type = "boolean"
        THEN ['always', 'never', 'sometimes']
    END,
    SUM(count)
  ) AS aggregates
FROM
  glam_etl.firefox_desktop_glam_nightly__scalar_bucket_counts_v1
GROUP BY
  ping_type,
  os,
  app_version,
  app_build_id,
  channel,
  range_min,
  range_max,
  bucket_count,
  metric,
  metric_type,
  key,
  client_agg_type,
  agg_type
|
|
@ -0,0 +1,17 @@
|
|||
CREATE OR REPLACE VIEW
  `glam-fenix-dev`.glam_etl.firefox_desktop_glam_nightly__view_clients_daily_histogram_aggregates_v1
AS
-- Nightly-only slice of the desktop daily histogram aggregates. The build id
-- is passed through mozfun.glam.build_seconds_to_hour, and channel is
-- replaced by the "*" wildcard.
SELECT
  * EXCEPT (app_build_id, channel),
  `mozfun.glam.build_seconds_to_hour`(app_build_id) AS app_build_id,
  "*" AS channel
FROM
  `glam-fenix-dev`.glam_etl.firefox_desktop__view_clients_daily_histogram_aggregates_v1
WHERE
  channel = 'nightly'
|
|
@ -0,0 +1,17 @@
|
|||
CREATE OR REPLACE VIEW
  `glam-fenix-dev`.glam_etl.firefox_desktop_glam_nightly__view_clients_daily_scalar_aggregates_v1
AS
-- Nightly-only slice of the desktop daily scalar aggregates. The build id is
-- passed through mozfun.glam.build_seconds_to_hour, and channel is replaced
-- by the "*" wildcard.
SELECT
  * EXCEPT (app_build_id, channel),
  `mozfun.glam.build_seconds_to_hour`(app_build_id) AS app_build_id,
  "*" AS channel
FROM
  `glam-fenix-dev`.glam_etl.firefox_desktop__view_clients_daily_scalar_aggregates_v1
WHERE
  channel = 'nightly'
|
|
@ -0,0 +1,29 @@
|
|||
-- view for firefox_desktop_glam_nightly__view_probe_counts_v1;
-- Stacks the four probe-count tables (scalar/histogram counts and
-- scalar/histogram percentiles) into one relation. UNION ALL matches columns
-- by position, so the four tables must share the same column order.
CREATE OR REPLACE VIEW
  `glam-fenix-dev.glam_etl.firefox_desktop_glam_nightly__view_probe_counts_v1`
AS
SELECT
  *
FROM
  `glam-fenix-dev.glam_etl.firefox_desktop_glam_nightly__scalar_probe_counts_v1`
UNION ALL
SELECT
  *
FROM
  `glam-fenix-dev.glam_etl.firefox_desktop_glam_nightly__histogram_probe_counts_v1`
UNION ALL
SELECT
  *
FROM
  `glam-fenix-dev.glam_etl.firefox_desktop_glam_nightly__scalar_percentiles_v1`
UNION ALL
SELECT
  *
FROM
  `glam-fenix-dev.glam_etl.firefox_desktop_glam_nightly__histogram_percentiles_v1`
|
|
@ -0,0 +1,71 @@
|
|||
-- view for firefox_desktop_glam_nightly__view_sample_counts_v1;
-- Sums the histogram bucket values per attribute combination, metric and key.
CREATE OR REPLACE VIEW
  `glam-fenix-dev.glam_etl.firefox_desktop_glam_nightly__view_sample_counts_v1`
AS
WITH
-- One row per client histogram; `value` still holds the bucket array.
all_clients AS (
  SELECT
    ping_type,
    os,
    app_version,
    app_build_id,
    channel,
    key,
    metric,
    value
  FROM
    `glam-fenix-dev`.glam_etl.firefox_desktop_glam_nightly__clients_histogram_aggregates_v1
  CROSS JOIN
    UNNEST(histogram_aggregates) AS h1
),
-- Every row is cross joined with the 8 attribute combinations below, so a
-- table with n rows becomes n*8 rows. A "*" in a slot overrides the row's own
-- value and stands for "all values of that attribute"; NULL keeps the value.
static_combos AS (
  SELECT
    attr.*
  FROM
    UNNEST(
      ARRAY<STRUCT<ping_type STRING, os STRING, app_build_id STRING>>[
        (NULL, NULL, NULL),
        (NULL, NULL, "*"),
        (NULL, "*", NULL),
        ("*", NULL, NULL),
        (NULL, "*", "*"),
        ("*", NULL, "*"),
        ("*", "*", NULL),
        ("*", "*", "*")
      ]
    ) AS attr
),
all_combos AS (
  SELECT
    src.* EXCEPT (ping_type, os, app_build_id),
    COALESCE(sc.ping_type, src.ping_type) AS ping_type,
    COALESCE(sc.os, src.os) AS os,
    COALESCE(sc.app_build_id, src.app_build_id) AS app_build_id
  FROM
    all_clients AS src
  CROSS JOIN
    static_combos AS sc
)
SELECT
  ping_type,
  os,
  app_version,
  app_build_id,
  channel,
  -- qualified because the unnested bucket entries are also referenced via key/value fields
  all_combos.key,
  metric,
  SUM(v1.value) AS total_sample
FROM
  all_combos
CROSS JOIN
  UNNEST(value) AS v1
GROUP BY
  ping_type,
  os,
  app_version,
  app_build_id,
  channel,
  key,
  metric
|
|
@ -0,0 +1,73 @@
|
|||
-- view for firefox_desktop_glam_nightly__view_user_counts_v1;
-- Counts distinct clients per attribute combination, considering clients
-- that appear in either the scalar or the histogram aggregates table.
CREATE OR REPLACE VIEW
  `glam-fenix-dev.glam_etl.firefox_desktop_glam_nightly__view_user_counts_v1`
AS
WITH
-- Client/attribute rows from both aggregate tables. Duplicates are harmless
-- because the final count is COUNT(DISTINCT client_id).
all_clients AS (
  SELECT
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel
  FROM
    `glam-fenix-dev`.glam_etl.firefox_desktop_glam_nightly__clients_scalar_aggregates_v1
  UNION ALL
  SELECT
    client_id,
    ping_type,
    os,
    app_version,
    app_build_id,
    channel
  FROM
    `glam-fenix-dev`.glam_etl.firefox_desktop_glam_nightly__clients_histogram_aggregates_v1
),
-- Every row is cross joined with the 8 attribute combinations below, so a
-- table with n rows becomes n*8 rows. A "*" in a slot overrides the row's own
-- value and stands for "all values of that attribute"; NULL keeps the value.
static_combos AS (
  SELECT
    attr.*
  FROM
    UNNEST(
      ARRAY<STRUCT<ping_type STRING, os STRING, app_build_id STRING>>[
        (NULL, NULL, NULL),
        (NULL, NULL, "*"),
        (NULL, "*", NULL),
        ("*", NULL, NULL),
        (NULL, "*", "*"),
        ("*", NULL, "*"),
        ("*", "*", NULL),
        ("*", "*", "*")
      ]
    ) AS attr
),
all_combos AS (
  SELECT
    src.* EXCEPT (ping_type, os, app_build_id),
    COALESCE(sc.ping_type, src.ping_type) AS ping_type,
    COALESCE(sc.os, src.os) AS os,
    COALESCE(sc.app_build_id, src.app_build_id) AS app_build_id
  FROM
    all_clients AS src
  CROSS JOIN
    static_combos AS sc
)
SELECT
  ping_type,
  os,
  app_version,
  app_build_id,
  channel,
  COUNT(DISTINCT client_id) AS total_users
FROM
  all_combos
GROUP BY
  ping_type,
  os,
  app_version,
  app_build_id,
  channel
|
Загрузка…
Ссылка в новой задаче