Anna Scholtz 2020-02-03 14:49:52 -08:00
Parent 97b5386b41
Commit 867543a660
18 changed files with 143 additions and 158 deletions
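The recurring change in this commit: BigQuery treats dotted names as dataset-qualified and reserves them for persistent UDFs, so query-local temporary functions are renamed from the dotted `udf.foo` form to plain `udf_foo`, while shared helpers are switched to persistent UDFs in the `udf` and `udf_js` datasets. Accordingly, the CI config below drops the `verify-generated-sql` job (which reran `script/generate_sql`) and wires `publish-persistent-udfs` into the workflow. A minimal sketch of the two naming forms; the `add_one` function and `example-project` are illustrative, not names from this repo:

```sql
-- Temporary UDF: a plain identifier, visible only to the query that defines it.
CREATE TEMP FUNCTION udf_add_one(x INT64) AS (x + 1);

SELECT
  udf_add_one(1) AS temp_result,
  -- Persistent UDF: lives in a dataset and is referenced with a dotted,
  -- optionally project-qualified path (names here are illustrative).
  `example-project`.udf.add_one(1) AS persistent_result;
```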

View file

@@ -25,20 +25,6 @@ jobs:
       paths:
         - venv/
       key: python-packages-v1-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "constraints.txt" }}
-  verify-generated-sql:
-    docker:
-      - image: python:3.8
-    steps:
-      - checkout
-      - *build
-      - run:
-          name: Verify that all of generated SQL is committed
-          command: |
-            rm -rf sql/
-            venv/bin/python script/generate_sql
-            echo "Inconsistencies between templates and generated SQL:"
-            git ls-files --other --modified -x sql/*
-            test `git ls-files --other --modified -x sql/* | wc -l` = 0
   verify-format-sql:
     docker:
       - image: python:3.8
@@ -85,7 +71,7 @@ workflows:
     jobs:
       - build:
          context: data-eng-circleci-tests
-      - verify-generated-sql
+      - publish-persistent-udfs
       - verify-format-sql
      - dry-run-sql
       - deploy:
@@ -93,7 +79,6 @@ workflows:
          requires:
            # can't run in parallel because CIRCLE_BUILD_NUM is same
            - build
-           - verify-generated-sql
          filters:
            branches:
              only: master

View file

@@ -11,7 +11,7 @@ SELECT
 FROM
   `moz-fx-data-shared-prod.telemetry_stable.*`,
   UNNEST(
-    `moz-fx-data-shared-prod`.udf.js.json_extract_missing_cols(
+    `moz-fx-data-shared-prod`.udf_js.json_extract_missing_cols(
       additional_properties,
       ["histogram_type"],
       ["activeAddons", "userPrefs", "activeGMPlugins", "simpleMeasurements"]

View file

@@ -36,7 +36,7 @@ transformed AS (
 FROM
   extracted,
   UNNEST(
-    `moz-fx-data-shared-prod`.udf.js.json_extract_missing_cols(
+    `moz-fx-data-shared-prod`.udf_js.json_extract_missing_cols(
       additional_properties,
       [],
       -- Manually curated list of known missing sections. The process to

View file

@@ -132,9 +132,9 @@ WITH
   datasource,
   type,
   submission_date,
-  udf.js_jackknife_sum_ci(20, ARRAY_AGG(mau)) AS mau_ci,
-  udf.js_jackknife_sum_ci(20, ARRAY_AGG(wau)) AS wau_ci,
-  udf.js_jackknife_sum_ci(20, ARRAY_AGG(dau)) AS dau_ci
+  udf_js.jackknife_sum_ci(20, ARRAY_AGG(mau)) AS mau_ci,
+  udf_js.jackknife_sum_ci(20, ARRAY_AGG(wau)) AS wau_ci,
+  udf_js.jackknife_sum_ci(20, ARRAY_AGG(dau)) AS dau_ci
 FROM
   per_bucket
 GROUP BY

View file

@@ -2,7 +2,7 @@
 -- telemetry data accepts countries as two-digit codes, but FxA
 -- data includes long-form country names. The logic here is specific
 -- to the FxA data.
-CREATE TEMP FUNCTION udf.contains_tier1_country(x ANY TYPE) AS (
+CREATE TEMP FUNCTION udf_contains_tier1_country(x ANY TYPE) AS (
   EXISTS(
     SELECT
       country
@@ -20,7 +20,7 @@ CREATE TEMP FUNCTION udf.contains_tier1_country(x ANY TYPE) AS (
 );
 -- This UDF is also only applicable in the context of this query.
-CREATE TEMP FUNCTION udf.contains_registration(x ANY TYPE) AS (
+CREATE TEMP FUNCTION udf_contains_registration(x ANY TYPE) AS (
   EXISTS(
     SELECT
       event_type
@@ -43,8 +43,8 @@ WITH windowed AS (
   udf.mode_last(ARRAY_AGG(app_version) OVER w1) AS app_version,
   udf.mode_last(ARRAY_AGG(os_name) OVER w1) AS os_name,
   udf.mode_last(ARRAY_AGG(os_version) OVER w1) AS os_version,
-  udf.contains_tier1_country(ARRAY_AGG(country) OVER w1) AS seen_in_tier1_country,
-  udf.contains_registration(ARRAY_AGG(event_type) OVER w1) AS registered,
+  udf_contains_tier1_country(ARRAY_AGG(country) OVER w1) AS seen_in_tier1_country,
+  udf_contains_registration(ARRAY_AGG(event_type) OVER w1) AS registered,
   COUNTIF(
     NOT (event_type = 'fxa_rp - engage' AND service = 'fx-monitor')
   ) OVER w1 = 0 AS monitor_only
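This hunk shows both halves of the convention at once: `udf.mode_last` keeps its dotted name because it resolves to a persistent UDF in the `udf` dataset, while the two functions defined at the top of this file become plain `udf_`-prefixed temp functions. A reduced sketch of that coexistence; `events`, `user_id`, and the country list are stand-ins, not the real FxA source:

```sql
-- Query-local temp UDF next to a persistent dataset UDF (reduced sketch).
CREATE TEMP FUNCTION udf_contains_tier1_country(x ANY TYPE) AS (
  EXISTS(SELECT c FROM UNNEST(x) AS c WHERE c IN ('United States', 'Germany'))
);

SELECT
  udf.mode_last(ARRAY_AGG(country) OVER w1) AS country,  -- persistent `udf` dataset
  udf_contains_tier1_country(ARRAY_AGG(country) OVER w1) AS seen_in_tier1_country
FROM
  events
WINDOW
  w1 AS (PARTITION BY user_id)
```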

View file

@@ -22,7 +22,7 @@ LANGUAGE js AS
 ''';
-CREATE TEMP FUNCTION udf.get_bucket_range(histograms ARRAY<STRING>) AS ((
+CREATE TEMP FUNCTION udf_get_bucket_range(histograms ARRAY<STRING>) AS ((
   WITH buckets AS (
     SELECT
       string_to_arr(JSON_EXTRACT(histogram, "$.range")) AS bucket_range,
@@ -42,7 +42,7 @@ CREATE TEMP FUNCTION udf.get_bucket_range(histograms ARRAY<STRING>) AS ((
   buckets));
-CREATE TEMP FUNCTION udf.get_histogram_type(histograms ARRAY<STRING>) AS ((
+CREATE TEMP FUNCTION udf_get_histogram_type(histograms ARRAY<STRING>) AS ((
   SELECT
     CASE SAFE_CAST(JSON_EXTRACT(histogram, "$.histogram_type") AS INT64)
       WHEN 0 THEN 'histogram-exponential'
@@ -59,7 +59,7 @@ CREATE TEMP FUNCTION udf.get_histogram_type(histograms ARRAY<STRING>) AS ((
 ));
 CREATE TEMP FUNCTION
-  udf.aggregate_json_sum(histograms ARRAY<STRING>) AS (ARRAY(
+  udf_aggregate_json_sum(histograms ARRAY<STRING>) AS (ARRAY(
     SELECT
       AS STRUCT SPLIT(keyval, ':')[OFFSET(0)] AS key,
       SUM(SAFE_CAST(SPLIT(keyval, ':')[OFFSET(1)] AS INT64)) AS value
@@ -4923,12 +4923,12 @@ SELECT
     bucket_range STRUCT<first_bucket INT64, last_bucket INT64, num_buckets INT64>,
     value ARRAY<STRUCT<key STRING, value INT64>>
   > (metric,
-    udf.get_histogram_type(value),
+    udf_get_histogram_type(value),
     '',
     process,
     'summed_histogram',
-    udf.get_bucket_range(value),
-    udf.aggregate_json_sum(value))) AS histogram_aggregates
+    udf_get_bucket_range(value),
+    udf_aggregate_json_sum(value))) AS histogram_aggregates
 FROM aggregated
 GROUP BY
   1, 2, 3, 4, 5, 6

View file

@@ -22,7 +22,7 @@ LANGUAGE js AS
 ''';
-CREATE TEMP FUNCTION udf.get_bucket_range(histograms ARRAY<STRING>) AS ((
+CREATE TEMP FUNCTION udf_get_bucket_range(histograms ARRAY<STRING>) AS ((
   WITH buckets AS (
     SELECT
       string_to_arr(JSON_EXTRACT(histogram, "$.range")) AS bucket_range,
@@ -42,7 +42,7 @@ CREATE TEMP FUNCTION udf.get_bucket_range(histograms ARRAY<STRING>) AS ((
   buckets));
-CREATE TEMP FUNCTION udf.get_histogram_type(histograms ARRAY<STRING>) AS ((
+CREATE TEMP FUNCTION udf_get_histogram_type(histograms ARRAY<STRING>) AS ((
   SELECT
     CASE SAFE_CAST(JSON_EXTRACT(histogram, "$.histogram_type") AS INT64)
       WHEN 0 THEN 'histogram-exponential'
@@ -59,7 +59,7 @@ CREATE TEMP FUNCTION udf.get_histogram_type(histograms ARRAY<STRING>) AS ((
 ));
 CREATE TEMP FUNCTION
-  udf.aggregate_json_sum(histograms ARRAY<STRING>) AS (ARRAY(
+  udf_aggregate_json_sum(histograms ARRAY<STRING>) AS (ARRAY(
     SELECT
       AS STRUCT SPLIT(keyval, ':')[OFFSET(0)] AS key,
       SUM(SAFE_CAST(SPLIT(keyval, ':')[OFFSET(1)] AS INT64)) AS value
@@ -465,12 +465,12 @@ SELECT
     value ARRAY<STRUCT<key STRING, value INT64>>
   >(
     metric,
-    udf.get_histogram_type(bucket_range),
+    udf_get_histogram_type(bucket_range),
     key,
     process,
     '',
-    udf.get_bucket_range(bucket_range),
-    udf.aggregate_json_sum(value)
+    udf_get_bucket_range(bucket_range),
+    udf_aggregate_json_sum(value)
   )) AS histogram_aggregates
 FROM aggregated
 GROUP BY

View file

@@ -1,4 +1,4 @@
-CREATE TEMP FUNCTION udf.merged_user_data(old_aggs ANY TYPE, new_aggs ANY TYPE)
+CREATE TEMP FUNCTION udf_merged_user_data(old_aggs ANY TYPE, new_aggs ANY TYPE)
 RETURNS ARRAY<STRUCT<
   first_bucket INT64,
   last_bucket INT64,
@@ -216,5 +216,5 @@ SELECT
   app_version,
   app_build_id,
   channel,
-  udf.merged_user_data(old_aggs, new_aggs) AS histogram_aggregates
+  udf_merged_user_data(old_aggs, new_aggs) AS histogram_aggregates
 FROM joined_new_old

View file

@@ -1,4 +1,4 @@
-CREATE TEMP FUNCTION udf.normalized_sum (arrs ARRAY<STRUCT<key STRING, value INT64>>)
+CREATE TEMP FUNCTION udf_normalized_sum (arrs ARRAY<STRUCT<key STRING, value INT64>>)
 RETURNS ARRAY<STRUCT<key STRING, value FLOAT64>> AS (
   -- Returns the normalized sum of the input maps.
   -- It returns the total_count[k] / SUM(total_count)
@@ -40,7 +40,7 @@ RETURNS ARRAY<STRUCT<key STRING, value FLOAT64>> AS (
   )
 );
-CREATE TEMP FUNCTION udf.normalize_histograms (
+CREATE TEMP FUNCTION udf_normalize_histograms (
   arrs ARRAY<STRUCT<
     first_bucket INT64,
     last_bucket INT64,
@@ -75,7 +75,7 @@ RETURNS ARRAY<STRUCT<
       key,
       process,
       agg_type,
-      udf.normalized_sum(aggregates) AS aggregates
+      udf_normalized_sum(aggregates) AS aggregates
     FROM UNNEST(arrs))
   SELECT ARRAY_AGG((first_bucket, last_bucket, num_buckets, latest_version, metric, metric_type, key, process, agg_type, aggregates))
@@ -89,7 +89,7 @@ WITH normalized_histograms AS (
   app_version,
   app_build_id,
   channel,
-  udf.normalize_histograms(histogram_aggregates) AS histogram_aggregates
+  udf_normalize_histograms(histogram_aggregates) AS histogram_aggregates
 FROM clients_histogram_aggregates_v1),
 unnested AS (

View file

@@ -1,4 +1,4 @@
-CREATE TEMP FUNCTION udf.exponential_buckets(min FLOAT64, max FLOAT64, nBuckets FLOAT64)
+CREATE TEMP FUNCTION udf_exponential_buckets(min FLOAT64, max FLOAT64, nBuckets FLOAT64)
 RETURNS ARRAY<FLOAT64>
 LANGUAGE js AS
 '''
@@ -19,7 +19,7 @@ LANGUAGE js AS
   return retArray
 ''';
-CREATE TEMP FUNCTION udf.linear_buckets(min FLOAT64, max FLOAT64, nBuckets FLOAT64)
+CREATE TEMP FUNCTION udf_linear_buckets(min FLOAT64, max FLOAT64, nBuckets FLOAT64)
 RETURNS ARRAY<FLOAT64>
 LANGUAGE js AS
 '''
@@ -31,7 +31,7 @@ LANGUAGE js AS
   return result;
 ''';
-CREATE TEMP FUNCTION udf.to_string_arr(buckets ARRAY<INT64>)
+CREATE TEMP FUNCTION udf_to_string_arr(buckets ARRAY<INT64>)
 RETURNS ARRAY<STRING> AS (
   (
     SELECT ARRAY_AGG(CAST(bucket AS STRING))
@@ -40,15 +40,15 @@ RETURNS ARRAY<STRING> AS (
 );
-CREATE TEMP FUNCTION udf.get_buckets(min INT64, max INT64, num INT64, metric_type STRING)
+CREATE TEMP FUNCTION udf_get_buckets(min INT64, max INT64, num INT64, metric_type STRING)
 RETURNS ARRAY<INT64> AS (
   (
     WITH buckets AS (
       SELECT
         CASE
           WHEN metric_type = 'histogram-exponential'
-          THEN udf.exponential_buckets(min, max, num)
-          ELSE udf.linear_buckets(min, max, num)
+          THEN udf_exponential_buckets(min, max, num)
+          ELSE udf_linear_buckets(min, max, num)
         END AS arr
     )
@@ -58,7 +58,7 @@ RETURNS ARRAY<INT64> AS (
   )
 );
-CREATE TEMP FUNCTION udf.dedupe_map_sum (map ARRAY<STRUCT<key STRING, value FLOAT64>>)
+CREATE TEMP FUNCTION udf_dedupe_map_sum (map ARRAY<STRUCT<key STRING, value FLOAT64>>)
 RETURNS ARRAY<STRUCT<key STRING, value FLOAT64>> AS (
   -- Given a MAP with duplicate keys, de-duplicates by summing the values of duplicate keys
   (
@@ -78,7 +78,7 @@ RETURNS ARRAY<STRUCT<key STRING, value FLOAT64>> AS (
   )
 );
-CREATE TEMP FUNCTION udf.buckets_to_map (buckets ARRAY<STRING>)
+CREATE TEMP FUNCTION udf_buckets_to_map (buckets ARRAY<STRING>)
 RETURNS ARRAY<STRUCT<key STRING, value FLOAT64>> AS (
   -- Given an array of values, transform them into a histogram MAP
   -- with the number of each key in the `buckets` array
@@ -90,7 +90,7 @@ RETURNS ARRAY<STRUCT<key STRING, value FLOAT64>> AS (
   )
 );
-CREATE TEMP FUNCTION udf.fill_buckets(input_map ARRAY<STRUCT<key STRING, value FLOAT64>>, buckets ARRAY<STRING>)
+CREATE TEMP FUNCTION udf_fill_buckets(input_map ARRAY<STRUCT<key STRING, value FLOAT64>>, buckets ARRAY<STRING>)
 RETURNS ARRAY<STRUCT<key STRING, value FLOAT64>> AS (
   -- Given a MAP `input_map`, fill in any missing keys with value `0.0`
   (
@@ -123,9 +123,9 @@ SELECT
   agg_type AS client_agg_type,
   'histogram' AS agg_type,
   CAST(ROUND(SUM(record.value)) AS INT64) AS total_users,
-  udf.fill_buckets(udf_dedupe_map_sum(
+  udf_fill_buckets(udf_dedupe_map_sum(
     ARRAY_AGG(record)
-  ), udf.to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
+  ), udf_to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
 FROM clients_histogram_bucket_counts_v1
 WHERE first_bucket IS NOT NULL
   AND os IS NOT NULL
@@ -157,9 +157,9 @@ SELECT
   agg_type AS client_agg_type,
   'histogram' AS agg_type,
   CAST(ROUND(SUM(record.value)) AS INT64) AS total_users,
-  udf.fill_buckets(udf_dedupe_map_sum(
+  udf_fill_buckets(udf_dedupe_map_sum(
     ARRAY_AGG(record)
-  ), udf.to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
+  ), udf_to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
 FROM clients_histogram_bucket_counts_v1
 WHERE first_bucket IS NOT NULL
 GROUP BY
@@ -189,9 +189,9 @@ SELECT
   agg_type AS client_agg_type,
   'histogram' AS agg_type,
   CAST(ROUND(SUM(record.value)) AS INT64) AS total_users,
-  udf.fill_buckets(udf_dedupe_map_sum(
+  udf_fill_buckets(udf_dedupe_map_sum(
     ARRAY_AGG(record)
-  ), udf.to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
+  ), udf_to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
 FROM clients_histogram_bucket_counts_v1
 WHERE first_bucket IS NOT NULL
   AND os IS NOT NULL
@@ -222,9 +222,9 @@ SELECT
   agg_type AS client_agg_type,
   'histogram' AS agg_type,
   CAST(ROUND(SUM(record.value)) AS INT64) AS total_users,
-  udf.fill_buckets(udf_dedupe_map_sum(
+  udf_fill_buckets(udf_dedupe_map_sum(
     ARRAY_AGG(record)
-  ), udf.to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
+  ), udf_to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
 FROM clients_histogram_bucket_counts_v1
 WHERE first_bucket IS NOT NULL
   AND os IS NOT NULL
@@ -255,9 +255,9 @@ SELECT
   agg_type AS client_agg_type,
   'histogram' AS agg_type,
   CAST(ROUND(SUM(record.value)) AS INT64) AS total_users,
-  udf.fill_buckets(udf_dedupe_map_sum(
+  udf_fill_buckets(udf_dedupe_map_sum(
     ARRAY_AGG(record)
-  ), udf.to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
+  ), udf_to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
 FROM clients_histogram_bucket_counts_v1
 WHERE first_bucket IS NOT NULL
   AND os IS NOT NULL
@@ -287,9 +287,9 @@ SELECT
   agg_type AS client_agg_type,
   'histogram' AS agg_type,
   CAST(ROUND(SUM(record.value)) AS INT64) AS total_users,
-  udf.fill_buckets(udf_dedupe_map_sum(
+  udf_fill_buckets(udf_dedupe_map_sum(
     ARRAY_AGG(record)
-  ), udf.to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
+  ), udf_to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
 FROM clients_histogram_bucket_counts_v1
 WHERE first_bucket IS NOT NULL
 GROUP BY
@@ -318,9 +318,9 @@ SELECT
   agg_type AS client_agg_type,
   'histogram' AS agg_type,
   CAST(ROUND(SUM(record.value)) AS INT64) AS total_users,
-  udf.fill_buckets(udf_dedupe_map_sum(
+  udf_fill_buckets(udf_dedupe_map_sum(
     ARRAY_AGG(record)
-  ), udf.to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
+  ), udf_to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
 FROM clients_histogram_bucket_counts_v1
 WHERE first_bucket IS NOT NULL
 GROUP BY
@@ -348,9 +348,9 @@ SELECT
   agg_type AS client_agg_type,
   'histogram' AS agg_type,
   CAST(ROUND(SUM(record.value)) AS INT64) AS total_users,
-  udf.fill_buckets(udf_dedupe_map_sum(
+  udf_fill_buckets(udf_dedupe_map_sum(
     ARRAY_AGG(record)
-  ), udf.to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
+  ), udf_to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
 FROM clients_histogram_bucket_counts_v1
 WHERE first_bucket IS NOT NULL
   AND os IS NOT NULL
@@ -379,9 +379,9 @@ SELECT
   agg_type AS client_agg_type,
   'histogram' AS agg_type,
   CAST(ROUND(SUM(record.value)) AS INT64) AS total_users,
-  udf.fill_buckets(udf_dedupe_map_sum(
+  udf_fill_buckets(udf_dedupe_map_sum(
     ARRAY_AGG(record)
-  ), udf.to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
+  ), udf_to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
 FROM clients_histogram_bucket_counts_v1
 WHERE first_bucket IS NOT NULL
 GROUP BY
@@ -409,9 +409,9 @@ SELECT
   agg_type AS client_agg_type,
   'histogram' AS agg_type,
   CAST(ROUND(SUM(record.value)) AS INT64) AS total_users,
-  udf.fill_buckets(udf_dedupe_map_sum(
+  udf_fill_buckets(udf_dedupe_map_sum(
     ARRAY_AGG(record)
-  ), udf.to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
+  ), udf_to_string_arr(udf_get_buckets(first_bucket, last_bucket, num_buckets, metric_type))) AS aggregates
 FROM clients_histogram_bucket_counts_v1
 WHERE first_bucket IS NOT NULL
 GROUP BY
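The aggregation blocks above all compose the same three temp helpers: sum duplicate keys, generate the bucket list, and pad the map with zero-valued entries. A small worked call under the definitions in this file; the input values are invented:

```sql
SELECT
  udf_fill_buckets(
    udf_dedupe_map_sum([
      STRUCT('1' AS key, 2.0 AS value),
      STRUCT('1' AS key, 3.0 AS value)  -- duplicate key, summed to 5.0
    ]),
    udf_to_string_arr(udf_get_buckets(1, 4, 3, 'histogram-linear'))
  ) AS aggregates;
-- Any bucket produced by udf_get_buckets but absent from the deduplicated
-- map comes back with value 0.0.
```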

View file

@@ -1,4 +1,4 @@
-CREATE TEMP FUNCTION udf.merged_user_data(
+CREATE TEMP FUNCTION udf_merged_user_data(
   old_aggs ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, process STRING, agg_type STRING, value FLOAT64>>,
   new_aggs ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, process STRING, agg_type STRING, value FLOAT64>>)
@@ -215,5 +215,5 @@ SELECT
   app_version,
   app_build_id,
   channel,
-  udf.merged_user_data(old_aggs, new_aggs) AS scalar_aggregates
+  udf_merged_user_data(old_aggs, new_aggs) AS scalar_aggregates
 FROM joined_new_old

View file

@@ -1,4 +1,4 @@
-CREATE TEMP FUNCTION udf.bucket (
+CREATE TEMP FUNCTION udf_bucket (
   val FLOAT64
 )
 RETURNS FLOAT64 AS (
@@ -10,7 +10,7 @@ RETURNS FLOAT64 AS (
   )
 );
-CREATE TEMP FUNCTION udf.boolean_buckets(
+CREATE TEMP FUNCTION udf_boolean_buckets(
   scalar_aggs ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, process STRING, agg_type STRING, value FLOAT64>>)
 RETURNS ARRAY<STRUCT<metric STRING,
   metric_type STRING,
@@ -72,7 +72,7 @@ WITH bucketed_booleans AS (
   app_version,
   app_build_id,
   channel,
-  udf.boolean_buckets(scalar_aggregates) AS scalar_aggregates
+  udf_boolean_buckets(scalar_aggregates) AS scalar_aggregates
 FROM
   clients_scalar_aggregates_v1),
@@ -88,7 +88,7 @@ bucketed_scalars AS (
   key,
   process,
   agg_type,
-  SAFE_CAST(udf.bucket(SAFE_CAST(value AS FLOAT64)) AS STRING) AS bucket
+  SAFE_CAST(udf_bucket(SAFE_CAST(value AS FLOAT64)) AS STRING) AS bucket
 FROM
   clients_scalar_aggregates_v1
 CROSS JOIN UNNEST(scalar_aggregates)

View file

@@ -1,4 +1,4 @@
-CREATE TEMP FUNCTION udf.get_buckets()
+CREATE TEMP FUNCTION udf_get_buckets()
 RETURNS ARRAY<STRING> AS (
   (
     SELECT ARRAY_AGG(CAST(bucket AS STRING))
@@ -6,7 +6,7 @@ RETURNS ARRAY<STRING> AS (
   )
 );
-CREATE TEMP FUNCTION udf.dedupe_map_sum (map ARRAY<STRUCT<key STRING, value FLOAT64>>)
+CREATE TEMP FUNCTION udf_dedupe_map_sum (map ARRAY<STRUCT<key STRING, value FLOAT64>>)
 RETURNS ARRAY<STRUCT<key STRING, value FLOAT64>> AS (
   -- Given a MAP with duplicate keys, de-duplicates by summing the values of duplicate keys
   (
@@ -26,7 +26,7 @@ RETURNS ARRAY<STRUCT<key STRING, value FLOAT64>> AS (
   )
 );
-CREATE TEMP FUNCTION udf.fill_buckets(input_map ARRAY<STRUCT<key STRING, value FLOAT64>>, buckets ARRAY<STRING>)
+CREATE TEMP FUNCTION udf_fill_buckets(input_map ARRAY<STRUCT<key STRING, value FLOAT64>>, buckets ARRAY<STRING>)
 RETURNS ARRAY<STRUCT<key STRING, value FLOAT64>> AS (
   -- Given a MAP `input_map`, fill in any missing keys with value `0.0`
   (
@@ -61,13 +61,13 @@ SELECT
   SUM(count) AS total_users,
   CASE
     WHEN metric_type = 'scalar' OR metric_type = 'keyed-scalar'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
-      udf.get_buckets()
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+      udf_get_buckets()
     )
     WHEN metric_type = 'boolean' OR metric_type = 'keyed-scalar-boolean'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
       ['always','never','sometimes'])
   END AS aggregates
 FROM
@@ -101,13 +101,13 @@ SELECT
   SUM(count) AS total_users,
   CASE
     WHEN metric_type = 'scalar' OR metric_type = 'keyed-scalar'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
-      udf.get_buckets()
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+      udf_get_buckets()
    )
     WHEN metric_type = 'boolean' OR metric_type = 'keyed-scalar-boolean'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
       ['always','never','sometimes'])
   END AS aggregates
 FROM
@@ -139,13 +139,13 @@ SELECT
   SUM(count) AS total_users,
   CASE
     WHEN metric_type = 'scalar' OR metric_type = 'keyed-scalar'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
-      udf.get_buckets()
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+      udf_get_buckets()
     )
     WHEN metric_type = 'boolean' OR metric_type = 'keyed-scalar-boolean'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
       ['always','never','sometimes'])
   END AS aggregates
 FROM
@@ -178,13 +178,13 @@ SELECT
   SUM(count) AS total_users,
   CASE
     WHEN metric_type = 'scalar' OR metric_type = 'keyed-scalar'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
-      udf.get_buckets()
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+      udf_get_buckets()
     )
     WHEN metric_type = 'boolean' OR metric_type = 'keyed-scalar-boolean'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
       ['always','never','sometimes'])
   END AS aggregates
 FROM
@@ -217,13 +217,13 @@ SELECT
   SUM(count) AS total_users,
   CASE
     WHEN metric_type = 'scalar' OR metric_type = 'keyed-scalar'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
-      udf.get_buckets()
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+      udf_get_buckets()
     )
     WHEN metric_type = 'boolean' OR metric_type = 'keyed-scalar-boolean'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
       ['always','never','sometimes'])
   END AS aggregates
 FROM
@@ -255,13 +255,13 @@ SELECT
   SUM(count) AS total_users,
   CASE
     WHEN metric_type = 'scalar' OR metric_type = 'keyed-scalar'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
-      udf.get_buckets()
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+      udf_get_buckets()
     )
     WHEN metric_type = 'boolean' OR metric_type = 'keyed-scalar-boolean'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
       ['always','never','sometimes'])
   END AS aggregates
 FROM
@@ -292,13 +292,13 @@ SELECT
   SUM(count) AS total_users,
   CASE
     WHEN metric_type = 'scalar' OR metric_type = 'keyed-scalar'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
-      udf.get_buckets()
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+      udf_get_buckets()
     )
     WHEN metric_type = 'boolean' OR metric_type = 'keyed-scalar-boolean'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
       ['always','never','sometimes'])
   END AS aggregates
 FROM
@@ -328,13 +328,13 @@ SELECT
   SUM(count) AS total_users,
   CASE
     WHEN metric_type = 'scalar' OR metric_type = 'keyed-scalar'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
-      udf.get_buckets()
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+      udf_get_buckets()
     )
     WHEN metric_type = 'boolean' OR metric_type = 'keyed-scalar-boolean'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
       ['always','never','sometimes'])
   END AS aggregates
 FROM
@@ -365,13 +365,13 @@ SELECT
   SUM(count) AS total_users,
   CASE
     WHEN metric_type = 'scalar' OR metric_type = 'keyed-scalar'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
-      udf.get_buckets()
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+      udf_get_buckets()
     )
     WHEN metric_type = 'boolean' OR metric_type = 'keyed-scalar-boolean'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
       ['always','never','sometimes'])
   END AS aggregates
 FROM
@@ -401,13 +401,13 @@ SELECT
   SUM(count) AS total_users,
   CASE
     WHEN metric_type = 'scalar' OR metric_type = 'keyed-scalar'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
-      udf.get_buckets()
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+      udf_get_buckets()
     )
     WHEN metric_type = 'boolean' OR metric_type = 'keyed-scalar-boolean'
-    THEN udf.fill_buckets(
-      udf.dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
+    THEN udf_fill_buckets(
+      udf_dedupe_map_sum(ARRAY_AGG(STRUCT<key STRING, value FLOAT64>(bucket, count))),
       ['always','never','sometimes'])
   END AS aggregates
 FROM

View file

@@ -30,7 +30,7 @@ WITH
   -- For all other dimensions, we use the mode of observed values in the day.
   udf.mode_last(ARRAY_AGG(metadata.uri.app_name) OVER w1) AS app_name,
   udf.mode_last(ARRAY_AGG(os) OVER w1) AS os,
-  udf.json_mode_last(ARRAY_AGG(udf_geo_struct(metadata.geo.country, metadata.geo.city, NULL, NULL)) OVER w1).* EXCEPT (geo_subdivision1, geo_subdivision2),
+  udf.json_mode_last(ARRAY_AGG(udf.geo_struct(metadata.geo.country, metadata.geo.city, NULL, NULL)) OVER w1).* EXCEPT (geo_subdivision1, geo_subdivision2),
   udf.mode_last(ARRAY_AGG(metadata.uri.app_build_id) OVER w1) AS app_build_id,
   udf.mode_last(ARRAY_AGG(normalized_channel) OVER w1) AS normalized_channel,
   udf.mode_last(ARRAY_AGG(locale) OVER w1) AS locale,

View file

@@ -1,5 +1,5 @@
 CREATE TEMP FUNCTION
-  udf.contains_tier1_country(x ANY TYPE) AS ( --
+  udf_contains_tier1_country(x ANY TYPE) AS ( --
   EXISTS(
     SELECT
       country
@@ -15,7 +15,7 @@ CREATE TEMP FUNCTION
 --
 -- This UDF is also only applicable in the context of this query.
 CREATE TEMP FUNCTION
-  udf.contains_registration(x ANY TYPE) AS ( --
+  udf_contains_registration(x ANY TYPE) AS ( --
   EXISTS(
     SELECT
       event_type
@@ -62,8 +62,8 @@ WITH
   udf.mode_last(ARRAY_AGG(app_version) OVER w1) AS app_version,
   udf.mode_last(ARRAY_AGG(os_name) OVER w1) AS os_name,
   udf.mode_last(ARRAY_AGG(os_version) OVER w1) AS os_version,
-  udf.contains_tier1_country(ARRAY_AGG(country) OVER w1) AS seen_in_tier1_country,
-  udf.contains_registration(ARRAY_AGG(event_type) OVER w1) AS registered
+  udf_contains_tier1_country(ARRAY_AGG(country) OVER w1) AS seen_in_tier1_country,
+  udf_contains_registration(ARRAY_AGG(event_type) OVER w1) AS registered
 FROM
   base
 WHERE

View file

@@ -1,4 +1,4 @@
-CREATE TEMP FUNCTION udf.percentile(percentile FLOAT64, histogram ARRAY<STRUCT<key STRING, value FLOAT64>>, type STRING)
+CREATE TEMP FUNCTION udf_percentile(percentile FLOAT64, histogram ARRAY<STRUCT<key STRING, value FLOAT64>>, type STRING)
 RETURNS FLOAT64
 LANGUAGE js AS
 '''
@@ -48,11 +48,11 @@ SELECT *
     key STRING,
     value FLOAT64
   >> [
-    ('5', udf.percentile(5, aggregates, metric_type)),
-    ('25', udf.percentile(25, aggregates, metric_type)),
-    ('50', udf.percentile(50, aggregates, metric_type)),
-    ('75', udf.percentile(75, aggregates, metric_type)),
-    ('95', udf.percentile(95, aggregates, metric_type))
+    ('5', udf_percentile(5, aggregates, metric_type)),
+    ('25', udf_percentile(25, aggregates, metric_type)),
+    ('50', udf_percentile(50, aggregates, metric_type)),
+    ('75', udf_percentile(75, aggregates, metric_type)),
+    ('95', udf_percentile(95, aggregates, metric_type))
   ] AS aggregates
 FROM client_probe_counts_v1
 WHERE metric_type LIKE "%histogram%"

View file

@@ -110,20 +110,20 @@ SELECT
   payload.info.reason,
   payload.info.timezone_offset,
   -- Different types of crashes / hangs; format:off
-  udf.json_extract_histogram(udf_get_key(payload.keyed_histograms.subprocess_crashes_with_dump, 'pluginhang')).sum AS plugin_hangs,
-  udf.json_extract_histogram(udf_get_key(payload.keyed_histograms.subprocess_abnormal_abort, 'plugin')).sum AS aborts_plugin,
-  udf.json_extract_histogram(udf_get_key(payload.keyed_histograms.subprocess_abnormal_abort, 'content')).sum AS aborts_content,
-  udf.json_extract_histogram(udf_get_key(payload.keyed_histograms.subprocess_abnormal_abort, 'gmplugin')).sum AS aborts_gmplugin,
-  udf.json_extract_histogram(udf_get_key(payload.keyed_histograms.subprocess_crashes_with_dump, 'plugin')).sum AS crashes_detected_plugin,
-  udf.json_extract_histogram(udf_get_key(payload.keyed_histograms.subprocess_crashes_with_dump, 'content')).sum AS crashes_detected_content,
-  udf.json_extract_histogram(udf_get_key(payload.keyed_histograms.subprocess_crashes_with_dump, 'gmplugin')).sum AS crashes_detected_gmplugin,
-  udf.json_extract_histogram(udf_get_key(payload.keyed_histograms.process_crash_submit_attempt, 'main-crash')).sum AS crash_submit_attempt_main,
-  udf.json_extract_histogram(udf_get_key(payload.keyed_histograms.process_crash_submit_attempt, 'content-crash')).sum AS crash_submit_attempt_content,
-  udf.json_extract_histogram(udf_get_key(payload.keyed_histograms.process_crash_submit_attempt, 'plugin-crash')).sum AS crash_submit_attempt_plugin,
-  udf.json_extract_histogram(udf_get_key(payload.keyed_histograms.process_crash_submit_success, 'main-crash')).sum AS crash_submit_success_main,
-  udf.json_extract_histogram(udf_get_key(payload.keyed_histograms.process_crash_submit_success, 'content-crash')).sum AS crash_submit_success_content,
-  udf.json_extract_histogram(udf_get_key(payload.keyed_histograms.process_crash_submit_success, 'plugin-crash')).sum AS crash_submit_success_plugin,
-  udf.json_extract_histogram(udf_get_key(payload.keyed_histograms.subprocess_kill_hard, 'ShutDownKill')).sum AS shutdown_kill,
+  udf.json_extract_histogram(udf.get_key(payload.keyed_histograms.subprocess_crashes_with_dump, 'pluginhang')).sum AS plugin_hangs,
+  udf.json_extract_histogram(udf.get_key(payload.keyed_histograms.subprocess_abnormal_abort, 'plugin')).sum AS aborts_plugin,
+  udf.json_extract_histogram(udf.get_key(payload.keyed_histograms.subprocess_abnormal_abort, 'content')).sum AS aborts_content,
+  udf.json_extract_histogram(udf.get_key(payload.keyed_histograms.subprocess_abnormal_abort, 'gmplugin')).sum AS aborts_gmplugin,
+  udf.json_extract_histogram(udf.get_key(payload.keyed_histograms.subprocess_crashes_with_dump, 'plugin')).sum AS crashes_detected_plugin,
+  udf.json_extract_histogram(udf.get_key(payload.keyed_histograms.subprocess_crashes_with_dump, 'content')).sum AS crashes_detected_content,
+  udf.json_extract_histogram(udf.get_key(payload.keyed_histograms.subprocess_crashes_with_dump, 'gmplugin')).sum AS crashes_detected_gmplugin,
+  udf.json_extract_histogram(udf.get_key(payload.keyed_histograms.process_crash_submit_attempt, 'main-crash')).sum AS crash_submit_attempt_main,
+  udf.json_extract_histogram(udf.get_key(payload.keyed_histograms.process_crash_submit_attempt, 'content-crash')).sum AS crash_submit_attempt_content,
+  udf.json_extract_histogram(udf.get_key(payload.keyed_histograms.process_crash_submit_attempt, 'plugin-crash')).sum AS crash_submit_attempt_plugin,
+  udf.json_extract_histogram(udf.get_key(payload.keyed_histograms.process_crash_submit_success, 'main-crash')).sum AS crash_submit_success_main,
+  udf.json_extract_histogram(udf.get_key(payload.keyed_histograms.process_crash_submit_success, 'content-crash')).sum AS crash_submit_success_content,
+  udf.json_extract_histogram(udf.get_key(payload.keyed_histograms.process_crash_submit_success, 'plugin-crash')).sum AS crash_submit_success_plugin,
+  udf.json_extract_histogram(udf.get_key(payload.keyed_histograms.subprocess_kill_hard, 'ShutDownKill')).sum AS shutdown_kill,
   -- format:on
   ARRAY_LENGTH(environment.addons.active_addons) AS active_addons_count,
   -- See https://github.com/mozilla-services/data-pipeline/blob/master/hindsight/modules/fx/ping.lua#L82
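Here the shared lookup helper becomes the persistent `udf.get_key`, mirroring the `udf.geo_struct` switch in the clients_daily change above. A hypothetical stand-in for the lookup such a helper presumably performs; the name `example_get_key` and its body are assumptions for illustration, not the source of the `udf` dataset function:

```sql
-- Assumed behavior: return the value stored under key `k` in an array of
-- key/value structs, such as a keyed histogram map.
CREATE TEMP FUNCTION example_get_key(map ANY TYPE, k STRING) AS (
  (SELECT kv.value FROM UNNEST(map) AS kv WHERE kv.key = k LIMIT 1)
);

SELECT example_get_key([STRUCT('a' AS key, 1 AS value)], 'a');  -- returns 1
```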

View file

@@ -1,4 +1,4 @@
-CREATE TEMP FUNCTION udf.get_values(required ARRAY<FLOAT64>, values ARRAY<FLOAT64>)
+CREATE TEMP FUNCTION udf_get_values(required ARRAY<FLOAT64>, values ARRAY<FLOAT64>)
 RETURNS ARRAY<STRUCT<key STRING, value FLOAT64>> AS (
   (
     SELECT ARRAY_AGG(record)
@@ -279,7 +279,7 @@ percentiles AS (
   client_agg_type)
 SELECT *
-  REPLACE(udf.get_values(
+  REPLACE(udf_get_values(
     [5.0, 25.0, 50.0, 75.0, 95.0],
     aggregates
   ) AS aggregates)