fix(glam_fog) SUM overflow (#6218)
* fix(glam_fog): Create function histogram_filter_high_values to prevent INT64 overflow * fix(glam_fog): Use histogram_filter_high_values to avoid overflow
This commit is contained in:
Родитель
7d613414e8
Коммит
68937b4ad0
|
@ -71,7 +71,7 @@ aggregated AS (
|
||||||
{{ attributes }},
|
{{ attributes }},
|
||||||
metric,
|
metric,
|
||||||
metric_type,
|
metric_type,
|
||||||
mozfun.map.sum(ARRAY_CONCAT_AGG(value)) as value
|
mozfun.map.sum(ARRAY_CONCAT_AGG(mozfun.glam.histogram_filter_high_values(value))) as value
|
||||||
FROM
|
FROM
|
||||||
flattened_histograms
|
flattened_histograms
|
||||||
GROUP BY
|
GROUP BY
|
||||||
|
|
|
@ -18,22 +18,6 @@
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endset %}
|
{% endset %}
|
||||||
|
|
||||||
CREATE TEMP FUNCTION filter_values(aggs ARRAY<STRUCT<key STRING, value INT64>>)
|
|
||||||
RETURNS ARRAY<STRUCT<key STRING, value INT64>>
|
|
||||||
AS (
|
|
||||||
ARRAY(
|
|
||||||
SELECT AS STRUCT agg.key, SUM(agg.value) AS value
|
|
||||||
FROM UNNEST(aggs) agg
|
|
||||||
-- Prevent overflows by only keeping buckets where value is less than 2^40
|
|
||||||
-- allowing 2^24 entries. This value was chosen somewhat abitrarily, typically
|
|
||||||
-- the max histogram value is somewhere on the order of ~20 bits.
|
|
||||||
-- Negative values are incorrect and should not happen but were observed,
|
|
||||||
-- probably due to some bit flips.
|
|
||||||
WHERE agg.value BETWEEN 0 AND POW(2, 40)
|
|
||||||
GROUP BY agg.key
|
|
||||||
)
|
|
||||||
);
|
|
||||||
|
|
||||||
WITH extracted_accumulated AS (
|
WITH extracted_accumulated AS (
|
||||||
SELECT
|
SELECT
|
||||||
*
|
*
|
||||||
|
@ -84,7 +68,7 @@ aggregated_daily AS (
|
||||||
SELECT
|
SELECT
|
||||||
{{ attributes }},
|
{{ attributes }},
|
||||||
{{ metric_attributes }},
|
{{ metric_attributes }},
|
||||||
mozfun.map.sum(ARRAY_CONCAT_AGG(filter_values(value))) AS value
|
mozfun.map.sum(ARRAY_CONCAT_AGG(mozfun.glam.histogram_filter_high_values(value))) AS value
|
||||||
FROM
|
FROM
|
||||||
filtered_daily
|
filtered_daily
|
||||||
GROUP BY
|
GROUP BY
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
description: |
|
||||||
|
Prevent overflows by only keeping buckets where value is less than 2^40
|
||||||
|
allowing 2^24 entries. This value was chosen somewhat abitrarily, typically
|
||||||
|
the max histogram value is somewhere on the order of ~20 bits.
|
||||||
|
Negative values are incorrect and should not happen but were observed,
|
||||||
|
probably due to some bit flips.
|
||||||
|
friendly_name: Histogram filter high values
|
|
@ -0,0 +1,44 @@
|
||||||
|
CREATE OR REPLACE FUNCTION glam.histogram_filter_high_values(
|
||||||
|
aggs ARRAY<STRUCT<key STRING, value INT64>>
|
||||||
|
)
|
||||||
|
RETURNS ARRAY<STRUCT<key STRING, value INT64>> AS (
|
||||||
|
ARRAY(
|
||||||
|
SELECT AS STRUCT
|
||||||
|
agg.key,
|
||||||
|
SUM(agg.value) AS value
|
||||||
|
FROM
|
||||||
|
UNNEST(aggs) agg
|
||||||
|
WHERE
|
||||||
|
agg.value
|
||||||
|
BETWEEN 0
|
||||||
|
AND POW(2, 40)
|
||||||
|
GROUP BY
|
||||||
|
agg.key
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
assert.array_equals(
|
||||||
|
ARRAY<STRUCT<key STRING, value FLOAT64>>[('key1', 3), ('key2', 1099511627776)],
|
||||||
|
glam.histogram_filter_high_values(
|
||||||
|
[
|
||||||
|
STRUCT(
|
||||||
|
'key1' AS key,
|
||||||
|
1 AS value
|
||||||
|
), -- The first two elements have the same key and should be summed
|
||||||
|
STRUCT('key1' AS key, 2 AS value),
|
||||||
|
STRUCT(
|
||||||
|
'key2' AS key,
|
||||||
|
1099511627776 AS value
|
||||||
|
), -- This is exactly 2^40 and should not be excluded
|
||||||
|
STRUCT('key2' AS key, 1099511627777 AS value), -- This exceeds 2^40 and should be excluded
|
||||||
|
STRUCT('key1' AS key, -5 AS value) -- Should be excluded due to being negative
|
||||||
|
]
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
assert.array_equals(
|
||||||
|
ARRAY<STRUCT<key STRING, value FLOAT64>>[],
|
||||||
|
glam.histogram_filter_high_values([])
|
||||||
|
);
|
Загрузка…
Ссылка в новой задаче