Fix monthly_searches to account for null (#612)
* Fix monthly_searches to account for null * Fix missing closing parens * Add UDF to get NULL key
This commit is contained in:
Родитель
2d743ee80d
Коммит
65053ad5e1
|
@ -56,24 +56,26 @@ CREATE TEMP FUNCTION
|
|||
curr ARRAY<STRUCT<key STRING, value STRUCT<total_searches ARRAY<INT64>, tagged_searches ARRAY<INT64>, search_with_ads ARRAY<INT64>, ad_click ARRAY<INT64>>>>,
|
||||
submission_date DATE) AS (ARRAY(
|
||||
WITH prev_tbl AS (
|
||||
SELECT *
|
||||
SELECT * REPLACE(COALESCE(key, "null_engine") AS key)
|
||||
FROM UNNEST(prev)
|
||||
), curr_tbl AS (
|
||||
SELECT * REPLACE(COALESCE(key, "null_engine") AS key)
|
||||
FROM UNNEST(curr)
|
||||
)
|
||||
|
||||
SELECT
|
||||
STRUCT(
|
||||
key,
|
||||
NULLIF(key, "null_engine") AS key,
|
||||
udf_add_monthly_engine_searches(
|
||||
COALESCE(p.value, udf_new_monthly_engine_searches_struct()),
|
||||
COALESCE(c.value, udf_new_monthly_engine_searches_struct()),
|
||||
submission_date) AS value
|
||||
)
|
||||
FROM
|
||||
UNNEST(curr) AS c
|
||||
curr_tbl AS c
|
||||
FULL OUTER JOIN
|
||||
prev_tbl AS p
|
||||
USING (key)
|
||||
|
||||
));
|
||||
CREATE TEMP FUNCTION
|
||||
udf_array_11_zeroes_then(val INT64) AS (
|
||||
|
|
|
@ -10,24 +10,26 @@ CREATE TEMP FUNCTION
|
|||
curr ARRAY<STRUCT<key STRING, value STRUCT<total_searches ARRAY<INT64>, tagged_searches ARRAY<INT64>, search_with_ads ARRAY<INT64>, ad_click ARRAY<INT64>>>>,
|
||||
submission_date DATE) AS (ARRAY(
|
||||
WITH prev_tbl AS (
|
||||
SELECT *
|
||||
SELECT * REPLACE(COALESCE(key, "null_engine") AS key)
|
||||
FROM UNNEST(prev)
|
||||
), curr_tbl AS (
|
||||
SELECT * REPLACE(COALESCE(key, "null_engine") AS key)
|
||||
FROM UNNEST(curr)
|
||||
)
|
||||
|
||||
SELECT
|
||||
SELECT
|
||||
STRUCT(
|
||||
key,
|
||||
NULLIF(key, "null_engine") AS key,
|
||||
udf_add_monthly_engine_searches(
|
||||
COALESCE(p.value, udf_new_monthly_engine_searches_struct()),
|
||||
COALESCE(c.value, udf_new_monthly_engine_searches_struct()),
|
||||
submission_date) AS value
|
||||
)
|
||||
FROM
|
||||
UNNEST(curr) AS c
|
||||
curr_tbl AS c
|
||||
FULL OUTER JOIN
|
||||
prev_tbl AS p
|
||||
USING (key)
|
||||
|
||||
));
|
||||
|
||||
-- Tests
|
||||
|
@ -48,7 +50,10 @@ WITH previous_examples AS (
|
|||
udf_zeroed_array(12) AS ad_click) AS value
|
||||
)] AS prev, "google" as type
|
||||
UNION ALL
|
||||
SELECT NULL AS prev, "null" as type
|
||||
SELECT [ STRUCT(
|
||||
CAST(NULL AS STRING) AS key,
|
||||
udf_new_monthly_engine_searches_struct() AS value
|
||||
) ] AS prev, "null" as type
|
||||
), current_examples AS (
|
||||
SELECT
|
||||
[
|
||||
|
@ -69,7 +74,10 @@ WITH previous_examples AS (
|
|||
)] AS curr,
|
||||
"bing" as type
|
||||
UNION ALL
|
||||
SELECT NULL as curr, "null" as type
|
||||
SELECT [ STRUCT(
|
||||
CAST(NULL AS STRING) AS key,
|
||||
udf_new_monthly_engine_searches_struct() AS value
|
||||
) ] as curr, "null" as type
|
||||
), dates AS (
|
||||
SELECT d AS date
|
||||
FROM UNNEST([DATE "2019-10-01", DATE "2019-10-02"]) AS d
|
||||
|
@ -116,18 +124,24 @@ WITH previous_examples AS (
|
|||
STRUCT("google" AS p_type, "bing" AS c_type, DATE "2019-10-01" AS date, "bing" AS key, "total_searches" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] AS exp),
|
||||
STRUCT("google" AS p_type, "bing" AS c_type, DATE "2019-10-01" AS date, "bing" AS key, "tagged_searches" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2] AS exp),
|
||||
STRUCT("google" AS p_type, "bing" AS c_type, DATE "2019-10-01" AS date, "bing" AS key, "search_with_ads" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3] AS exp),
|
||||
STRUCT("google" AS p_type, "bing" AS c_type, DATE "2019-10-01" AS date, "bing" AS key, "ad_click" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4] AS exp)
|
||||
STRUCT("google" AS p_type, "bing" AS c_type, DATE "2019-10-01" AS date, "bing" AS key, "ad_click" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4] AS exp),
|
||||
|
||||
-- Check NULL join
|
||||
STRUCT("null" AS p_type, "null" AS c_type, DATE "2019-10-01" AS date, NULL AS key, "total_searches" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] AS exp),
|
||||
STRUCT("null" AS p_type, "null" AS c_type, DATE "2019-10-01" AS date, NULL AS key, "tagged_searches" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] AS exp),
|
||||
STRUCT("null" AS p_type, "null" AS c_type, DATE "2019-10-01" AS date, NULL AS key, "search_with_ads" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] AS exp),
|
||||
STRUCT("null" AS p_type, "null" AS c_type, DATE "2019-10-01" AS date, NULL AS key, "ad_click" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] AS exp)
|
||||
])
|
||||
)
|
||||
|
||||
SELECT
|
||||
assert_array_equals(exp,
|
||||
CASE WHEN res_type = "total_searches" THEN udf_get_key(res, key).total_searches
|
||||
WHEN res_type = "tagged_searches" THEN udf_get_key(res, key).tagged_searches
|
||||
WHEN res_type = "search_with_ads" THEN udf_get_key(res, key).search_with_ads
|
||||
WHEN res_type = "ad_click" THEN udf_get_key(res, key).ad_click
|
||||
END)
|
||||
CASE WHEN res_type = "total_searches" THEN udf_get_key_with_null(res, key).total_searches
|
||||
WHEN res_type = "tagged_searches" THEN udf_get_key_with_null(res, key).tagged_searches
|
||||
WHEN res_type = "search_with_ads" THEN udf_get_key_with_null(res, key).search_with_ads
|
||||
WHEN res_type = "ad_click" THEN udf_get_key_with_null(res, key).ad_click
|
||||
END),
|
||||
FROM
|
||||
results
|
||||
INNER JOIN
|
||||
expected USING (p_type, c_type, date)
|
||||
expected USING (p_type, c_type, date);
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
Fetch the value associated with a given key from an array of key/value structs.
|
||||
|
||||
Because map types aren't available in BigQuery, we model maps as arrays
|
||||
of structs instead, and this function provides map-like access to such fields.
|
||||
|
||||
This version matches NULL keys as well.
|
||||
|
||||
*/
|
||||
|
||||
CREATE TEMP FUNCTION udf_get_key_with_null(map ANY TYPE, k ANY TYPE) AS (
|
||||
(
|
||||
SELECT key_value.value
|
||||
FROM UNNEST(map) AS key_value
|
||||
WHERE key_value.key = k
|
||||
OR key_value.key IS NULL and k IS NULL
|
||||
LIMIT 1
|
||||
)
|
||||
);
|
||||
|
||||
-- Tests
|
||||
|
||||
SELECT
|
||||
assert_equals(12, udf_get_key_with_null([STRUCT('foo' AS key, 42 AS value), ('bar', 12)], 'bar')),
|
||||
assert_equals(12, udf_get_key_with_null([STRUCT('foo' AS key, 42 AS value), (CAST(NULL AS STRING), 12)], CAST(NULL AS STRING)));
|
Загрузка…
Ссылка в новой задаче