Fix monthly_searches to account for null (#612)

* Fix monthly_searches to account for null

* Fix missing closing parens

* Add UDF to get NULL key
This commit is contained in:
Frank Bertsch 2019-12-19 15:19:10 -05:00 коммит произвёл GitHub
Родитель 2d743ee80d
Коммит 65053ad5e1
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 59 добавлений и 18 удалений

Просмотреть файл

@ -56,24 +56,26 @@ CREATE TEMP FUNCTION
curr ARRAY<STRUCT<key STRING, value STRUCT<total_searches ARRAY<INT64>, tagged_searches ARRAY<INT64>, search_with_ads ARRAY<INT64>, ad_click ARRAY<INT64>>>>,
submission_date DATE) AS (ARRAY(
WITH prev_tbl AS (
SELECT *
SELECT * REPLACE(COALESCE(key, "null_engine") AS key)
FROM UNNEST(prev)
), curr_tbl AS (
SELECT * REPLACE(COALESCE(key, "null_engine") AS key)
FROM UNNEST(curr)
)
SELECT
STRUCT(
key,
NULLIF(key, "null_engine") AS key,
udf_add_monthly_engine_searches(
COALESCE(p.value, udf_new_monthly_engine_searches_struct()),
COALESCE(c.value, udf_new_monthly_engine_searches_struct()),
submission_date) AS value
)
FROM
UNNEST(curr) AS c
curr_tbl AS c
FULL OUTER JOIN
prev_tbl AS p
USING (key)
));
CREATE TEMP FUNCTION
udf_array_11_zeroes_then(val INT64) AS (

Просмотреть файл

@ -10,24 +10,26 @@ CREATE TEMP FUNCTION
curr ARRAY<STRUCT<key STRING, value STRUCT<total_searches ARRAY<INT64>, tagged_searches ARRAY<INT64>, search_with_ads ARRAY<INT64>, ad_click ARRAY<INT64>>>>,
submission_date DATE) AS (ARRAY(
WITH prev_tbl AS (
SELECT *
SELECT * REPLACE(COALESCE(key, "null_engine") AS key)
FROM UNNEST(prev)
), curr_tbl AS (
SELECT * REPLACE(COALESCE(key, "null_engine") AS key)
FROM UNNEST(curr)
)
SELECT
SELECT
STRUCT(
key,
NULLIF(key, "null_engine") AS key,
udf_add_monthly_engine_searches(
COALESCE(p.value, udf_new_monthly_engine_searches_struct()),
COALESCE(c.value, udf_new_monthly_engine_searches_struct()),
submission_date) AS value
)
FROM
UNNEST(curr) AS c
curr_tbl AS c
FULL OUTER JOIN
prev_tbl AS p
USING (key)
));
-- Tests
@ -48,7 +50,10 @@ WITH previous_examples AS (
udf_zeroed_array(12) AS ad_click) AS value
)] AS prev, "google" as type
UNION ALL
SELECT NULL AS prev, "null" as type
SELECT [ STRUCT(
CAST(NULL AS STRING) AS key,
udf_new_monthly_engine_searches_struct() AS value
) ] AS prev, "null" as type
), current_examples AS (
SELECT
[
@ -69,7 +74,10 @@ WITH previous_examples AS (
)] AS curr,
"bing" as type
UNION ALL
SELECT NULL as curr, "null" as type
SELECT [ STRUCT(
CAST(NULL AS STRING) AS key,
udf_new_monthly_engine_searches_struct() AS value
) ] as curr, "null" as type
), dates AS (
SELECT d AS date
FROM UNNEST([DATE "2019-10-01", DATE "2019-10-02"]) AS d
@ -116,18 +124,24 @@ WITH previous_examples AS (
STRUCT("google" AS p_type, "bing" AS c_type, DATE "2019-10-01" AS date, "bing" AS key, "total_searches" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] AS exp),
STRUCT("google" AS p_type, "bing" AS c_type, DATE "2019-10-01" AS date, "bing" AS key, "tagged_searches" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2] AS exp),
STRUCT("google" AS p_type, "bing" AS c_type, DATE "2019-10-01" AS date, "bing" AS key, "search_with_ads" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3] AS exp),
STRUCT("google" AS p_type, "bing" AS c_type, DATE "2019-10-01" AS date, "bing" AS key, "ad_click" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4] AS exp)
STRUCT("google" AS p_type, "bing" AS c_type, DATE "2019-10-01" AS date, "bing" AS key, "ad_click" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4] AS exp),
-- Check NULL join
STRUCT("null" AS p_type, "null" AS c_type, DATE "2019-10-01" AS date, NULL AS key, "total_searches" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] AS exp),
STRUCT("null" AS p_type, "null" AS c_type, DATE "2019-10-01" AS date, NULL AS key, "tagged_searches" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] AS exp),
STRUCT("null" AS p_type, "null" AS c_type, DATE "2019-10-01" AS date, NULL AS key, "search_with_ads" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] AS exp),
STRUCT("null" AS p_type, "null" AS c_type, DATE "2019-10-01" AS date, NULL AS key, "ad_click" AS res_type, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] AS exp)
])
)
SELECT
assert_array_equals(exp,
CASE WHEN res_type = "total_searches" THEN udf_get_key(res, key).total_searches
WHEN res_type = "tagged_searches" THEN udf_get_key(res, key).tagged_searches
WHEN res_type = "search_with_ads" THEN udf_get_key(res, key).search_with_ads
WHEN res_type = "ad_click" THEN udf_get_key(res, key).ad_click
END)
CASE WHEN res_type = "total_searches" THEN udf_get_key_with_null(res, key).total_searches
WHEN res_type = "tagged_searches" THEN udf_get_key_with_null(res, key).tagged_searches
WHEN res_type = "search_with_ads" THEN udf_get_key_with_null(res, key).search_with_ads
WHEN res_type = "ad_click" THEN udf_get_key_with_null(res, key).ad_click
END),
FROM
results
INNER JOIN
expected USING (p_type, c_type, date)
expected USING (p_type, c_type, date);

25
udf/get_key_with_null.sql Normal file
Просмотреть файл

@ -0,0 +1,25 @@
/*
Fetch the value associated with a given key from an array of key/value structs.
Because map types aren't available in BigQuery, we model maps as arrays
of structs instead, and this function provides map-like access to such fields.
This version matches NULL keys as well.
*/
CREATE TEMP FUNCTION udf_get_key_with_null(map ANY TYPE, k ANY TYPE) AS (
(
SELECT key_value.value
FROM UNNEST(map) AS key_value
WHERE key_value.key = k
OR key_value.key IS NULL and k IS NULL
LIMIT 1
)
);
-- Tests
SELECT
assert_equals(12, udf_get_key_with_null([STRUCT('foo' AS key, 42 AS value), ('bar', 12)], 'bar')),
assert_equals(12, udf_get_key_with_null([STRUCT('foo' AS key, 42 AS value), (CAST(NULL AS STRING), 12)], CAST(NULL AS STRING)));