Bug 1632635 - FxA Amplitude export for active events (#941)
* WIP: Initial implementation of FxA Amplitude export
* Use submission_date parameter
* Add hmac-sha256 SQL implementation
* Escape language column name
  Co-Authored-By: Jeff Klukas <jeff@klukas.net>
* Use hmac_sha256; update for review feedback
* Reformat sql files
* Add docs for HMAC implementation
* Validate hmac_sha256 against NIST test vectors
* Add filepath as from_text arg
  Co-authored-by: Daniel Thorn <dthorn@mozilla.com>
* Explicitly use named argument
  Co-authored-by: Daniel Thorn <dthorn@mozilla.com>
* Add docs for hmac validation
* WIP: Derive os_used_week/month as incremental query
* Retrieve hmac_key from encrypted keys table
  Co-authored-by: Jeff Klukas <jeff@klukas.net>
* Remove fxa_hmac param
* Reformat SQL files
* Use bytes_seen for os_used
* Rename udfs
* Format UDF sql
* Don't include NULL os values
* Don't include NULL user properties
* Update comment for UDF
* Use fully-named datasets, not fxa*
* Cast key_id to bytes
* Fix failing tests
* Fix test failures
* Use new dataset for view query
* Add access denied exception for secret access
* Remove flake8 changes
* Update description of fxa_amplitude_export
  Co-authored-by: Jeff Klukas <jeff@klukas.net>
* Remove version suffix from view
  Co-authored-by: Jeff Klukas <jeff@klukas.net>

Co-authored-by: Daniel Thorn <dthorn@mozilla.com>
Parent: 069c24ac60
Commit: bd7be1606c
@@ -41,12 +41,28 @@ class RawUdf:
         with open(filepath) as f:
             text = f.read()
 
+        name = basename.replace(".sql", "")
+        dataset = os.path.basename(dirpath)
+
+        try:
+            return RawUdf.from_text(text, dataset, name, filepath)
+        except ValueError as e:
+            raise ValueError(str(e) + f" in {filepath}")
+
+    @staticmethod
+    def from_text(text, dataset, name, filepath=None, is_defined=True):
+        """Create a RawUdf instance from text.
+
+        If is_defined is False, then the UDF does not
+        need to be defined in the text; it could be
+        just tests.
+        """
         sql = sqlparse.format(text, strip_comments=True)
         statements = [s for s in sqlparse.split(sql) if s.strip()]
 
-        prod_name = basename.replace(".sql", "")
-        persistent_name = os.path.basename(dirpath) + "." + prod_name
-        temp_name = os.path.basename(dirpath) + "_" + prod_name
+        prod_name = name
+        persistent_name = f"{dataset}.{name}"
+        temp_name = f"{dataset}_{name}"
         internal_name = None
 
         definitions = []
@@ -68,10 +84,10 @@ class RawUdf:
             tests.append(s)
 
         for name in (prod_name, internal_name):
-            if not UDF_NAME_RE.match(name):
+            if is_defined and not UDF_NAME_RE.match(name):
                 raise ValueError(
                     f"Invalid UDF name {name}: Must start with alpha char, "
-                    f"limited to chars {UDF_CHAR}, be at most 256 chars long."
+                    f"limited to chars {UDF_CHAR}, be at most 256 chars long"
                 )
 
         # find usages of both persistent and temporary UDFs
@@ -79,12 +95,13 @@ class RawUdf:
         dependencies = [".".join(t) for t in dependencies]
         dependencies.extend(re.findall(TEMP_UDF_RE, "\n".join(definitions)))
 
-        if internal_name is None:
-            raise ValueError(
-                f"Expected a UDF named {persistent_name} or {temp_name} "
-                f"to be defined in {filepath}"
-            )
-        dependencies.remove(internal_name)
+        if is_defined:
+            if internal_name is None:
+                raise ValueError(
+                    f"Expected a UDF named {persistent_name} or {temp_name} "
+                    f"to be defined"
+                )
+            dependencies.remove(internal_name)
 
         return RawUdf(
             internal_name,
@@ -46,6 +46,7 @@ SKIP = {
     "sql/search/search_clients_last_seen_v1/view.sql",
     "sql/search/search_clients_last_seen/view.sql",
     "sql/telemetry_derived/deviations_v1/query.sql",
+    "sql/firefox_accounts_derived/fxa_amplitude_export_v1/query.sql",
     # Already exists (and lacks an "OR REPLACE" clause)
     "sql/org_mozilla_firefox_derived/clients_first_seen_v1/init.sql",
     "sql/org_mozilla_firefox_derived/clients_last_seen_v1/init.sql",
@@ -0,0 +1,125 @@
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.firefox_accounts.fxa_amplitude_export`
AS
WITH active_users AS (
  SELECT
    `moz-fx-data-shared-prod`.udf.active_values_from_days_seen_map(os_used_month, 0, 1) AS os_used_day,
    `moz-fx-data-shared-prod`.udf.active_values_from_days_seen_map(os_used_month, -6, 7) AS os_used_week,
    `moz-fx-data-shared-prod`.udf.active_values_from_days_seen_map(os_used_month, -27, 28) AS os_used_month,
    * EXCEPT (days_seen_bits, os_used_month)
  FROM
    `moz-fx-data-shared-prod`.firefox_accounts_derived.fxa_amplitude_export_v1
  WHERE
    `moz-fx-data-shared-prod`.udf.pos_of_trailing_set_bit(days_seen_bits) = 0
),
active_events AS (
  SELECT
    submission_timestamp,
    user_id,
    insert_id,
    'fxa_activity - active' AS event_type,
    timestamp,
    TO_JSON_STRING(STRUCT(services, oauth_client_ids)) AS event_properties,
    '' AS user_properties
  FROM
    active_users
),
user_properties AS (
  SELECT
    submission_timestamp,
    user_id,
    '' AS insert_id,
    '$identify' AS event_type,
    timestamp,
    '' AS event_properties,
    -- We don't want to include user_properties if they are null, so we need
    -- to list them out explicitly and filter with WHERE
    CONCAT(
      "{",
      ARRAY_TO_STRING(
        ARRAY(
          SELECT
            CONCAT(TO_JSON_STRING(key), ":", value)
          FROM
            (
              SELECT AS STRUCT
                "region" AS key,
                TO_JSON_STRING(region) AS value,
              UNION ALL
              SELECT AS STRUCT
                "country" AS key,
                TO_JSON_STRING(country) AS value,
              UNION ALL
              SELECT AS STRUCT
                "language" AS key,
                TO_JSON_STRING(`language`) AS value,
              UNION ALL
              SELECT AS STRUCT
                "os_used_day" AS key,
                TO_JSON_STRING(os_used_day) AS value,
              UNION ALL
              SELECT AS STRUCT
                "os_used_week" AS key,
                TO_JSON_STRING(os_used_week) AS value,
              UNION ALL
              SELECT AS STRUCT
                "os_used_month" AS key,
                TO_JSON_STRING(os_used_month) AS value,
              UNION ALL
              SELECT AS STRUCT
                "sync_device_count" AS key,
                TO_JSON_STRING(sync_device_count) AS value,
              UNION ALL
              SELECT AS STRUCT
                "sync_active_devices_day" AS key,
                TO_JSON_STRING(sync_active_devices_day) AS value,
              UNION ALL
              SELECT AS STRUCT
                "sync_active_devices_week" AS key,
                TO_JSON_STRING(sync_active_devices_week) AS value,
              UNION ALL
              SELECT AS STRUCT
                "sync_active_devices_month" AS key,
                TO_JSON_STRING(sync_active_devices_month) AS value,
              UNION ALL
              SELECT AS STRUCT
                "ua_version" AS key,
                TO_JSON_STRING(ua_version) AS value,
              UNION ALL
              SELECT AS STRUCT
                "ua_browser" AS key,
                TO_JSON_STRING(ua_browser) AS value,
              UNION ALL
              SELECT AS STRUCT
                "app_version" AS key,
                TO_JSON_STRING(app_version) AS value,
              UNION ALL
              SELECT AS STRUCT
                "$postInsert",
                TO_JSON_STRING(STRUCT(fxa_services_used)) AS value
            )
          WHERE
            value != "null"
        ),
        ","
      ),
      "}"
    ) AS user_properties
  FROM
    active_users
),
all_events AS (
  SELECT
    *
  FROM
    active_events
  UNION ALL
  SELECT
    *
  FROM
    user_properties
)
SELECT
  *
FROM
  all_events
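The WHERE value != "null" filter above is what keeps null user properties out of the JSON object built in the user_properties CTE. As a rough illustration of the same construction (a sketch, not part of this commit; the helper name and the sample property dict are hypothetical), in Python:

import json

def user_properties_json(props):
    # Mimic the view: serialize each non-null property as "key":value and
    # join the pairs inside braces, so null properties never appear.
    pairs = [
        f"{json.dumps(key)}:{json.dumps(value)}"
        for key, value in props.items()
        if value is not None
    ]
    return "{" + ",".join(pairs) + "}"

# user_properties_json({"region": "Texas", "sync_device_count": 2, "ua_browser": None})
# -> '{"region":"Texas","sync_device_count":2}'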
@@ -0,0 +1,35 @@
CREATE OR REPLACE TABLE
  `moz-fx-data-shared-prod`.firefox_accounts_derived.fxa_amplitude_export_v1
PARTITION BY
  (DATE(submission_timestamp))
CLUSTER BY
  (user_id)
AS
WITH columns AS (
  SELECT
    CAST(NULL AS TIMESTAMP) AS submission_timestamp,
    CAST(NULL AS STRING) AS user_id,
    CAST(NULL AS STRING) AS insert_id,
    CAST(NULL AS DATETIME) AS timestamp,
    CAST(NULL AS STRING) AS region,
    CAST(NULL AS STRING) AS country,
    CAST(NULL AS STRING) AS `language`,
    CAST(NULL AS ARRAY<STRING>) AS services,
    CAST(NULL AS ARRAY<STRING>) AS oauth_client_ids,
    CAST(NULL AS ARRAY<STRING>) AS fxa_services_used,
    CAST(NULL AS ARRAY<STRUCT<key STRING, value INT64>>) AS os_used_month,
    CAST(NULL AS INT64) AS sync_device_count,
    CAST(NULL AS INT64) AS sync_active_devices_day,
    CAST(NULL AS INT64) AS sync_active_devices_week,
    CAST(NULL AS INT64) AS sync_active_devices_month,
    CAST(NULL AS STRING) AS ua_version,
    CAST(NULL AS STRING) AS ua_browser,
    CAST(NULL AS FLOAT64) AS app_version,
    CAST(NULL AS INT64) AS days_seen_bits,
)
SELECT
  *
FROM
  columns
WHERE
  FALSE
@@ -0,0 +1,10 @@
friendly_name: FxA Amplitude Export
description: >
  Derived from FxA logs, this table contains active events and user property
  updates for FxA users. A view of this table is exported to Amplitude.
owners:
  - frank@mozilla.com
labels:
  application: FxA
  incremental: true
  schedule: daily
@@ -0,0 +1,130 @@
WITH hmac_key AS (
  SELECT
    AEAD.DECRYPT_BYTES(
      (SELECT keyset FROM `moz-fx-dataops-secrets.airflow_query_keys.fxa_prod`),
      ciphertext,
      CAST(key_id AS BYTES)
    ) AS value
  FROM
    `moz-fx-data-shared-prod.firefox_accounts_derived.encrypted_keys_v1`
  WHERE
    key_id = 'fxa_hmac_prod'
),
base_events AS (
  SELECT
    *
  FROM
    `moz-fx-fxa-prod-0712.fxa_prod_logs.docker_fxa_auth_20*`
  WHERE
    _TABLE_SUFFIX = FORMAT_DATE('%g%m%d', @submission_date)
    AND jsonPayload.fields.event_type IN (
      'fxa_activity - cert_signed',
      'fxa_activity - access_token_checked',
      'fxa_activity - access_token_created'
    )
    AND jsonPayload.fields.user_id IS NOT NULL
),
grouped_by_user AS (
  SELECT
    -- to prevent weirdness from timestamp field, use provided
    -- submission date parameter as timestamp
    TO_HEX(
      udf.hmac_sha256((SELECT * FROM hmac_key), CAST(jsonPayload.fields.user_id AS BYTES))
    ) AS user_id,
    MIN(CONCAT(insertId, '-user')) AS insert_id,
    CAST(@submission_date AS DATETIME) AS timestamp,
    -- Amplitude properties, scalars
    `moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(jsonPayload.fields.region)) AS region,
    `moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(jsonPayload.fields.country)) AS country,
    `moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(jsonPayload.fields.`language`)) AS `language`,
    -- Event properties, arrays
    ARRAY_AGG(
      DISTINCT JSON_EXTRACT_SCALAR(jsonPayload.fields.event_properties, "$.service") IGNORE NULLS
    ) AS services,
    ARRAY_AGG(
      DISTINCT JSON_EXTRACT_SCALAR(
        jsonPayload.fields.event_properties,
        "$.oauth_client_id"
      ) IGNORE NULLS
    ) AS oauth_client_ids,
    -- User properties, arrays
    ARRAY_AGG(
      DISTINCT JSON_EXTRACT_SCALAR(
        jsonPayload.fields.user_properties,
        "$['$append'].fxa_services_used"
      ) IGNORE NULLS
    ) AS fxa_services_used,
    ARRAY_AGG(DISTINCT jsonPayload.fields.os_name IGNORE NULLS) AS os_used_month,
    -- User properties, scalars
    MAX(
      CAST(JSON_EXTRACT_SCALAR(jsonPayload.fields.user_properties, "$.sync_device_count") AS INT64)
    ) AS sync_device_count,
    MAX(
      CAST(
        JSON_EXTRACT_SCALAR(
          jsonPayload.fields.user_properties,
          "$.sync_active_devices_day"
        ) AS INT64
      )
    ) AS sync_active_devices_day,
    MAX(
      CAST(
        JSON_EXTRACT_SCALAR(
          jsonPayload.fields.user_properties,
          "$.sync_active_devices_week"
        ) AS INT64
      )
    ) AS sync_active_devices_week,
    MAX(
      CAST(
        JSON_EXTRACT_SCALAR(
          jsonPayload.fields.user_properties,
          "$.sync_active_devices_month"
        ) AS INT64
      )
    ) AS sync_active_devices_month,
    `moz-fx-data-shared-prod`.udf.mode_last(
      ARRAY_AGG(
        JSON_EXTRACT_SCALAR(jsonPayload.fields.user_properties, "$.ua_version") IGNORE NULLS
      )
    ) AS ua_version,
    `moz-fx-data-shared-prod`.udf.mode_last(
      ARRAY_AGG(
        JSON_EXTRACT_SCALAR(jsonPayload.fields.user_properties, "$.ua_browser") IGNORE NULLS
      )
    ) AS ua_browser,
    MAX(CAST(jsonPayload.fields.app_version AS FLOAT64)) AS app_version,
    CAST(TRUE AS INT64) AS days_seen_bits,
  FROM
    base_events
  GROUP BY
    user_id
),
_previous AS (
  SELECT
    * EXCEPT (submission_timestamp)
  FROM
    firefox_accounts_derived.fxa_amplitude_export_v1
  WHERE
    DATE(submission_timestamp) = DATE_SUB(@submission_date, INTERVAL 1 DAY)
    AND udf.shift_28_bits_one_day(days_seen_bits) > 0
)
SELECT
  CAST(@submission_date AS TIMESTAMP) AS submission_timestamp,
  _current.* REPLACE (
    COALESCE(_current.user_id, _previous.user_id) AS user_id,
    udf.combine_adjacent_days_28_bits(
      _previous.days_seen_bits,
      _current.days_seen_bits
    ) AS days_seen_bits,
    udf.combine_days_seen_maps(
      _previous.os_used_month,
      ARRAY(SELECT STRUCT(key, CAST(TRUE AS INT64) AS value) FROM _current.os_used_month AS key)
    ) AS os_used_month
  )
FROM
  grouped_by_user _current
FULL OUTER JOIN
  _previous
USING
  (user_id)
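The incremental pattern here is the same one used by the clients_last_seen tables: each run sets bit 0 for users seen today and ages yesterday's pattern by one day. A rough Python sketch of that lifecycle for a single user (not part of this commit; it assumes udf.shift_28_bits_one_day and udf.combine_adjacent_days_28_bits have the usual bigquery-etl shift-and-mask semantics):

MASK_28 = (1 << 28) - 1  # keep only the 28-day window

def shift_one_day(bits):
    # udf.shift_28_bits_one_day: yesterday's pattern, aged by one day.
    return (bits << 1) & MASK_28

def next_days_seen_bits(previous_bits, seen_today):
    # udf.combine_adjacent_days_28_bits(previous, CAST(TRUE AS INT64)):
    # bit 0 records today's activity, older days move one bit to the left.
    return shift_one_day(previous_bits) | (1 if seen_today else 0)

# The _previous CTE keeps only rows where shift_one_day(bits) > 0, i.e. users
# who would still have at least one active day left in the 28-day window.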
@@ -0,0 +1,4 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, you can obtain one at http://mozilla.org/MPL/2.0/.
"""Validation Tests."""
File diff suppressed because it is too large.
@@ -0,0 +1,55 @@
"""
Validate HMAC-SHA256 implementation against the NIST test vectors.

The vectors are located in tests/validation/data/hmac_sha256_validation.json.
"""

import json
import pytest
from bigquery_etl.parse_udf import read_udf_dirs, RawUdf, udf_tests_sql
from google.cloud import bigquery

validation_data_file = "tests/validation/data/hmac_sha256_validation.json"


def udfs():
    """Get all udfs and assertions."""
    return read_udf_dirs("tests/assert", "udf", "udf_js")


def load_data():
    """Load test data."""
    with open(validation_data_file, "r") as f:
        return json.load(f)["data"]


def generate_raw_udf(test_cases):
    """Generate a SQL test for each instance in hmac_sha256_validation.json."""
    test_sql_fixture = (
        "SELECT assert_equals("
        "'{Mac}',"
        "TO_HEX(SUBSTR("
        "udf.hmac_sha256("
        "FROM_HEX('{Key}'),"
        "FROM_HEX('{Msg}')),"
        "1,"
        "{Tlen})));"
    )
    test_sql_stmnts = [test_sql_fixture.format(**test_case) for test_case in test_cases]

    return RawUdf.from_text(
        "\n".join(test_sql_stmnts), "udf", "hmac_sha256", is_defined=False
    )


def generate_sql():
    """Generate SQL statements to test."""
    return udf_tests_sql(generate_raw_udf(load_data()), udfs())


@pytest.mark.parametrize("sql", generate_sql())
def test_validate_hmac_sha256(sql):
    """Validate hmac_sha256."""
    job_config = bigquery.QueryJobConfig(use_legacy_sql=False)
    job = bigquery.Client().query(sql, job_config=job_config)
    job.result()
@@ -0,0 +1,58 @@
/*
Given a map representing activity for STRING `key`s, this
function returns an array of which `key`s were active for the
time period in question.

start_offset should be at most 0.
n_bits should be at most the number of remaining bits.
*/
CREATE OR REPLACE FUNCTION udf.active_values_from_days_seen_map(
  days_seen_bits_map ARRAY<STRUCT<key STRING, value INT64>>,
  start_offset INT64,
  n_bits INT64
) AS (
  ARRAY(
    SELECT
      DISTINCT key
    FROM
      UNNEST(days_seen_bits_map)
    WHERE
      -- TODO: Use udf.bits28_active_in_range when it's available
      BIT_COUNT(value << (64 + start_offset - 1) >> (64 - n_bits)) > 0
  )
);

-- Tests
SELECT
  assert_array_equals(
    ['a', 'b'],
    udf.active_values_from_days_seen_map(
      [STRUCT('a' AS key, 1 AS value), STRUCT('b' AS key, 3 AS value)],
      0,
      1
    )
  ),
  assert_array_equals(
    ['a'],
    udf.active_values_from_days_seen_map(
      [STRUCT('a' AS key, 2048 AS value), STRUCT('b' AS key, 3 AS value)],
      -14,
      7
    )
  ),
  assert_array_equals(
    ['b'],
    udf.active_values_from_days_seen_map(
      [STRUCT('a' AS key, 2048 AS value), STRUCT('b' AS key, 3 AS value)],
      -6,
      7
    )
  ),
  assert_array_equals(
    ['a', 'b'],
    udf.active_values_from_days_seen_map(
      [STRUCT('a' AS key, 1 AS value), STRUCT('b' AS key, 3 AS value)],
      -27,
      28
    )
  );
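As a rough cross-check of the bit arithmetic (a sketch, not part of this commit; the function name is hypothetical), the window selected by start_offset and n_bits can be written in Python with an explicit mask, which is equivalent to the shift-and-BIT_COUNT expression for the 28-bit patterns used here:

def active_in_window(days_seen_bits, start_offset, n_bits):
    # Day 0 (today) is the least-significant bit. The window covers n_bits
    # days, the oldest of which is -start_offset days before today.
    mask = ((1 << n_bits) - 1) << (-start_offset - n_bits + 1)
    return (days_seen_bits & mask) != 0

# Mirrors the second test above: bit 11 falls in the 7-day window whose oldest
# day is 14 days ago, so 'a' (value 2048) is active and 'b' (value 3) is not.
assert active_in_window(2048, -14, 7) and not active_in_window(3, -14, 7)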
@@ -0,0 +1,69 @@
/*
The "clients_last_seen" class of tables represents various types of client
activity within a 28-day window as bit patterns.

This function takes in two arrays of structs (aka maps) where each entry gives the
bit pattern for days in which we saw a ping for a given user under a given
key. We combine the bit patterns for the previous day and the
current day, returning a single map.

See `udf.combine_experiment_days` for a more specific example of this approach.
*/
CREATE OR REPLACE FUNCTION udf.combine_days_seen_maps(
  --
  prev ARRAY<STRUCT<key STRING, value INT64>>,
  --
  curr ARRAY<STRUCT<key STRING, value INT64>>
) AS (
  -- The below is logically a FULL JOIN, but BigQuery returns error
  -- "Array scan is not allowed with FULL JOIN" so we have to do two
  -- separate scans.
  ARRAY_CONCAT(
    -- Keys present in prev (and potentially in curr too)
    ARRAY(
      SELECT AS STRUCT
        key,
        udf.combine_adjacent_days_28_bits(prev.value, curr.value) AS value
      FROM
        UNNEST(prev) AS prev
      LEFT JOIN
        UNNEST(curr) AS curr
      USING
        (key)
      WHERE
        udf.combine_adjacent_days_28_bits(prev.value, curr.value) > 0
    ),
    -- Keys present in curr only
    ARRAY(
      SELECT AS STRUCT
        key,
        curr.value
      FROM
        UNNEST(curr) AS curr
      LEFT JOIN
        UNNEST(prev) AS prev
      USING
        (key)
      WHERE
        prev IS NULL
    )
  )
);

-- Tests
SELECT
  assert_array_equals(
    [
      STRUCT("key1" AS key, 3 AS value),
      STRUCT("key2" AS key, 6 AS value),
      STRUCT("key3" AS key, 1 AS value)
    ],
    udf.combine_days_seen_maps(
      [
        STRUCT("key1" AS key, 1 AS value),
        STRUCT("key2" AS key, 3 AS value),
        STRUCT("key3" AS key, 1 << 27 AS value)
      ],
      [STRUCT("key1" AS key, 1 AS value), STRUCT("key3" AS key, 1 AS value)]
    )
  );
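A minimal Python sketch of the same merge semantics (not part of this commit; it assumes udf.combine_adjacent_days_28_bits shifts the previous pattern back one day and ORs in the current day's bits, which matches the test expectations above):

MASK_28 = (1 << 28) - 1

def combine_adjacent_days_28_bits(prev_bits, curr_bits):
    # Yesterday's pattern moves one day further into the past; bit 0 of
    # curr_bits records activity on the current day.
    return ((prev_bits << 1) | curr_bits) & MASK_28

def combine_days_seen_maps(prev, curr):
    # prev/curr: dict of key -> 28-bit activity pattern.
    combined = {
        key: combine_adjacent_days_28_bits(prev.get(key, 0), curr.get(key, 0))
        for key in set(prev) | set(curr)
    }
    # Drop keys whose activity has aged entirely out of the 28-day window.
    return {key: bits for key, bits in combined.items() if bits > 0}

# Mirrors the test above:
# combine_days_seen_maps({"key1": 1, "key2": 3, "key3": 1 << 27}, {"key1": 1, "key3": 1})
# -> {"key1": 3, "key2": 6, "key3": 1}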
@@ -0,0 +1,22 @@
/*
Given a key and message, return the HMAC-SHA256 hash.

This algorithm can be found on Wikipedia:
https://en.wikipedia.org/wiki/HMAC#Implementation

This implementation is validated against the NIST test vectors.
See test/validation/hmac_sha256.py for more information.
*/
CREATE OR REPLACE FUNCTION udf.hmac_sha256(key BYTES, message BYTES) AS (
  SHA256(
    CONCAT(
      RPAD(IF(BYTE_LENGTH(key) > 64, SHA256(key), key), 64, b'\x00') ^ REPEAT(b'\x5c', 64),
      SHA256(
        CONCAT(
          RPAD(IF(BYTE_LENGTH(key) > 64, SHA256(key), key), 64, b'\x00') ^ REPEAT(b'\x36', 64),
          message
        )
      )
    )
  )
);
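For an offline sanity check of this construction (a sketch, not part of this commit), Python's standard library computes the same HMAC-SHA256, including the hashing of oversized keys, the 64-byte block-size padding, and the 0x36 inner and 0x5c outer pads:

import hashlib
import hmac

def hmac_sha256(key: bytes, message: bytes) -> bytes:
    # Same construction as the UDF: hash oversized keys, right-pad to the
    # 64-byte SHA-256 block size, then apply the inner and outer pads.
    if len(key) > 64:
        key = hashlib.sha256(key).digest()
    key = key.ljust(64, b"\x00")
    inner = hashlib.sha256(bytes(k ^ 0x36 for k in key) + message).digest()
    digest = hashlib.sha256(bytes(k ^ 0x5C for k in key) + inner).digest()
    # The stdlib HMAC agrees, which is what the NIST-vector test exercises in SQL.
    assert digest == hmac.new(key, message, hashlib.sha256).digest()
    return digest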