Factor profile days logic into 2 udfs

This commit is contained in:
Jeff Klukas 2019-05-20 12:26:08 -04:00
Родитель c6fed78815
Коммит 9c358c8baa
3 изменённых файлов: 96 добавлений и 15 удалений

Просмотреть файл

@ -10,8 +10,23 @@ CREATE TEMP FUNCTION
0));
--
CREATE TEMP FUNCTION
udf_combine_days(prev INT64,
udf_combine_adjacent_days_bits(prev INT64,
curr INT64) AS (udf_shift_one_day(prev) + IFNULL(curr,
0));
--
-- Prefers yesterday's bit pattern, shifted forward one day, when that shifted
-- value is non-zero; otherwise falls back to today's bits, or 0 if those are NULL.
CREATE TEMP FUNCTION
  udf_coalesce_adjacent_days_bits(prev INT64, curr INT64) AS (
    IF(udf_shift_one_day(prev) != 0,
      udf_shift_one_day(prev),
      IFNULL(curr, 0)));
--
-- Converts a days-since-profile-creation offset into a single-bit pattern:
-- offsets 0 through 6 set the bit at that position; NULL or any other
-- offset yields 0.
CREATE TEMP FUNCTION
  udf_bits_from_days_since_created_profile(days_since_created_profile INT64) AS (
    CASE
      WHEN days_since_created_profile >= 0 AND days_since_created_profile <= 6
        THEN 1 << days_since_created_profile
      ELSE 0
    END);
--
CREATE TEMP FUNCTION
@ -33,7 +48,7 @@ CREATE TEMP FUNCTION
SELECT
AS STRUCT experiment,
branch,
udf_combine_days(prev.bits,
udf_combine_adjacent_days_bits(prev.bits,
curr.bits) AS bits
FROM
UNNEST(prev) AS prev
@ -43,7 +58,7 @@ CREATE TEMP FUNCTION
(experiment,
branch)
WHERE
udf_combine_days(prev.bits,
udf_combine_adjacent_days_bits(prev.bits,
curr.bits) > 0),
-- Experiments present in curr only
ARRAY(
@ -75,9 +90,9 @@ WITH
CAST(devtools_toolbox_opened_count_sum > 0 AS INT64) AS days_opened_dev_tools_bits,
-- We only trust profile_date if it is within one week of the ping submission,
-- so we ignore any value that is seven or more days old (or later than the submission date).
IFNULL(1 << DATE_DIFF(submission_date_s3, SAFE.PARSE_DATE("%F",
SUBSTR(profile_creation_date, 0, 10)), DAY),
0) & udf_bitmask_lowest_7() AS days_created_profile_bits,
udf_bits_from_days_since_created_profile(
DATE_DIFF(submission_date_s3, SAFE.PARSE_DATE("%F",
SUBSTR(profile_creation_date, 0, 10)), DAY)) AS days_created_profile_bits,
-- Experiments are an array, so we keep track of a usage bit pattern per experiment.
ARRAY(
SELECT
@ -109,18 +124,13 @@ IF
(_current.client_id IS NOT NULL,
_current,
_previous).* REPLACE (
udf_combine_days(_previous.days_seen_bits,
udf_combine_adjacent_days_bits(_previous.days_seen_bits,
_current.days_seen_bits) AS days_seen_bits,
udf_combine_days(_previous.days_visited_5_uri_bits,
udf_combine_adjacent_days_bits(_previous.days_visited_5_uri_bits,
_current.days_visited_5_uri_bits) AS days_visited_5_uri_bits,
udf_combine_days(_previous.days_opened_dev_tools_bits,
udf_combine_adjacent_days_bits(_previous.days_opened_dev_tools_bits,
_current.days_opened_dev_tools_bits) AS days_opened_dev_tools_bits,
-- We want to base new profile creation date on the first profile_creation_date
-- value we observe, so we propagate an existing non-null value in preference
-- to a non-null value on today's observation.
COALESCE( --
NULLIF(udf_shift_one_day(_previous.days_created_profile_bits),
0),
udf_coalesce_adjacent_days_bits(_previous.days_created_profile_bits,
_current.days_created_profile_bits) AS days_created_profile_bits,
udf_combine_experiment_days(_previous.days_seen_in_experiment,
_current.days_seen_in_experiment) AS days_seen_in_experiment)

Просмотреть файл

@ -0,0 +1,31 @@
-- Converts a days-since-profile-creation offset into a single-bit pattern:
-- offsets 0 through 6 set the bit at that position; NULL or any other
-- offset yields 0.
CREATE TEMP FUNCTION
  udf_bits_from_days_since_created_profile(days_since_created_profile INT64) AS (
    CASE
      WHEN days_since_created_profile >= 0 AND days_since_created_profile <= 6
        THEN 1 << days_since_created_profile
      ELSE 0
    END);
/*
Takes in a difference between submission date and profile creation date
and returns a bit pattern representing the profile creation date IFF
the profile date is the same as the submission date or no more than
6 days earlier.
Analysis has shown that client-reported profile creation dates are much
less reliable outside of this range and cannot be used as reliable indicators
of new profile creation.
Example:
SELECT
udf_bits_from_days_since_created_profile(0),
udf_bits_from_days_since_created_profile(1),
udf_bits_from_days_since_created_profile(6),
udf_bits_from_days_since_created_profile(-1),
udf_bits_from_days_since_created_profile(NULL),
udf_bits_from_days_since_created_profile(7);
1, 2, 64, 0, 0, 0
*/

Просмотреть файл

@ -0,0 +1,40 @@
-- Constant bitmask with the lowest 28 bits set (equal to 0x0FFFFFFF).
CREATE TEMP FUNCTION
  udf_bitmask_lowest_28() AS ((1 << 28) - 1);
--
-- Shifts x left by one bit and keeps only the bits selected by
-- udf_bitmask_lowest_28(); a NULL input yields 0.
CREATE TEMP FUNCTION
  udf_shift_one_day(x INT64) AS (
    COALESCE(udf_bitmask_lowest_28() & (x << 1), 0));
--
-- Prefers yesterday's bit pattern, shifted forward one day, when that shifted
-- value is non-zero; otherwise falls back to today's bits, or 0 if those are NULL.
CREATE TEMP FUNCTION
  udf_coalesce_adjacent_days_bits(prev INT64, curr INT64) AS (
    IF(udf_shift_one_day(prev) != 0,
      udf_shift_one_day(prev),
      IFNULL(curr, 0)));
/*
We generally want to believe only the first reasonable profile creation
date that we receive from a client.
Given bits representing usage from the previous day and the current day,
this function shifts the first argument by one day and returns either that
value if non-zero and non-null, the current day value if non-zero and non-null,
or else 0.
Example:
SELECT
udf_coalesce_adjacent_days_bits(1,
64),
udf_coalesce_adjacent_days_bits(64,
1),
udf_coalesce_adjacent_days_bits(0,
64),
udf_coalesce_adjacent_days_bits(NULL,
64),
udf_coalesce_adjacent_days_bits(NULL,
NULL);
2, 128, 64, 64, 0
*/