Factor profile days logic into 2 udfs
This commit is contained in:
Родитель
c6fed78815
Коммит
9c358c8baa
|
@ -10,8 +10,23 @@ CREATE TEMP FUNCTION
|
|||
0));
|
||||
--
|
||||
CREATE TEMP FUNCTION
|
||||
udf_combine_days(prev INT64,
|
||||
udf_combine_adjacent_days_bits(prev INT64,
|
||||
curr INT64) AS (udf_shift_one_day(prev) + IFNULL(curr,
|
||||
0));
|
||||
--
|
||||
-- Shifts the previous day's bit pattern forward by one day and returns it when
-- the shifted value is non-zero; otherwise returns the current day's bits,
-- or 0 when those are NULL.
CREATE TEMP FUNCTION
  udf_coalesce_adjacent_days_bits(prev INT64, curr INT64) AS (
    CASE
      -- A NULL or zero shifted value fails this test and falls through to curr.
      WHEN udf_shift_one_day(prev) != 0 THEN udf_shift_one_day(prev)
      ELSE COALESCE(curr, 0)
    END);
|
||||
--
|
||||
-- Maps a day difference in the range 0..6 to a single set bit at that
-- position; any other input (including NULL) yields 0.
CREATE TEMP FUNCTION
  udf_bits_from_days_since_created_profile(days_since_created_profile INT64) AS (
    CASE
      WHEN days_since_created_profile >= 0
        AND days_since_created_profile <= 6
        THEN 1 << days_since_created_profile
      -- NULL input makes the comparison NULL, which also lands here.
      ELSE 0
    END);
|
||||
--
|
||||
CREATE TEMP FUNCTION
|
||||
|
@ -33,7 +48,7 @@ CREATE TEMP FUNCTION
|
|||
SELECT
|
||||
AS STRUCT experiment,
|
||||
branch,
|
||||
udf_combine_days(prev.bits,
|
||||
udf_combine_adjacent_days_bits(prev.bits,
|
||||
curr.bits) AS bits
|
||||
FROM
|
||||
UNNEST(prev) AS prev
|
||||
|
@ -43,7 +58,7 @@ CREATE TEMP FUNCTION
|
|||
(experiment,
|
||||
branch)
|
||||
WHERE
|
||||
udf_combine_days(prev.bits,
|
||||
udf_combine_adjacent_days_bits(prev.bits,
|
||||
curr.bits) > 0),
|
||||
-- Experiments present in curr only
|
||||
ARRAY(
|
||||
|
@ -75,9 +90,9 @@ WITH
|
|||
CAST(devtools_toolbox_opened_count_sum > 0 AS INT64) AS days_opened_dev_tools_bits,
|
||||
-- We only trust profile_date if it is within one week of the ping submission,
|
||||
-- so we ignore any value that is seven or more days old (or later than the submission date).
|
||||
IFNULL(1 << DATE_DIFF(submission_date_s3, SAFE.PARSE_DATE("%F",
|
||||
SUBSTR(profile_creation_date, 0, 10)), DAY),
|
||||
0) & udf_bitmask_lowest_7() AS days_created_profile_bits,
|
||||
udf_bits_from_days_since_created_profile(
|
||||
DATE_DIFF(submission_date_s3, SAFE.PARSE_DATE("%F",
|
||||
SUBSTR(profile_creation_date, 0, 10)), DAY)) AS days_created_profile_bits,
|
||||
-- Experiments are an array, so we keep track of a usage bit pattern per experiment.
|
||||
ARRAY(
|
||||
SELECT
|
||||
|
@ -109,18 +124,13 @@ IF
|
|||
(_current.client_id IS NOT NULL,
|
||||
_current,
|
||||
_previous).* REPLACE (
|
||||
udf_combine_days(_previous.days_seen_bits,
|
||||
udf_combine_adjacent_days_bits(_previous.days_seen_bits,
|
||||
_current.days_seen_bits) AS days_seen_bits,
|
||||
udf_combine_days(_previous.days_visited_5_uri_bits,
|
||||
udf_combine_adjacent_days_bits(_previous.days_visited_5_uri_bits,
|
||||
_current.days_visited_5_uri_bits) AS days_visited_5_uri_bits,
|
||||
udf_combine_days(_previous.days_opened_dev_tools_bits,
|
||||
udf_combine_adjacent_days_bits(_previous.days_opened_dev_tools_bits,
|
||||
_current.days_opened_dev_tools_bits) AS days_opened_dev_tools_bits,
|
||||
-- We want to base new profile creation date on the first profile_creation_date
|
||||
-- value we observe, so we propagate an existing non-null value in preference
|
||||
-- to a non-null value on today's observation.
|
||||
COALESCE( --
|
||||
NULLIF(udf_shift_one_day(_previous.days_created_profile_bits),
|
||||
0),
|
||||
udf_coalesce_adjacent_days_bits(_previous.days_created_profile_bits,
|
||||
_current.days_created_profile_bits) AS days_created_profile_bits,
|
||||
udf_combine_experiment_days(_previous.days_seen_in_experiment,
|
||||
_current.days_seen_in_experiment) AS days_seen_in_experiment)
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
-- Returns 1 << days_since_created_profile when the argument lies in 0..6,
-- and 0 for every other input, NULL included.
CREATE TEMP FUNCTION
  udf_bits_from_days_since_created_profile(days_since_created_profile INT64) AS (
    CASE
      WHEN days_since_created_profile >= 0
        AND days_since_created_profile <= 6
        THEN 1 << days_since_created_profile
      -- Out-of-range and NULL inputs both fall through to zero.
      ELSE 0
    END);
|
||||
|
||||
/*
|
||||
|
||||
Takes in a difference between submission date and profile creation date
|
||||
and returns a bit pattern representing the profile creation date IFF
|
||||
the profile date is the same as the submission date or no more than
|
||||
6 days earlier.
|
||||
|
||||
Analysis has shown that client-reported profile creation dates are much
|
||||
less reliable outside of this range and cannot be used as reliable indicators
|
||||
of new profile creation.
|
||||
|
||||
Example:
|
||||
|
||||
SELECT
|
||||
udf_bits_from_days_since_created_profile(0),
|
||||
udf_bits_from_days_since_created_profile(1),
|
||||
udf_bits_from_days_since_created_profile(6),
|
||||
udf_bits_from_days_since_created_profile(-1),
|
||||
udf_bits_from_days_since_created_profile(NULL),
|
||||
udf_bits_from_days_since_created_profile(7);
|
||||
1, 2, 64, 0, 0, 0
|
||||
|
||||
*/
|
|
@ -0,0 +1,40 @@
|
|||
-- Constant mask with the 28 lowest bits set (equal to 0x0FFFFFFF).
CREATE TEMP FUNCTION
  udf_bitmask_lowest_28() AS ((1 << 28) - 1);
|
||||
--
|
||||
-- Shifts x left by one bit and keeps only the bits covered by
-- udf_bitmask_lowest_28(); a NULL input yields 0.
CREATE TEMP FUNCTION
  udf_shift_one_day(x INT64) AS (
    COALESCE(udf_bitmask_lowest_28() & (x << 1), 0));
|
||||
--
|
||||
-- Prefers the previous day's bits, shifted forward one day, whenever that
-- shifted value is non-zero; otherwise falls back to the current day's bits,
-- and finally to 0 when those are NULL.
CREATE TEMP FUNCTION
  udf_coalesce_adjacent_days_bits(prev INT64, curr INT64) AS (
    CASE
      -- A NULL or zero shifted value fails this test and falls through to curr.
      WHEN udf_shift_one_day(prev) != 0 THEN udf_shift_one_day(prev)
      ELSE COALESCE(curr, 0)
    END);
|
||||
|
||||
/*
|
||||
|
||||
We generally want to believe only the first reasonable profile creation
|
||||
date that we receive from a client.
|
||||
Given bits representing usage from the previous day and the current day,
|
||||
this function shifts the first argument by one day and returns either that
|
||||
value if non-zero and non-null, the current day value if non-zero and non-null,
|
||||
or else 0.
|
||||
|
||||
Example:
|
||||
|
||||
SELECT
|
||||
udf_coalesce_adjacent_days_bits(1,
|
||||
64),
|
||||
udf_coalesce_adjacent_days_bits(64,
|
||||
1),
|
||||
udf_coalesce_adjacent_days_bits(0,
|
||||
64),
|
||||
udf_coalesce_adjacent_days_bits(NULL,
|
||||
64),
|
||||
udf_coalesce_adjacent_days_bits(NULL,
|
||||
NULL);
|
||||
2, 128, 64, 64, 0
|
||||
|
||||
*/
|
||||
|
Загрузка…
Ссылка в новой задаче