Use days_since_* and new table name (#41)
This commit is contained in:
Родитель
904f25f242
Коммит
51378e8049
11
README.md
11
README.md
|
@ -9,7 +9,8 @@ Recommended practices
|
|||
===
|
||||
|
||||
- Queries
|
||||
- Should be defined in files named as `sql/table_version.sql` e.g. `sql/clients_daily_v6.sql`
|
||||
- Should be defined in files named as `sql/table_version.sql` e.g.
|
||||
`sql/clients_daily_v6.sql`
|
||||
- Should not specify a project or dataset in table names to simplify testing
|
||||
- Should be [incremental](#incremental-queries)
|
||||
- Should filter input tables on partition and clustering columns
|
||||
|
@ -48,10 +49,10 @@ Incremental queries have these properties:
|
|||
- Should be impacted by values from a finite number of preceding partitions
|
||||
- This allows for backfilling in chunks instead of serially for all time
|
||||
and limiting backfills to a certain number of days following updated data
|
||||
- For example `sql/nondesktop_clients_last_seen_v1.sql` can be run serially
|
||||
on any 28 day period and the last day will be the same whether or not the
|
||||
partition preceding the first day was missing because values are only
|
||||
impacted by 27 preceding days
|
||||
- For example `sql/clients_last_seen_v1.sql` can be run serially on any 28 day
|
||||
period and the last day will be the same whether or not the partition
|
||||
preceding the first day was missing because values are only impacted by
|
||||
27 preceding days
|
||||
|
||||
Tests
|
||||
=====
|
||||
|
|
|
@ -1,30 +1,43 @@
|
|||
WITH current_sample AS (
|
||||
WITH
|
||||
_current AS (
|
||||
SELECT
|
||||
submission_date_s3 AS last_seen_date,
|
||||
* EXCEPT (submission_date_s3)
|
||||
* EXCEPT (submission_date_s3),
|
||||
0 AS days_since_seen,
|
||||
-- For measuring Active MAU, where this is the number of days since this
|
||||
-- client_id was an Active User as defined by
|
||||
-- https://docs.telemetry.mozilla.org/cookbooks/active_dau.html
|
||||
IF(scalar_parent_browser_engagement_total_uri_count_sum >= 5,
|
||||
0,
|
||||
NULL) AS days_since_visited_5_uri
|
||||
FROM
|
||||
clients_daily_v6
|
||||
WHERE
|
||||
submission_date_s3 = @submission_date
|
||||
), previous AS (
|
||||
submission_date_s3 = @submission_date ),
|
||||
_previous AS (
|
||||
SELECT
|
||||
* EXCEPT (submission_date,
|
||||
generated_time)
|
||||
* EXCEPT (submission_date) REPLACE(
|
||||
-- omit values outside 28 day window
|
||||
IF(days_since_visited_5_uri < 27,
|
||||
days_since_visited_5_uri,
|
||||
NULL) AS days_since_visited_5_uri)
|
||||
FROM
|
||||
analysis.clients_last_seen_v1
|
||||
clients_last_seen_v1
|
||||
WHERE
|
||||
submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
|
||||
AND last_seen_date > DATE_SUB(@submission_date, INTERVAL 28 DAY)
|
||||
)
|
||||
AND clients_last_seen_v1.days_since_seen < 27 )
|
||||
SELECT
|
||||
@submission_date AS submission_date,
|
||||
CURRENT_DATETIME() AS generated_time,
|
||||
IF(current_sample.client_id IS NOT NULL,
|
||||
current_sample,
|
||||
previous).*
|
||||
IF(_current.client_id IS NOT NULL,
|
||||
_current,
|
||||
_previous).* EXCEPT (days_since_seen,
|
||||
days_since_visited_5_uri),
|
||||
COALESCE(_current.days_since_seen,
|
||||
_previous.days_since_seen + 1) AS days_since_seen,
|
||||
COALESCE(_current.days_since_visited_5_uri,
|
||||
_previous.days_since_visited_5_uri + 1) AS days_since_visited_5_uri
|
||||
FROM
|
||||
current_sample
|
||||
_current
|
||||
FULL JOIN
|
||||
previous
|
||||
_previous
|
||||
USING
|
||||
(client_id)
|
||||
|
|
|
@ -1,18 +1,15 @@
|
|||
WITH
|
||||
inactive_days AS (
|
||||
SELECT
|
||||
*,
|
||||
DATE_DIFF(submission_date, last_seen_date, DAY) AS _inactive_days
|
||||
FROM
|
||||
clients_last_seen_v1
|
||||
)
|
||||
|
||||
SELECT
|
||||
submission_date,
|
||||
CURRENT_DATETIME() AS generated_time,
|
||||
COUNTIF(_inactive_days < 28) AS mau,
|
||||
COUNTIF(_inactive_days < 7) AS wau,
|
||||
COUNTIF(_inactive_days < 1) AS dau,
|
||||
COUNTIF(days_since_seen < 28) AS mau,
|
||||
COUNTIF(days_since_seen < 7) AS wau,
|
||||
COUNTIF(days_since_seen < 1) AS dau,
|
||||
-- Active MAU counts all Active Users on any day in the last 28 days not just
|
||||
-- the most recent day, making COUNTIF(days_since_seen < 28 AND visited_5_uri)
|
||||
-- incorrect. Instead we track days_since_visited_5_uri and use that.
|
||||
-- https://docs.telemetry.mozilla.org/cookbooks/active_dau.html
|
||||
COUNTIF(days_since_visited_5_uri < 28) AS visited_5_uri_mau,
|
||||
COUNTIF(days_since_visited_5_uri < 7) AS visited_5_uri_wau,
|
||||
COUNTIF(days_since_visited_5_uri < 1) AS visited_5_uri_dau,
|
||||
-- We hash client_ids into 20 buckets to aid in computing
|
||||
-- confidence intervals for mau/wau/dau sums; the particular hash
|
||||
-- function and number of buckets are subject to change in the future.
|
||||
|
@ -25,7 +22,7 @@ SELECT
|
|||
country,
|
||||
distribution_id
|
||||
FROM
|
||||
inactive_days
|
||||
clients_last_seen_v1
|
||||
WHERE
|
||||
client_id IS NOT NULL
|
||||
AND submission_date = @submission_date
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
CREATE OR REPLACE VIEW
|
||||
firefox_desktop_exact_mau28_v1 AS
|
||||
SELECT
|
||||
submission_date,
|
||||
CURRENT_DATETIME() AS generated_time,
|
||||
SUM(mau) AS mau,
|
||||
SUM(wau) AS wau,
|
||||
SUM(dau) AS dau
|
||||
SUM(dau) AS dau,
|
||||
SUM(visited_5_uri_mau) AS visited_5_uri_mau,
|
||||
SUM(visited_5_uri_wau) AS visited_5_uri_wau,
|
||||
SUM(visited_5_uri_dau) AS visited_5_uri_dau
|
||||
FROM
|
||||
firefox_desktop_exact_mau28_by_dimensions_v1
|
||||
WHERE
|
||||
submission_date = @submission_date
|
||||
`moz-fx-data-derived-datasets.telemetry.firefox_desktop_exact_mau28_by_dimensions_v1`
|
||||
GROUP BY
|
||||
submission_date
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
id,v,day,last_seen
|
||||
a,0,2019-01-01,2019-01-01
|
||||
c,0,2019-01-01,2019-01-01
|
||||
e,0,2019-01-01,2019-01-01
|
||||
g,0,2019-01-01,2019-01-01
|
|
|
@ -0,0 +1,9 @@
|
|||
id,v,day
|
||||
a,2,2019-01-03
|
||||
b,2,2019-01-03
|
||||
c,2,2019-01-03
|
||||
d,2,2019-01-03
|
||||
c,1,2019-01-02
|
||||
d,1,2019-01-02
|
||||
e,1,2019-01-02
|
||||
f,1,2019-01-02
|
|
|
@ -0,0 +1,14 @@
|
|||
id,v,day,last_seen
|
||||
a,0,2019-01-02,2019-01-01
|
||||
a,2,2019-01-03,2019-01-03
|
||||
b,2,2019-01-03,2019-01-03
|
||||
c,1,2019-01-02,2019-01-02
|
||||
c,2,2019-01-03,2019-01-03
|
||||
d,1,2019-01-02,2019-01-02
|
||||
d,2,2019-01-03,2019-01-03
|
||||
e,1,2019-01-02,2019-01-02
|
||||
e,1,2019-01-02,2019-01-02
|
||||
f,1,2019-01-02,2019-01-02
|
||||
f,1,2019-01-02,2019-01-02
|
||||
g,0,2019-01-02,2019-01-01
|
||||
g,0,2019-01-02,2019-01-01
|
|
|
@ -0,0 +1,12 @@
|
|||
loads:
|
||||
- source: ''
|
||||
destination: clients_daily_v6
|
||||
job_config:
|
||||
schema:
|
||||
fields:
|
||||
query:
|
||||
queryParameters:
|
||||
- name: submission_date
|
||||
parameterType: {type: DATE}
|
||||
parameterValue: {value: 2019-1-2}
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
{"submission_date":"2019-01-01","generated_time":"2019-01-02T01:00:00","last_seen_date":"2019-01-01","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"a"}
|
||||
{"submission_date":"2019-01-01","generated_time":"2019-01-02T01:00:00","last_seen_date":"2019-01-01","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"b"}
|
|
@ -5,6 +5,11 @@
|
|||
"mode": "REQUIRED",
|
||||
"description": "time_partitioning_field"
|
||||
},
|
||||
{
|
||||
"name": "client_id",
|
||||
"type": "STRING",
|
||||
"mode": "REQUIRED"
|
||||
},
|
||||
{
|
||||
"name": "active_hours_sum",
|
||||
"type": "FLOAT",
|
||||
|
@ -23,8 +28,8 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"name": "client_id",
|
||||
"type": "STRING",
|
||||
"mode": "REQUIRED"
|
||||
"name": "scalar_parent_browser_engagement_total_uri_count_sum",
|
||||
"type": "INT64",
|
||||
"mode": "NULLABLE"
|
||||
}
|
||||
]
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
{"submission_date":"2019-01-01","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"a","days_since_seen":0}
|
||||
{"submission_date":"2019-01-01","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"b","days_since_seen":0}
|
|
@ -5,13 +5,8 @@
|
|||
"mode": "REQUIRED"
|
||||
},
|
||||
{
|
||||
"name": "generated_time",
|
||||
"type": "DATETIME",
|
||||
"mode": "REQUIRED"
|
||||
},
|
||||
{
|
||||
"name": "last_seen_date",
|
||||
"type": "DATE",
|
||||
"name": "client_id",
|
||||
"type": "STRING",
|
||||
"mode": "REQUIRED"
|
||||
},
|
||||
{
|
||||
|
@ -32,8 +27,18 @@
|
|||
]
|
||||
},
|
||||
{
|
||||
"name": "client_id",
|
||||
"type": "STRING",
|
||||
"name": "scalar_parent_browser_engagement_total_uri_count_sum",
|
||||
"type": "INT64",
|
||||
"mode": "NULLABLE"
|
||||
},
|
||||
{
|
||||
"name": "days_since_seen",
|
||||
"type": "INT64",
|
||||
"mode": "REQUIRED"
|
||||
},
|
||||
{
|
||||
"name": "days_since_visited_5_uri",
|
||||
"type": "INT64",
|
||||
"mode": "NULLABLE"
|
||||
}
|
||||
]
|
|
@ -1,3 +1,3 @@
|
|||
{"submission_date":"2019-01-02","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"a","last_seen_date":"2019-01-01"}
|
||||
{"submission_date":"2019-01-02","active_hours_sum":1.0,"attribution":{"source":"test"},"client_id":"b","last_seen_date":"2019-01-02"}
|
||||
{"submission_date":"2019-01-02","active_hours_sum":1.0,"attribution":{"source":"test"},"client_id":"c","last_seen_date":"2019-01-02"}
|
||||
{"submission_date":"2019-01-02","client_id":"a","active_hours_sum":0.0,"attribution":{"source":"prev"},"scalar_parent_browser_engagement_total_uri_count_sum":null,"days_since_seen":1,"days_since_visited_5_uri":null}
|
||||
{"submission_date":"2019-01-02","client_id":"b","active_hours_sum":1.0,"attribution":{"source":"test"},"scalar_parent_browser_engagement_total_uri_count_sum":null,"days_since_seen":0,"days_since_visited_5_uri":null}
|
||||
{"submission_date":"2019-01-02","client_id":"c","active_hours_sum":1.0,"attribution":{"source":"test"},"scalar_parent_browser_engagement_total_uri_count_sum":null,"days_since_seen":0,"days_since_visited_5_uri":null}
|
||||
|
|
Загрузка…
Ссылка в новой задаче