Use days_since_* and new table name (#41)
This commit is contained in:
Родитель
904f25f242
Коммит
51378e8049
11
README.md
11
README.md
|
@ -9,7 +9,8 @@ Recommended practices
|
||||||
===
|
===
|
||||||
|
|
||||||
- Queries
|
- Queries
|
||||||
- Should be defined in files named as `sql/table_version.sql` e.g. `sql/clients_daily_v6.sql`
|
- Should be defined in files named as `sql/table_version.sql` e.g.
|
||||||
|
`sql/clients_daily_v6.sql`
|
||||||
- Should not specify a project or dataset in table names to simplify testing
|
- Should not specify a project or dataset in table names to simplify testing
|
||||||
- Should be [incremental](#incremental-queries)
|
- Should be [incremental](#incremental-queries)
|
||||||
- Should filter input tables on partition and clustering columns
|
- Should filter input tables on partition and clustering columns
|
||||||
|
@ -48,10 +49,10 @@ Incremental queries have these properties:
|
||||||
- Should be impacted by values from a finite number of preceding partitions
|
- Should be impacted by values from a finite number of preceding partitions
|
||||||
- This allows for backfilling in chunks instead of serially for all time
|
- This allows for backfilling in chunks instead of serially for all time
|
||||||
and limiting backfills to a certain number of days following updated data
|
and limiting backfills to a certain number of days following updated data
|
||||||
- For example `sql/nondesktop_clients_last_seen_v1.sql` can be run serially
|
- For example `sql/clients_last_seen_v1.sql` can be run serially on any 28 day
|
||||||
on any 28 day period and the last day will be the same whether or not the
|
period and the last day will be the same whether or not the partition
|
||||||
partition preceding the first day was missing because values are only
|
preceding the first day was missing because values are only impacted by
|
||||||
impacted by 27 preceding days
|
27 preceding days
|
||||||
|
|
||||||
Tests
|
Tests
|
||||||
=====
|
=====
|
||||||
|
|
|
@ -1,30 +1,43 @@
|
||||||
WITH current_sample AS (
|
WITH
|
||||||
|
_current AS (
|
||||||
SELECT
|
SELECT
|
||||||
submission_date_s3 AS last_seen_date,
|
* EXCEPT (submission_date_s3),
|
||||||
* EXCEPT (submission_date_s3)
|
0 AS days_since_seen,
|
||||||
|
-- For measuring Active MAU, where this is the number of days since this
|
||||||
|
-- client_id was an Active User as defined by
|
||||||
|
-- https://docs.telemetry.mozilla.org/cookbooks/active_dau.html
|
||||||
|
IF(scalar_parent_browser_engagement_total_uri_count_sum >= 5,
|
||||||
|
0,
|
||||||
|
NULL) AS days_since_visited_5_uri
|
||||||
FROM
|
FROM
|
||||||
clients_daily_v6
|
clients_daily_v6
|
||||||
WHERE
|
WHERE
|
||||||
submission_date_s3 = @submission_date
|
submission_date_s3 = @submission_date ),
|
||||||
), previous AS (
|
_previous AS (
|
||||||
SELECT
|
SELECT
|
||||||
* EXCEPT (submission_date,
|
* EXCEPT (submission_date) REPLACE(
|
||||||
generated_time)
|
-- omit values outside 28 day window
|
||||||
|
IF(days_since_visited_5_uri < 27,
|
||||||
|
days_since_visited_5_uri,
|
||||||
|
NULL) AS days_since_visited_5_uri)
|
||||||
FROM
|
FROM
|
||||||
analysis.clients_last_seen_v1
|
clients_last_seen_v1
|
||||||
WHERE
|
WHERE
|
||||||
submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
|
submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
|
||||||
AND last_seen_date > DATE_SUB(@submission_date, INTERVAL 28 DAY)
|
AND clients_last_seen_v1.days_since_seen < 27 )
|
||||||
)
|
|
||||||
SELECT
|
SELECT
|
||||||
@submission_date AS submission_date,
|
@submission_date AS submission_date,
|
||||||
CURRENT_DATETIME() AS generated_time,
|
IF(_current.client_id IS NOT NULL,
|
||||||
IF(current_sample.client_id IS NOT NULL,
|
_current,
|
||||||
current_sample,
|
_previous).* EXCEPT (days_since_seen,
|
||||||
previous).*
|
days_since_visited_5_uri),
|
||||||
|
COALESCE(_current.days_since_seen,
|
||||||
|
_previous.days_since_seen + 1) AS days_since_seen,
|
||||||
|
COALESCE(_current.days_since_visited_5_uri,
|
||||||
|
_previous.days_since_visited_5_uri + 1) AS days_since_visited_5_uri
|
||||||
FROM
|
FROM
|
||||||
current_sample
|
_current
|
||||||
FULL JOIN
|
FULL JOIN
|
||||||
previous
|
_previous
|
||||||
USING
|
USING
|
||||||
(client_id)
|
(client_id)
|
||||||
|
|
|
@ -1,18 +1,15 @@
|
||||||
WITH
|
|
||||||
inactive_days AS (
|
|
||||||
SELECT
|
|
||||||
*,
|
|
||||||
DATE_DIFF(submission_date, last_seen_date, DAY) AS _inactive_days
|
|
||||||
FROM
|
|
||||||
clients_last_seen_v1
|
|
||||||
)
|
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
submission_date,
|
submission_date,
|
||||||
CURRENT_DATETIME() AS generated_time,
|
COUNTIF(days_since_seen < 28) AS mau,
|
||||||
COUNTIF(_inactive_days < 28) AS mau,
|
COUNTIF(days_since_seen < 7) AS wau,
|
||||||
COUNTIF(_inactive_days < 7) AS wau,
|
COUNTIF(days_since_seen < 1) AS dau,
|
||||||
COUNTIF(_inactive_days < 1) AS dau,
|
-- Active MAU counts all Active Users on any day in the last 28 days not just
|
||||||
|
-- the most recent day making COUNTIF(days_since_seen < 28 AND visited_5_uri)
|
||||||
|
-- incorrect. Instead we track days_since_visited_5_uri and use that.
|
||||||
|
-- https://docs.telemetry.mozilla.org/cookbooks/active_dau.html
|
||||||
|
COUNTIF(days_since_visited_5_uri < 28) AS visited_5_uri_mau,
|
||||||
|
COUNTIF(days_since_visited_5_uri < 7) AS visited_5_uri_wau,
|
||||||
|
COUNTIF(days_since_visited_5_uri < 1) AS visited_5_uri_dau,
|
||||||
-- We hash client_ids into 20 buckets to aid in computing
|
-- We hash client_ids into 20 buckets to aid in computing
|
||||||
-- confidence intervals for mau/wau/dau sums; the particular hash
|
-- confidence intervals for mau/wau/dau sums; the particular hash
|
||||||
-- function and number of buckets is subject to change in the future.
|
-- function and number of buckets is subject to change in the future.
|
||||||
|
@ -25,7 +22,7 @@ SELECT
|
||||||
country,
|
country,
|
||||||
distribution_id
|
distribution_id
|
||||||
FROM
|
FROM
|
||||||
inactive_days
|
clients_last_seen_v1
|
||||||
WHERE
|
WHERE
|
||||||
client_id IS NOT NULL
|
client_id IS NOT NULL
|
||||||
AND submission_date = @submission_date
|
AND submission_date = @submission_date
|
||||||
|
|
|
@ -1,12 +1,14 @@
|
||||||
|
CREATE OR REPLACE VIEW
|
||||||
|
firefox_desktop_exact_mau28_v1 AS
|
||||||
SELECT
|
SELECT
|
||||||
submission_date,
|
submission_date,
|
||||||
CURRENT_DATETIME() AS generated_time,
|
|
||||||
SUM(mau) AS mau,
|
SUM(mau) AS mau,
|
||||||
SUM(wau) AS wau,
|
SUM(wau) AS wau,
|
||||||
SUM(dau) AS dau
|
SUM(dau) AS dau,
|
||||||
|
SUM(visited_5_uri_mau) AS visited_5_uri_mau,
|
||||||
|
SUM(visited_5_uri_wau) AS visited_5_uri_wau,
|
||||||
|
SUM(visited_5_uri_dau) AS visited_5_uri_dau
|
||||||
FROM
|
FROM
|
||||||
firefox_desktop_exact_mau28_by_dimensions_v1
|
`moz-fx-data-derived-datasets.telemetry.firefox_desktop_exact_mau28_by_dimensions_v1`
|
||||||
WHERE
|
|
||||||
submission_date = @submission_date
|
|
||||||
GROUP BY
|
GROUP BY
|
||||||
submission_date
|
submission_date
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
id,v,day,last_seen
|
||||||
|
a,0,2019-01-01,2019-01-01
|
||||||
|
c,0,2019-01-01,2019-01-01
|
||||||
|
e,0,2019-01-01,2019-01-01
|
||||||
|
g,0,2019-01-01,2019-01-01
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
id,v,day
|
||||||
|
a,2,2019-01-03
|
||||||
|
b,2,2019-01-03
|
||||||
|
c,2,2019-01-03
|
||||||
|
d,2,2019-01-03
|
||||||
|
c,1,2019-01-02
|
||||||
|
d,1,2019-01-02
|
||||||
|
e,1,2019-01-02
|
||||||
|
f,1,2019-01-02
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
id,v,day,last_seen
|
||||||
|
a,0,2019-01-02,2019-01-01
|
||||||
|
a,2,2019-01-03,2019-01-03
|
||||||
|
b,2,2019-01-03,2019-01-03
|
||||||
|
c,1,2019-01-02,2019-01-02
|
||||||
|
c,2,2019-01-03,2019-01-03
|
||||||
|
d,1,2019-01-02,2019-01-02
|
||||||
|
d,2,2019-01-03,2019-01-03
|
||||||
|
e,1,2019-01-02,2019-01-02
|
||||||
|
e,1,2019-01-02,2019-01-02
|
||||||
|
f,1,2019-01-02,2019-01-02
|
||||||
|
f,1,2019-01-02,2019-01-02
|
||||||
|
g,0,2019-01-02,2019-01-01
|
||||||
|
g,0,2019-01-02,2019-01-01
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
loads:
|
||||||
|
- source: ''
|
||||||
|
destination: clients_daily_v6
|
||||||
|
job_config:
|
||||||
|
schema:
|
||||||
|
fields:
|
||||||
|
query:
|
||||||
|
queryParameters:
|
||||||
|
- name: submission_date
|
||||||
|
parameterType: {type: DATE}
|
||||||
|
parameterValue: {value: 2019-1-2}
|
||||||
|
|
|
@ -1,2 +0,0 @@
|
||||||
{"submission_date":"2019-01-01","generated_time":"2019-01-02T01:00:00","last_seen_date":"2019-01-01","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"a"}
|
|
||||||
{"submission_date":"2019-01-01","generated_time":"2019-01-02T01:00:00","last_seen_date":"2019-01-01","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"b"}
|
|
|
@ -5,6 +5,11 @@
|
||||||
"mode": "REQUIRED",
|
"mode": "REQUIRED",
|
||||||
"description": "time_partitioning_field"
|
"description": "time_partitioning_field"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "client_id",
|
||||||
|
"type": "STRING",
|
||||||
|
"mode": "REQUIRED"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "active_hours_sum",
|
"name": "active_hours_sum",
|
||||||
"type": "FLOAT",
|
"type": "FLOAT",
|
||||||
|
@ -23,8 +28,8 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "client_id",
|
"name": "scalar_parent_browser_engagement_total_uri_count_sum",
|
||||||
"type": "STRING",
|
"type": "INT64",
|
||||||
"mode": "REQUIRED"
|
"mode": "NULLABLE"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
{"submission_date":"2019-01-01","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"a","days_since_seen":0}
|
||||||
|
{"submission_date":"2019-01-01","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"b","days_since_seen":0}
|
|
@ -5,13 +5,8 @@
|
||||||
"mode": "REQUIRED"
|
"mode": "REQUIRED"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "generated_time",
|
"name": "client_id",
|
||||||
"type": "DATETIME",
|
"type": "STRING",
|
||||||
"mode": "REQUIRED"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "last_seen_date",
|
|
||||||
"type": "DATE",
|
|
||||||
"mode": "REQUIRED"
|
"mode": "REQUIRED"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -32,8 +27,18 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "client_id",
|
"name": "scalar_parent_browser_engagement_total_uri_count_sum",
|
||||||
"type": "STRING",
|
"type": "INT64",
|
||||||
|
"mode": "NULLABLE"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "days_since_seen",
|
||||||
|
"type": "INT64",
|
||||||
"mode": "REQUIRED"
|
"mode": "REQUIRED"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "days_since_visited_5_uri",
|
||||||
|
"type": "INT64",
|
||||||
|
"mode": "NULLABLE"
|
||||||
}
|
}
|
||||||
]
|
]
|
|
@ -1,3 +1,3 @@
|
||||||
{"submission_date":"2019-01-02","active_hours_sum":0.0,"attribution":{"source":"prev"},"client_id":"a","last_seen_date":"2019-01-01"}
|
{"submission_date":"2019-01-02","client_id":"a","active_hours_sum":0.0,"attribution":{"source":"prev"},"scalar_parent_browser_engagement_total_uri_count_sum":null,"days_since_seen":1,"days_since_visited_5_uri":null}
|
||||||
{"submission_date":"2019-01-02","active_hours_sum":1.0,"attribution":{"source":"test"},"client_id":"b","last_seen_date":"2019-01-02"}
|
{"submission_date":"2019-01-02","client_id":"b","active_hours_sum":1.0,"attribution":{"source":"test"},"scalar_parent_browser_engagement_total_uri_count_sum":null,"days_since_seen":0,"days_since_visited_5_uri":null}
|
||||||
{"submission_date":"2019-01-02","active_hours_sum":1.0,"attribution":{"source":"test"},"client_id":"c","last_seen_date":"2019-01-02"}
|
{"submission_date":"2019-01-02","client_id":"c","active_hours_sum":1.0,"attribution":{"source":"test"},"scalar_parent_browser_engagement_total_uri_count_sum":null,"days_since_seen":0,"days_since_visited_5_uri":null}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче