Query for updates to event_types_v1 (#1295)

* Query to update event_types_v1

This query takes yesterday's events, yesterday's event_types,
and adds the new events, event_properties, and property values.

It writes the result out to a new partition. This is not strictly
necessary but will aid debugging and re-runs.

* Format SQL
This commit is contained in:
Frank Bertsch 2020-09-10 12:53:04 -04:00 коммит произвёл GitHub
Родитель e71840cec6
Коммит aca88a3d45
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
5 изменённых файлов: 452 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,243 @@
WITH current_events AS (
  -- One row per individual event from pings whose submission_timestamp falls
  -- on @submission_date. The `events` array is flattened, the event's `name`
  -- field is exposed as `event`, and every other column is passed through.
  SELECT
    single_event.name AS event,
    * EXCEPT (name)
  FROM
    org_mozilla_firefox.events
  CROSS JOIN
    UNNEST(events) AS single_event
  WHERE
    DATE(submission_timestamp) = @submission_date
),
event_types AS (
  -- The event-type rows stored under the previous day's submission_date;
  -- this is the baseline that the rest of the query extends.
  SELECT
    previous_run.*
  FROM
    org_mozilla_firefox_derived.event_types_v1 AS previous_run
  WHERE
    previous_run.submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
),
new_primary_event_types AS (
  -- (category, event) pairs present in current_events but absent from
  -- event_types. Each one receives the next free numeric_index, ordered by
  -- first occurrence with category/event as deterministic tie-breakers.
  SELECT
    category,
    event,
    MIN(timestamp) AS first_timestamp,
    -- MAX() over zero rows is NULL, so without COALESCE an empty or missing
    -- previous-day partition would turn every new numeric_index into NULL
    -- instead of starting the numbering at 1.
    ROW_NUMBER() OVER (ORDER BY MIN(timestamp) ASC, category ASC, event ASC) + COALESCE(
      (SELECT MAX(numeric_index) FROM event_types),
      0
    ) AS numeric_index,
    -- A brand-new event type has no indexed properties yet.
    0 AS max_event_property_index
  FROM
    current_events
  LEFT JOIN
    event_types
  USING
    (category, event)
  WHERE
    -- Anti-join: keep only rows that found no match in event_types.
    event_types.event IS NULL
  GROUP BY
    category,
    event
),
all_primary_event_types AS (
  -- Union of the event types first seen today and the ones carried over from
  -- event_types. For carried-over rows, max_event_property_index is the
  -- highest property index already stored (0 when there are no properties).
  SELECT
    category,
    event,
    first_timestamp,
    numeric_index,
    max_event_property_index
  FROM
    new_primary_event_types
  UNION ALL
  SELECT
    category,
    event,
    first_timestamp,
    numeric_index,
    MAX(COALESCE(event_property.index, 0)) AS max_event_property_index
  FROM
    event_types
  -- LEFT JOIN so that events with an empty event_properties array still
  -- produce one row (with a NULL event_property).
  LEFT JOIN
    UNNEST(event_properties) AS event_property
  GROUP BY
    category,
    event,
    first_timestamp,
    numeric_index
),
new_event_property_indices AS (
  -- Property keys (entries of `extra`) seen today that are not yet stored for
  -- their (category, event). Each gets the next index after the event's
  -- current max_event_property_index.
  SELECT
    category,
    event,
    event_property.key AS event_property,
    -- All extras of one event share that event's timestamp, so ordering by
    -- MIN(timestamp) alone leaves ties and would let re-runs assign different
    -- indices. The property key is added as a deterministic tie-breaker.
    ROW_NUMBER() OVER (
      PARTITION BY
        category,
        event
      ORDER BY
        MIN(timestamp) ASC,
        event_property.key ASC
    ) + ANY_VALUE(max_event_property_index) AS event_property_index,
    -- A brand-new property has no indexed values yet.
    0 AS max_event_property_value_index
  FROM
    current_events
  CROSS JOIN
    UNNEST(extra) AS event_property
  LEFT JOIN
    -- Existing (category, event, key) triples, one row per stored property.
    (SELECT * FROM event_types, UNNEST(event_properties)) event_types
  USING
    (category, event, key)
  JOIN
    all_primary_event_types
  USING
    (event, category)
  WHERE
    -- Anti-join: keep only properties with no match among the stored ones.
    event_types.event IS NULL
  GROUP BY
    category,
    event,
    event_property
),
all_event_property_indices AS (
  -- Union of the properties first seen today and the ones already stored in
  -- event_types. For stored properties, max_event_property_value_index is the
  -- highest value index already assigned (0 when there are no values).
  SELECT
    category,
    event,
    event_property,
    event_property_index,
    max_event_property_value_index
  FROM
    new_event_property_indices
  UNION ALL
  SELECT
    category,
    event,
    event_property.key AS event_property,
    event_property.index AS event_property_index,
    MAX(COALESCE(`values`.index, 0)) AS max_event_property_value_index
  FROM
    event_types
  LEFT JOIN
    UNNEST(event_properties) AS event_property
  LEFT JOIN
    -- `values` is back-quoted so formatters do not treat it as the VALUES keyword.
    UNNEST(value) AS `values`
  GROUP BY
    category,
    event,
    first_timestamp,
    event_property,
    event_property_index
),
new_event_property_value_indices AS (
  -- Property values seen today that are not yet stored for their
  -- (category, event, property key). Each gets the next index after the
  -- property's current max_event_property_value_index.
  SELECT
    current_events.category,
    current_events.event,
    event_property.key AS event_property,
    event_property.value AS event_property_value,
    -- Ties on MIN(timestamp) are broken by the value itself so that re-runs
    -- assign the same indices.
    ROW_NUMBER() OVER (
      PARTITION BY
        current_events.category,
        current_events.event,
        event_property.key
      ORDER BY
        MIN(timestamp) ASC,
        event_property.value ASC
    ) + ANY_VALUE(max_event_property_value_index) AS event_property_value_index,
  FROM
    current_events
  CROSS JOIN
    UNNEST(extra) AS event_property
  LEFT JOIN
    (
      -- One row per value already stored, keeping its event's columns.
      SELECT
        event_types.* EXCEPT (event_properties),
        existing_event_property,
        `values`
      FROM
        event_types
      CROSS JOIN
        UNNEST(event_properties) AS existing_event_property
      CROSS JOIN
        UNNEST(value) AS `values`
    ) AS event_types
  ON
    -- The match must be scoped to the same category and event. Matching on
    -- key/value alone treats a value stored under a *different* event as
    -- already known, so it would never be indexed for this event.
    current_events.category = event_types.category
    AND current_events.event = event_types.event
    AND event_property.key = event_types.existing_event_property.key
    AND event_property.value = event_types.`values`.key
  JOIN
    all_event_property_indices
  ON
    all_event_property_indices.category = current_events.category
    AND all_event_property_indices.event = current_events.event
    AND all_event_property_indices.event_property = event_property.key
  WHERE
    -- Anti-join: keep only values with no match among the stored ones.
    event_types.event IS NULL
  GROUP BY
    category,
    event,
    event_property,
    event_property_value
),
all_event_property_value_indices AS (
  -- Union of the property values first seen today and the ones already
  -- stored in event_types, one row per (category, event, property, value).
  SELECT
    category,
    event,
    event_property,
    event_property_value,
    event_property_value_index
  FROM
    new_event_property_value_indices
  UNION ALL
  SELECT
    category,
    event,
    event_property.key AS event_property,
    `values`.key AS event_property_value,
    `values`.index AS event_property_value_index
  FROM
    event_types
  CROSS JOIN
    UNNEST(event_properties) AS event_property
  CROSS JOIN
    UNNEST(value) AS `values`
),
per_event_property AS (
  -- Collapses the per-value rows into one row per (category, event, property),
  -- carrying an array of {key, value, index} structs ordered by value index.
  SELECT
    category,
    event,
    event_property,
    event_property_index,
    ARRAY_AGG(
      STRUCT(
        event_property_value AS key,
        -- String form of the value index, produced by the project UDF.
        udf.event_code_points_to_string([event_property_value_index]) AS value,
        event_property_value_index AS index
      )
      ORDER BY
        event_property_value_index ASC
    ) AS `values`,
  FROM
    all_event_property_value_indices
  INNER JOIN
    all_event_property_indices
  USING
    (category, event, event_property)
  GROUP BY
    category,
    event,
    event_property,
    event_property_index
),
per_event AS (
  -- One row per event type, with its properties collected into an array of
  -- {key, value, index} structs ordered by property index.
  SELECT
    category,
    event,
    first_timestamp,
    numeric_index,
    -- String form of numeric_index, produced by the project UDF.
    udf.event_code_points_to_string([numeric_index]) AS index,
    ARRAY_AGG(
      -- Events without properties come out of the LEFT JOIN with a NULL
      -- event_property; those become NULL here and are dropped by
      -- IGNORE NULLS.
      CASE
        WHEN event_property IS NULL
          THEN NULL
        ELSE STRUCT(event_property AS key, `values` AS value, event_property_index AS index)
      END IGNORE NULLS
      ORDER BY
        event_property_index ASC
    ) AS event_properties
  FROM
    all_primary_event_types
  LEFT JOIN
    per_event_property
  USING
    (category, event)
  GROUP BY
    category,
    event,
    first_timestamp,
    numeric_index
)
-- Emit every event-type row, tagged with the date being processed so the
-- result can be stored under that submission_date.
SELECT
  @submission_date AS submission_date,
  per_event.*
FROM
  per_event
ORDER BY
  per_event.numeric_index ASC

Просмотреть файл

@ -0,0 +1,101 @@
---
-
submission_date: '2020-01-03'
category: first_category
event: first_event
first_timestamp: '2020-01-01T00:00:01+00:00'
numeric_index: 1
index: "\U00000001"
event_properties:
-
key: first_property
index: 1
value:
-
key: first_property_value_1
index: 1
value: "\U00000001"
-
key: first_property_value_2
index: 2
value: "\U00000002"
-
key: first_property_value_3
index: 3
value: "\U00000003"
-
key: second_property
index: 2
value:
-
key: second_property_value_1
index: 1
value: "\U00000001"
-
submission_date: '2020-01-03'
category: second_category
event: second_event
first_timestamp: '2020-01-01T00:00:02+00:00'
numeric_index: 2
index: "\U00000002"
event_properties:
-
key: second_property
index: 1
value:
-
key: second_property_value_1
index: 1
value: "\U00000001"
-
key: second_property_value_2
index: 2
value: "\U00000002"
-
key: third_property
index: 2
value:
-
key: third_property_value_1
index: 1
value: "\U00000001"
-
submission_date: '2020-01-03'
category: first_category
event: third_event
first_timestamp: '2020-01-01T00:00:04+00:00'
numeric_index: 3
index: "\U00000003"
event_properties:
-
key: third_event_prop
index: 1
value:
-
key: third_event_prop_value_1
index: 1
value: "\U00000001"
-
submission_date: '2020-01-03'
category: new_category
event: new_event_no_props
first_timestamp: '2020-01-02T00:00:01+00:00'
numeric_index: 4
index: "\U00000004"
event_properties: []
-
submission_date: '2020-01-03'
category: new_category
event: new_event_with_props
first_timestamp: '2020-01-02T00:00:02+00:00'
numeric_index: 5
index: "\U00000005"
event_properties:
-
key: new_event_prop
index: 1
value:
-
key: new_event_prop_value_1
index: 1
value: "\U00000001"

Просмотреть файл

@ -0,0 +1,48 @@
---
-
submission_timestamp: '2020-01-03 00:00:00'
events:
-
timestamp: '2020-01-02 00:00:01'
name: new_event_no_props
category: new_category
-
timestamp: '2020-01-02 00:00:02'
name: new_event_with_props
category: new_category
extra:
-
key: new_event_prop
value: new_event_prop_value_1
-
timestamp: '2020-01-02 00:00:03'
name: third_event
category: first_category
extra:
-
key: third_event_prop
value: third_event_prop_value_1
-
timestamp: '2020-01-02 00:00:03'
name: second_event
category: second_category
extra:
-
key: third_property
value: third_property_value_1
-
timestamp: '2020-01-02 00:01:01'
name: first_event
category: first_category
extra:
-
key: first_property
value: first_property_value_3
-
timestamp: '2020-01-02 00:01:01'
name: first_event
category: first_category
extra:
-
key: second_property
value: second_property_value_1

Просмотреть файл

@ -0,0 +1,57 @@
---
-
submission_date: '2020-01-02'
category: first_category
event: first_event
first_timestamp: '2020-01-01T00:00:01+00:00'
numeric_index: 1
index: "\U00000001"
event_properties:
-
key: first_property
index: 1
value:
-
key: first_property_value_1
index: 1
value: "\U00000001"
-
key: first_property_value_2
index: 2
value: "\U00000002"
-
key: second_property
index: 2
value:
-
key: second_property_value_1
index: 1
value: "\U00000001"
-
submission_date: '2020-01-02'
category: second_category
event: second_event
first_timestamp: '2020-01-01T00:00:02+00:00'
numeric_index: 2
index: "\U00000002"
event_properties:
-
key: second_property
index: 1
value:
-
key: second_property_value_1
index: 1
value: "\U00000001"
-
key: second_property_value_2
index: 2
value: "\U00000002"
-
submission_date: '2020-01-02'
category: first_category
event: third_event
first_timestamp: '2020-01-01T00:00:04+00:00'
numeric_index: 3
index: "\U00000003"
event_properties: []

Просмотреть файл

@ -0,0 +1,3 @@
- name: submission_date
type: DATE
value: 2020-01-03