Query for updates to event_types_v1 (#1295)
* Query for updating event_types_v1 This query takes the events for the submission date and the event_types partition from the day before, and adds the new events, event_properties, and property values. It writes the result out to a new partition. Writing a new partition is not strictly necessary, but it will aid debugging and re-runs. * Format SQL
This commit is contained in:
Родитель
e71840cec6
Коммит
aca88a3d45
|
@ -0,0 +1,243 @@
|
|||
-- One row per element of the repeated `events` field, restricted to rows
-- whose submission_timestamp falls on @submission_date. The element's `name`
-- is exposed as `event`; every other column is passed through unchanged.
WITH current_events AS (
  SELECT
    unnested_event.name AS event,
    * EXCEPT (name)
  FROM
    org_mozilla_firefox.events
  CROSS JOIN
    UNNEST(events) AS unnested_event
  WHERE
    DATE(submission_timestamp) = @submission_date
),
|
||||
-- The event_types_v1 partition for the day before @submission_date. The CTEs
-- below treat it as the already-known mapping that new entries are added to.
event_types AS (
  SELECT
    *
  FROM
    org_mozilla_firefox_derived.event_types_v1
  WHERE
    submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
),
|
||||
-- (category, event) pairs present in current_events but absent from
-- event_types (anti-join: LEFT JOIN ... WHERE right side IS NULL).
-- Each new pair gets the next free numeric_index, ordered by first appearance
-- with category/event as deterministic tie-breakers.
new_primary_event_types AS (
  SELECT
    category,
    event,
    MIN(timestamp) AS first_timestamp,
    ROW_NUMBER() OVER (ORDER BY MIN(timestamp) ASC, category ASC, event ASC) + (
      -- MAX over an empty event_types partition is NULL, which would turn
      -- every new numeric_index into NULL; start from 0 in that case.
      SELECT
        COALESCE(MAX(numeric_index), 0)
      FROM
        event_types
    ) AS numeric_index,
    -- A brand-new event has no indexed properties yet.
    0 AS max_event_property_index
  FROM
    current_events
  LEFT JOIN
    event_types
  USING
    (category, event)
  WHERE
    event_types.event IS NULL
  GROUP BY
    category,
    event
),
|
||||
-- New and existing event types together, each carrying the highest property
-- index already assigned to it (0 when it has none).
all_primary_event_types AS (
  SELECT
    category,
    event,
    first_timestamp,
    numeric_index,
    max_event_property_index
  FROM
    new_primary_event_types
  UNION ALL
  SELECT
    category,
    event,
    first_timestamp,
    numeric_index,
    MAX(COALESCE(event_property.index, 0)) AS max_event_property_index
  FROM
    event_types
  -- LEFT JOIN keeps event types whose event_properties array is empty.
  LEFT JOIN
    UNNEST(event_properties) AS event_property
  GROUP BY
    category,
    event,
    first_timestamp,
    numeric_index
),
|
||||
-- Property keys seen in current_events that are not yet recorded for their
-- (category, event) in event_types. Each gets the next free index within its
-- event, continuing from that event's max_event_property_index.
new_event_property_indices AS (
  SELECT
    category,
    event,
    event_property.key AS event_property,
    ROW_NUMBER() OVER (
      PARTITION BY
        category,
        event
      ORDER BY
        MIN(timestamp) ASC,
        -- Properties first seen in the same event share a timestamp; break
        -- the tie on the key so re-runs assign identical indices.
        event_property.key ASC
    ) + ANY_VALUE(max_event_property_index) AS event_property_index,
    -- A brand-new property has no indexed values yet.
    0 AS max_event_property_value_index
  FROM
    current_events,
    UNNEST(extra) AS event_property
  LEFT JOIN
    (SELECT * FROM event_types, UNNEST(event_properties)) event_types
  USING
    (category, event, key)
  JOIN
    all_primary_event_types
  USING
    (event, category)
  WHERE
    -- Anti-join: keep only properties with no existing entry.
    event_types.event IS NULL
  GROUP BY
    category,
    event,
    event_property
),
|
||||
-- New and existing properties together, each carrying the highest value index
-- already assigned to it (0 when it has none).
all_event_property_indices AS (
  SELECT
    category,
    event,
    event_property,
    event_property_index,
    max_event_property_value_index
  FROM
    new_event_property_indices
  UNION ALL
  SELECT
    category,
    event,
    event_property.key AS event_property,
    event_property.index AS event_property_index,
    MAX(COALESCE(property_value.index, 0)) AS max_event_property_value_index
  FROM
    event_types
  -- LEFT JOINs keep rows whose arrays are empty; those rows have NULL
  -- property/value fields.
  LEFT JOIN
    UNNEST(event_properties) AS event_property
  LEFT JOIN
    UNNEST(value) AS property_value
  GROUP BY
    category,
    event,
    first_timestamp,
    event_property,
    event_property_index
),
|
||||
-- Property values seen in current_events that are not yet recorded for their
-- (category, event, property) in event_types. Each gets the next free index
-- within its property, continuing from max_event_property_value_index.
new_event_property_value_indices AS (
  SELECT
    current_events.category,
    current_events.event,
    event_property.key AS event_property,
    event_property.value AS event_property_value,
    ROW_NUMBER() OVER (
      PARTITION BY
        current_events.category,
        current_events.event,
        event_property.key
      ORDER BY
        MIN(timestamp) ASC,
        -- Break timestamp ties on the value so re-runs assign identical
        -- indices.
        event_property.value ASC
    ) + ANY_VALUE(max_event_property_value_index) AS event_property_value_index
  FROM
    current_events,
    UNNEST(extra) AS event_property
  LEFT JOIN
    (
      -- Existing mapping flattened to one row per
      -- (category, event, property, value).
      SELECT
        event_types.* EXCEPT (event_properties),
        existing_event_property,
        existing_value
      FROM
        event_types,
        UNNEST(event_properties) AS existing_event_property,
        UNNEST(value) AS existing_value
    ) AS existing_values
  ON
    -- Match within the same event. Without the category/event conditions, a
    -- value already known for a different event would hide a value that is
    -- new for this one, and it would never receive an index.
    current_events.category = existing_values.category
    AND current_events.event = existing_values.event
    AND event_property.key = existing_values.existing_event_property.key
    AND event_property.value = existing_values.existing_value.key
  JOIN
    all_event_property_indices
  ON
    all_event_property_indices.category = current_events.category
    AND all_event_property_indices.event = current_events.event
    AND all_event_property_indices.event_property = event_property.key
  WHERE
    -- Anti-join: keep only values with no existing entry.
    existing_values.event IS NULL
  GROUP BY
    category,
    event,
    event_property,
    event_property_value
),
|
||||
-- New and existing property values together, one row per
-- (category, event, property, value) with its index.
all_event_property_value_indices AS (
  SELECT
    category,
    event,
    event_property,
    event_property_value,
    event_property_value_index
  FROM
    new_event_property_value_indices
  UNION ALL
  SELECT
    category,
    event,
    event_property.key AS event_property,
    property_value.key AS event_property_value,
    property_value.index AS event_property_value_index
  FROM
    event_types,
    UNNEST(event_properties) AS event_property,
    UNNEST(value) AS property_value
),
|
||||
-- Re-nests values under their property: one row per
-- (category, event, property) with an array of {key, value, index} structs
-- ordered by value index. `value` is the index passed through
-- udf.event_code_points_to_string.
per_event_property AS (
  SELECT
    category,
    event,
    event_property,
    event_property_index,
    ARRAY_AGG(
      STRUCT(
        event_property_value AS key,
        udf.event_code_points_to_string([event_property_value_index]) AS value,
        event_property_value_index AS index
      )
      ORDER BY
        event_property_value_index ASC
    ) AS `values`
  FROM
    all_event_property_value_indices
  INNER JOIN
    all_event_property_indices
  USING
    (category, event, event_property)
  GROUP BY
    category,
    event,
    event_property,
    event_property_index
),
|
||||
-- Re-nests properties under their event: one row per event type with an
-- array of {key, value, index} property structs ordered by property index.
-- `index` is numeric_index passed through udf.event_code_points_to_string.
per_event AS (
  SELECT
    category,
    event,
    first_timestamp,
    numeric_index,
    udf.event_code_points_to_string([numeric_index]) AS index,
    ARRAY_AGG(
      -- Events without properties yield a NULL row from the LEFT JOIN; emit
      -- NULL for it so IGNORE NULLS drops it from the array.
      CASE
        WHEN event_property IS NOT NULL
          THEN STRUCT(event_property AS key, `values` AS value, event_property_index AS index)
      END IGNORE NULLS
      ORDER BY
        event_property_index ASC
    ) AS event_properties
  FROM
    all_primary_event_types
  LEFT JOIN
    per_event_property
  USING
    (category, event)
  GROUP BY
    category,
    event,
    first_timestamp,
    numeric_index
)
|
||||
-- Emit the full, updated mapping stamped with @submission_date.
SELECT
  @submission_date AS submission_date,
  category,
  event,
  first_timestamp,
  numeric_index,
  index,
  event_properties
FROM
  per_event
ORDER BY
  numeric_index ASC
|
|
@ -0,0 +1,101 @@
|
|||
---
|
||||
-
|
||||
submission_date: '2020-01-03'
|
||||
category: first_category
|
||||
event: first_event
|
||||
first_timestamp: '2020-01-01T00:00:01+00:00'
|
||||
numeric_index: 1
|
||||
index: "\U00000001"
|
||||
event_properties:
|
||||
-
|
||||
key: first_property
|
||||
index: 1
|
||||
value:
|
||||
-
|
||||
key: first_property_value_1
|
||||
index: 1
|
||||
value: "\U00000001"
|
||||
-
|
||||
key: first_property_value_2
|
||||
index: 2
|
||||
value: "\U00000002"
|
||||
-
|
||||
key: first_property_value_3
|
||||
index: 3
|
||||
value: "\U00000003"
|
||||
-
|
||||
key: second_property
|
||||
index: 2
|
||||
value:
|
||||
-
|
||||
key: second_property_value_1
|
||||
index: 1
|
||||
value: "\U00000001"
|
||||
-
|
||||
submission_date: '2020-01-03'
|
||||
category: second_category
|
||||
event: second_event
|
||||
first_timestamp: '2020-01-01T00:00:02+00:00'
|
||||
numeric_index: 2
|
||||
index: "\U00000002"
|
||||
event_properties:
|
||||
-
|
||||
key: second_property
|
||||
index: 1
|
||||
value:
|
||||
-
|
||||
key: second_property_value_1
|
||||
index: 1
|
||||
value: "\U00000001"
|
||||
-
|
||||
key: second_property_value_2
|
||||
index: 2
|
||||
value: "\U00000002"
|
||||
-
|
||||
key: third_property
|
||||
index: 2
|
||||
value:
|
||||
-
|
||||
key: third_property_value_1
|
||||
index: 1
|
||||
value: "\U00000001"
|
||||
-
|
||||
submission_date: '2020-01-03'
|
||||
category: first_category
|
||||
event: third_event
|
||||
first_timestamp: '2020-01-01T00:00:04+00:00'
|
||||
numeric_index: 3
|
||||
index: "\U00000003"
|
||||
event_properties:
|
||||
-
|
||||
key: third_event_prop
|
||||
index: 1
|
||||
value:
|
||||
-
|
||||
key: third_event_prop_value_1
|
||||
index: 1
|
||||
value: "\U00000001"
|
||||
-
|
||||
submission_date: '2020-01-03'
|
||||
category: new_category
|
||||
event: new_event_no_props
|
||||
first_timestamp: '2020-01-02T00:00:01+00:00'
|
||||
numeric_index: 4
|
||||
index: "\U00000004"
|
||||
event_properties: []
|
||||
-
|
||||
submission_date: '2020-01-03'
|
||||
category: new_category
|
||||
event: new_event_with_props
|
||||
first_timestamp: '2020-01-02T00:00:02+00:00'
|
||||
numeric_index: 5
|
||||
index: "\U00000005"
|
||||
event_properties:
|
||||
-
|
||||
key: new_event_prop
|
||||
index: 1
|
||||
value:
|
||||
-
|
||||
key: new_event_prop_value_1
|
||||
index: 1
|
||||
value: "\U00000001"
|
|
@ -0,0 +1,48 @@
|
|||
---
|
||||
-
|
||||
submission_timestamp: '2020-01-03 00:00:00'
|
||||
events:
|
||||
-
|
||||
timestamp: '2020-01-02 00:00:01'
|
||||
name: new_event_no_props
|
||||
category: new_category
|
||||
-
|
||||
timestamp: '2020-01-02 00:00:02'
|
||||
name: new_event_with_props
|
||||
category: new_category
|
||||
extra:
|
||||
-
|
||||
key: new_event_prop
|
||||
value: new_event_prop_value_1
|
||||
-
|
||||
timestamp: '2020-01-02 00:00:03'
|
||||
name: third_event
|
||||
category: first_category
|
||||
extra:
|
||||
-
|
||||
key: third_event_prop
|
||||
value: third_event_prop_value_1
|
||||
-
|
||||
timestamp: '2020-01-02 00:00:03'
|
||||
name: second_event
|
||||
category: second_category
|
||||
extra:
|
||||
-
|
||||
key: third_property
|
||||
value: third_property_value_1
|
||||
-
|
||||
timestamp: '2020-01-02 00:01:01'
|
||||
name: first_event
|
||||
category: first_category
|
||||
extra:
|
||||
-
|
||||
key: first_property
|
||||
value: first_property_value_3
|
||||
-
|
||||
timestamp: '2020-01-02 00:01:01'
|
||||
name: first_event
|
||||
category: first_category
|
||||
extra:
|
||||
-
|
||||
key: second_property
|
||||
value: second_property_value_1
|
|
@ -0,0 +1,57 @@
|
|||
---
|
||||
-
|
||||
submission_date: '2020-01-02'
|
||||
category: first_category
|
||||
event: first_event
|
||||
first_timestamp: '2020-01-01T00:00:01+00:00'
|
||||
numeric_index: 1
|
||||
index: "\U00000001"
|
||||
event_properties:
|
||||
-
|
||||
key: first_property
|
||||
index: 1
|
||||
value:
|
||||
-
|
||||
key: first_property_value_1
|
||||
index: 1
|
||||
value: "\U00000001"
|
||||
-
|
||||
key: first_property_value_2
|
||||
index: 2
|
||||
value: "\U00000002"
|
||||
-
|
||||
key: second_property
|
||||
index: 2
|
||||
value:
|
||||
-
|
||||
key: second_property_value_1
|
||||
index: 1
|
||||
value: "\U00000001"
|
||||
-
|
||||
submission_date: '2020-01-02'
|
||||
category: second_category
|
||||
event: second_event
|
||||
first_timestamp: '2020-01-01T00:00:02+00:00'
|
||||
numeric_index: 2
|
||||
index: "\U00000002"
|
||||
event_properties:
|
||||
-
|
||||
key: second_property
|
||||
index: 1
|
||||
value:
|
||||
-
|
||||
key: second_property_value_1
|
||||
index: 1
|
||||
value: "\U00000001"
|
||||
-
|
||||
key: second_property_value_2
|
||||
index: 2
|
||||
value: "\U00000002"
|
||||
-
|
||||
submission_date: '2020-01-02'
|
||||
category: first_category
|
||||
event: third_event
|
||||
first_timestamp: '2020-01-01T00:00:04+00:00'
|
||||
numeric_index: 3
|
||||
index: "\U00000003"
|
||||
event_properties: []
|
|
@ -0,0 +1,3 @@
|
|||
- name: submission_date
|
||||
type: DATE
|
||||
value: 2020-01-03
|
Загрузка…
Ссылка в новой задаче