DENG-3540 Add `event_id` as a primary key for event view (#960)

* Add `event_id` as a synthetic pk for event view

* ci fix

* Generate an event_id dimension for the final view. Update test.

* comments

* review comments
This commit is contained in:
wil stuckey 2024-05-01 14:24:01 -05:00 коммит произвёл GitHub
Родитель 122c868a1a
Коммит 3c8063c7bb
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
3 изменённых файлов: 59 добавлений и 26 удалений

Просмотреть файл

@ -76,11 +76,9 @@ class EventsView(View):
# set event_id (previously document_id) as primary key if it exists in the
# underlying table; this will allow one_to_many joins
document_id_field = self.get_document_id(dimensions, "events")
if document_id_field is not None:
view_defn["dimensions"] = [
{"name": document_id_field, "primary_key": "yes"}
]
event_id_dimension = self.generate_event_id_dimension(dimensions)
if event_id_dimension is not None:
view_defn["dimensions"] = [event_id_dimension]
return {
"includes": [f"{self.tables[0]['events_table_view']}.view.lkml"],
@ -104,3 +102,17 @@ class EventsView(View):
)
return measures
def generate_event_id_dimension(
    self, dimensions: list[dict]
) -> Optional[Dict[str, str]]:
    """
    Build the `event_id` dimension that serves as the view's primary key.

    Looks up a dimension named `event_id` among `dimensions` via
    `select_dimension`. Returns a dimension definition flagged as the
    primary key (so the view can take part in a one to many join), or
    None when no such dimension is found.
    """
    match = self.select_dimension("event_id", dimensions, "events")
    if not match:
        # Nothing to key on; the caller leaves the view without a primary key.
        return None
    # The generated dimension's SQL is a LookML reference to the matched field.
    return {
        "name": "event_id",
        "primary_key": "yes",
        "sql": f"${{{match['name']}}}",
    }

Просмотреть файл

@ -104,26 +104,39 @@ class View(object):
def get_client_id(self, dimensions: List[dict], table: str) -> Optional[str]:
"""Return the first field that looks like a client identifier."""
# NOTE(review): the diff markers and indentation were lost in this rendering,
# so two versions of the body appear back to back. The list-based version
# directly below looks like the removed code and the select_dimension version
# after it like its replacement -- confirm against the repository.
#
# First version: collect the name of every dimension called client_id,
# client_info__client_id or context_id.
client_id_fields = [
d["name"]
for d in dimensions
if d["name"] in {"client_id", "client_info__client_id", "context_id"}
]
if not client_id_fields:
# Some pings purposely exclude client_ids, e.g. firefox installer
return None
# More than one candidate is reported as an error instead of picking one.
if len(client_id_fields) > 1:
raise ClickException(f"Duplicate client_id dimension in {table!r}")
return client_id_fields[0]
# Second version: delegate the lookup to select_dimension, passing the same
# three candidate names, then unwrap the matched dimension's "name".
client_id_fields = self.select_dimension(
{"client_id", "client_info__client_id", "context_id"},
dimensions,
table,
)
# Some pings purposely exclude client_ids, e.g. firefox installer
return client_id_fields["name"] if client_id_fields else None
def get_document_id(self, dimensions: List[dict], table: str) -> Optional[str]:
"""Return the first field that looks like a document_id."""
# NOTE(review): the diff markers and indentation were lost in this rendering,
# so two versions of the body appear back to back. The list-based version
# directly below looks like the removed code and the select_dimension version
# after it like its replacement -- confirm against the repository.
#
# First version: collect the name of every dimension called document_id.
document_id_fields = [
d["name"] for d in dimensions if d["name"] in {"document_id"}
]
if not document_id_fields:
# No dimension named document_id was found
return None
# More than one match is reported as an error instead of picking one.
if len(document_id_fields) > 1:
raise ClickException(f"Duplicate document_id dimension in {table!r}")
return document_id_fields[0]
# Second version: delegate to select_dimension with the single name
# "document_id" and unwrap the matched dimension's "name".
# NOTE(review): as written in this diff, select_dimension only raises when it
# is given more than one candidate name, so duplicate document_id dimensions
# no longer raise here and the first match is returned -- confirm intended.
document_id = self.select_dimension("document_id", dimensions, table)
return document_id["name"] if document_id else None
def select_dimension(
self,
dimension_names: str | set[str],
dimensions: List[dict],
table: str,
) -> Optional[dict[str, str]]:
"""
Return the first field that matches dimension name.
Throws if the query set is greater than one and more than one item is selected.
"""
if isinstance(dimension_names, str):
dimension_names = {dimension_names}
selected = [d for d in dimensions if d["name"] in dimension_names]
if selected:
# there should only be one dimension selected from the set
# if there are multiple options in the dimention_names set.
if len(dimension_names) > 1 and len(selected) > 1:
raise ClickException(
f"Duplicate {'/'.join(dimension_names)} dimension in {table!r}"
)
return selected[0]
return None

Просмотреть файл

@ -160,6 +160,13 @@ def test_view_lookml(events_view):
"sql": "${client_info__client_id}",
},
],
"dimensions": [
{
"name": "event_id",
"primary_key": "yes",
"sql": "${event_id}",
},
],
},
],
}
@ -168,7 +175,8 @@ def test_view_lookml(events_view):
[
SchemaField(
"client_info", "RECORD", fields=[SchemaField("client_id", "STRING")]
)
),
SchemaField("event_id", "STRING"),
]
)
actual = events_view.to_lookml(mock_bq_client, None)