This commit is contained in:
Anna Scholtz 2021-10-06 14:53:01 -07:00
Родитель b66e17a651
Коммит 6c89d870d4
4 изменённых файлов: 15 добавлений и 2063 удалений

Просмотреть файл

@ -401,3 +401,18 @@ pocket:
- kenny@getpocket.com
- kirill@getpocket.com
pretty_name: Pocket
sync:
glean_app: false
owners:
- ascholtz@mozilla.com
pretty_name: Sync
views:
sync:
type: ping_view
tables:
- table: mozdata.telemetry.sync
explores:
sync:
type: ping_explore
views:
base_view: sync

Просмотреть файл

@ -1,182 +0,0 @@
"""Utils for generating lookml."""
import re
from typing import Any, Dict, Iterable, List, Optional, Tuple
import click
from google.cloud import bigquery
# Maps a BigQuery column type to the Looker dimension type emitted for it.
# Column types missing from this table are emitted as hidden dimensions by
# _get_dimension, and columns mapped to "time" become dimension groups.
BIGQUERY_TYPE_TO_DIMENSION_TYPE = {
    "BIGNUMERIC": "string",
    "BOOLEAN": "yesno",
    "BYTES": "string",
    "DATE": "time",
    "DATETIME": "time",
    "FLOAT": "number",
    "INTEGER": "number",
    "NUMERIC": "number",
    "STRING": "string",
    "TIME": "time",
    "TIMESTAMP": "time",
}
# Field paths (one tuple element per nesting level) whose dimensions are
# always emitted with hidden: yes by _get_dimension.
HIDDEN_DIMENSIONS = {
    ("document_id",),
    ("client_id",),
    ("client_info", "client_id"),
    ("context_id",),
    ("additional_properties",),
}
# Field paths that get a map_layer_name attribute, keyed by the full path
# tuple; the value is the map layer name written into the dimension.
MAP_LAYER_NAMES = {
    ("country",): "countries",
    ("metadata", "geo", "country"): "countries",
}
def _get_dimension(
    path: Tuple[str, ...], field_type: str, mode: str, description: Optional[str]
) -> Dict[str, Any]:
    """Build the lookml dimension (or dimension group) dict for one column.

    ``path`` is the column's field path, one element per nesting level.
    Repeated columns, paths listed in HIDDEN_DIMENSIONS and column types
    without an entry in BIGQUERY_TYPE_TO_DIMENSION_TYPE are emitted as hidden
    dimensions carrying only ``sql`` and ``name`` (plus ``description``).
    Columns mapped to the "time" type additionally get ``timeframes``, which
    is what marks them as dimension groups.
    """
    dimension: Dict[str, Any] = {"sql": "${TABLE}." + ".".join(path)}
    name_parts = path
    nested = len(path) > 1

    is_hidden = (
        mode == "REPEATED"
        or path in HIDDEN_DIMENSIONS
        or field_type not in BIGQUERY_TYPE_TO_DIMENSION_TYPE
    )
    if is_hidden:
        dimension["hidden"] = "yes"
    else:
        looker_type = BIGQUERY_TYPE_TO_DIMENSION_TYPE[field_type]
        dimension["type"] = looker_type

        # Human readable labels derived from the path, only for nested fields.
        if nested:
            group_label = " ".join(path[:-1]).replace("_", " ").title()
            group_item_label = path[-1].replace("_", " ").title()
        else:
            group_label = group_item_label = None

        if looker_type == "time":
            # Remove _{type} suffix from the last path element for dimension
            # group names. For example submission_date and
            # submission_timestamp become submission, and
            # metadata.header.parsed_date becomes metadata__header__parsed.
            # This is because the timeframe will add a _{type} suffix to the
            # individual dimension names.
            stem = re.sub("_(date|time(stamp)?)$", "", path[-1])
            name_parts = (*path[:-1], stem)

            timeframes = ["raw", "time", "date", "week", "month", "quarter", "year"]
            if field_type == "DATE":
                # Plain dates have no time-of-day component.
                timeframes = [frame for frame in timeframes if frame != "time"]
            dimension["timeframes"] = timeframes
            if field_type == "DATE":
                dimension["convert_tz"] = "no"
                dimension["datatype"] = "date"

            if group_label and group_item_label:
                # Dimension groups should not be nested, see issue #82
                dimension["label"] = f"{group_label}: {group_item_label}"
        elif nested:
            dimension["group_label"] = group_label
            dimension["group_item_label"] = group_item_label

        if path in MAP_LAYER_NAMES:
            dimension["map_layer_name"] = MAP_LAYER_NAMES[path]

    dimension["name"] = "__".join(name_parts)
    if description:
        dimension["description"] = description
    return dimension
def _generate_dimensions_helper(
    schema: List[bigquery.SchemaField], *prefix: str
) -> Iterable[dict]:
    """Yield one dimension dict per leaf column of ``schema``.

    Fields are visited sorted by name. Non-repeated RECORD fields are
    flattened by recursing into their sub-fields with the record name
    appended to ``prefix``; every other field (including repeated records)
    is turned into a single dimension via _get_dimension.
    """
    for column in sorted(schema, key=lambda f: f.name):
        flattenable = column.field_type == "RECORD" and column.mode != "REPEATED"
        if not flattenable:
            yield _get_dimension(
                (*prefix, column.name),
                column.field_type,
                column.mode,
                column.description,
            )
            continue
        yield from _generate_dimensions_helper(column.fields, *prefix, column.name)
def _generate_dimensions(client: bigquery.Client, table: str) -> List[Dict[str, Any]]:
    """Generate dimensions and dimension groups from a bigquery table.

    When the schema contains both submission_timestamp and submission_date,
    only the dimension group for submission_timestamp is kept.

    Raise ClickException if the schema results in any other duplicate
    dimension name.
    """
    by_name: Dict[str, Dict[str, Any]] = {}
    table_schema = client.get_table(table).schema
    for dimension in _generate_dimensions_helper(table_schema):
        name = dimension["name"]
        # A duplicate "submission" dimension group is allowed to overwrite the
        # earlier one, thus picking the last value sorted by field name, which
        # is submission_timestamp.
        clashes = name in by_name
        if clashes and name != "submission":
            raise click.ClickException(
                f"duplicate dimension {name!r} for table {table!r}"
            )
        by_name[name] = dimension
    return list(by_name.values())
def _generate_nested_dimension_views(
    schema: List[bigquery.SchemaField], view_name: str
) -> List[Dict[str, Any]]:
    """
    Recursively generate views for nested fields.

    Nested fields are handled as view, with dimensions and optionally measures.

    Every RECORD field other than ``labeled_counter`` is descended into. A
    repeated record additionally produces a view named
    ``{view_name}__{field.name}`` holding the dimensions and dimension groups
    of its sub-fields. Fields are visited sorted by name, and a repeated
    record's own view precedes the views of its descendants.
    """
    views: List[Dict[str, Any]] = []
    for field in sorted(schema, key=lambda f: f.name):
        # labeled_counter is handled explicitly in glean ping views; hidden for other views
        if field.field_type != "RECORD" or field.name == "labeled_counter":
            continue

        # Keep the parent's view_name untouched: rebinding it here (as the
        # discarded side of the merge conflict did) would leak the name of
        # this field into the view names of all following sibling fields.
        nested_view_name = f"{view_name}__{field.name}"

        if field.mode == "REPEATED":
            # The helper returns a generator; materialize it once because it
            # is partitioned twice below and a generator would be exhausted
            # after the first pass, leaving dimension_groups always empty.
            dimensions = list(_generate_dimensions_helper(schema=field.fields))
            nested_field_view: Dict[str, Any] = {"name": nested_view_name}
            nested_field_view["dimensions"] = [
                d for d in dimensions if not _is_dimension_group(d)
            ]
            nested_field_view["dimension_groups"] = [
                d for d in dimensions if _is_dimension_group(d)
            ]
            views.append(nested_field_view)

        views.extend(_generate_nested_dimension_views(field.fields, nested_view_name))
    return views
def _is_dimension_group(dimension: dict):
"""Determine if a dimension is actually a dimension group."""
return "timeframes" in dimension or "intervals" in dimension
def escape_filter_expr(expr: str) -> str:
    """Escape filter expression for special Looker chars.

    A leading ``-`` and every occurrence of ``"``, ``_``, ``%``, ``,`` and
    ``^`` is prefixed with ``^``.
    """
    special = re.compile(r'((?:^-)|["_%,^])')
    return special.sub(r"^\1", expr)
def _is_nested_dimension(dimension: dict):
return (
"hidden" in dimension
and dimension["hidden"]
and "nested" in dimension
and dimension["nested"]
)

Просмотреть файл

@ -1377,34 +1377,6 @@ def test_lookml_actual_metrics_view(
},
],
},
{
"dimensions": [
{"name": "key", "sql": "${TABLE}.key", "type": "string"},
{"name": "value", "sql": "${TABLE}.value", "type": "number"},
],
"name": "metrics__metrics__custom_distribution__test_custom_distribution__values",
},
{
"dimensions": [
{"name": "key", "sql": "${TABLE}.key", "type": "string"},
{"name": "value", "sql": "${TABLE}.value", "type": "number"},
],
"name": "metrics__metrics__labeled_counter_not_in_source",
},
{
"dimensions": [
{"name": "key", "sql": "${TABLE}.key", "type": "string"},
{"name": "value", "sql": "${TABLE}.value", "type": "number"},
],
"name": "metrics__metrics__memory_distribution__test_memory_distribution__values",
},
{
"dimensions": [
{"name": "key", "sql": "${TABLE}.key", "type": "string"},
{"name": "value", "sql": "${TABLE}.value", "type": "number"},
],
"name": "metrics__metrics__timing_distribution__test_timing_distribution__values",
},
{
"dimensions": [
{

Разница между файлами не показана из-за своего большого размера Загрузить разницу