Add sync namespace
This commit is contained in:
Родитель
b66e17a651
Коммит
6c89d870d4
|
@ -401,3 +401,18 @@ pocket:
|
|||
- kenny@getpocket.com
|
||||
- kirill@getpocket.com
|
||||
pretty_name: Pocket
|
||||
sync:
|
||||
glean_app: false
|
||||
owners:
|
||||
- ascholtz@mozilla.com
|
||||
pretty_name: Sync
|
||||
views:
|
||||
sync:
|
||||
type: ping_view
|
||||
tables:
|
||||
- table: mozdata.telemetry.sync
|
||||
explores:
|
||||
sync:
|
||||
type: ping_explore
|
||||
views:
|
||||
base_view: sync
|
||||
|
|
|
@ -1,182 +0,0 @@
|
|||
"""Utils for generating lookml."""
|
||||
import re
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
import click
|
||||
from google.cloud import bigquery
|
||||
|
||||
# Mapping from BigQuery column types to Looker dimension types.  Field types
# missing from this map are emitted as hidden dimensions (see _get_dimension).
BIGQUERY_TYPE_TO_DIMENSION_TYPE = {
    "BIGNUMERIC": "string",
    "BOOLEAN": "yesno",
    "BYTES": "string",
    "DATE": "time",
    "DATETIME": "time",
    "FLOAT": "number",
    "INTEGER": "number",
    "NUMERIC": "number",
    "STRING": "string",
    "TIME": "time",
    "TIMESTAMP": "time",
}

# Field paths (tuples of nested field names) whose dimensions are always
# marked hidden — identifiers and catch-all fields that should not be
# exposed for exploration.
HIDDEN_DIMENSIONS = {
    ("document_id",),
    ("client_id",),
    ("client_info", "client_id"),
    ("context_id",),
    ("additional_properties",),
}

# Field paths that get a Looker map_layer_name so they render on a map.
MAP_LAYER_NAMES = {
    ("country",): "countries",
    ("metadata", "geo", "country"): "countries",
}
|
||||
|
||||
|
||||
def _get_dimension(
    path: Tuple[str, ...], field_type: str, mode: str, description: Optional[str]
) -> Dict[str, Any]:
    """Build a lookml dimension (or dimension group) dict for a single field.

    :param path: nested field path, e.g. ("metadata", "geo", "country").
    :param field_type: BigQuery type name, e.g. "TIMESTAMP".
    :param mode: BigQuery field mode, e.g. "REPEATED" or "NULLABLE".
    :param description: optional field description copied into the result.
    :return: dict with at least "name" and "sql"; time types additionally get
        "timeframes", making them dimension groups (see _is_dimension_group).
    """
    result: Dict[str, Any] = {}
    result["sql"] = "${TABLE}." + ".".join(path)
    name = path
    if (
        mode == "REPEATED"
        or path in HIDDEN_DIMENSIONS
        or field_type not in BIGQUERY_TYPE_TO_DIMENSION_TYPE
    ):
        # Repeated, explicitly hidden, or untranslatable fields are still
        # emitted (so SQL can reference them) but hidden from explores.
        result["hidden"] = "yes"
    else:
        result["type"] = BIGQUERY_TYPE_TO_DIMENSION_TYPE[field_type]

        group_label, group_item_label = None, None
        if len(path) > 1:
            # Nested fields are grouped under their parent path in Looker.
            group_label = " ".join(path[:-1]).replace("_", " ").title()
            group_item_label = path[-1].replace("_", " ").title()
        if result["type"] == "time":
            # Remove _{type} suffix from the last path element for dimension group
            # names. For example submission_date and submission_timestamp become
            # submission, and metadata.header.parsed_date becomes
            # metadata__header__parsed. This is because the timeframe will add a _{type}
            # suffix to the individual dimension names.
            name = *path[:-1], re.sub("_(date|time(stamp)?)$", "", path[-1])
            result["timeframes"] = [
                "raw",
                "time",
                "date",
                "week",
                "month",
                "quarter",
                "year",
            ]
            if field_type == "DATE":
                # DATE columns have no time-of-day component and need no
                # timezone conversion.
                result["timeframes"].remove("time")
                result["convert_tz"] = "no"
                result["datatype"] = "date"
            if group_label and group_item_label:
                # Dimension groups should not be nested, see issue #82
                result["label"] = f"{group_label}: {group_item_label}"
        elif len(path) > 1:
            result["group_label"] = group_label
            result["group_item_label"] = group_item_label
        if path in MAP_LAYER_NAMES:
            result["map_layer_name"] = MAP_LAYER_NAMES[path]
    # Hidden dimensions still need a name; nesting is flattened with "__".
    result["name"] = "__".join(name)

    if description:
        result["description"] = description

    return result
|
||||
|
||||
|
||||
def _generate_dimensions_helper(
    schema: List[bigquery.SchemaField], *prefix: str
) -> Iterable[dict]:
    """Yield one lookml dimension dict per leaf field in *schema*.

    Non-repeated RECORD fields are flattened recursively, accumulating the
    field path in *prefix*; everything else becomes a dimension.
    """
    for schema_field in sorted(schema, key=lambda f: f.name):
        is_flattenable_record = (
            schema_field.field_type == "RECORD" and schema_field.mode != "REPEATED"
        )
        if is_flattenable_record:
            yield from _generate_dimensions_helper(
                schema_field.fields, *prefix, schema_field.name
            )
        else:
            yield _get_dimension(
                (*prefix, schema_field.name),
                schema_field.field_type,
                schema_field.mode,
                schema_field.description,
            )
|
||||
|
||||
|
||||
def _generate_dimensions(client: bigquery.Client, table: str) -> List[Dict[str, Any]]:
    """Generate dimensions and dimension groups from a bigquery table.

    When schema contains both submission_timestamp and submission_date, only produce
    a dimension group for submission_timestamp.

    Raise ClickException if schema results in duplicate dimensions.
    """
    by_name: Dict[str, Any] = {}
    schema = client.get_table(table).schema
    for dimension in _generate_dimensions_helper(schema):
        dimension_name = dimension["name"]
        # "submission" is the one allowed collision: fields arrive sorted by
        # name, so submission_timestamp overwrites submission_date here.
        if dimension_name != "submission" and dimension_name in by_name:
            raise click.ClickException(
                f"duplicate dimension {dimension_name!r} for table {table!r}"
            )
        by_name[dimension_name] = dimension
    return list(by_name.values())
|
||||
|
||||
|
||||
def _generate_nested_dimension_views(
    schema: List[bigquery.SchemaField], view_name: str
) -> List[Dict[str, Any]]:
    """
    Recursively generate views for nested fields.

    Nested fields are handled as view, with dimensions and optionally measures.

    :param schema: the table schema (or a record's sub-schema on recursion).
    :param view_name: the parent view name; nested view names append
        "__{field_name}" per level of nesting.
    """
    # NOTE(review): an unresolved merge conflict was removed here.  The HEAD
    # side (no reassignment of view_name) is kept: rebinding view_name inside
    # the loop would compound "__{field}" suffixes across sibling fields.
    views: List[Dict[str, Any]] = []
    for field in sorted(schema, key=lambda f: f.name):
        if field.field_type == "RECORD" and field.name != "labeled_counter":
            # labeled_counter is handled explicitly in glean ping views; hidden for other views
            if field.mode == "REPEATED":
                nested_field_view: Dict[str, Any] = {
                    "name": f"{view_name}__{field.name}"
                }
                # Materialize the generator: it is consumed by TWO list
                # comprehensions below.  A bare generator would be exhausted
                # after the first pass, leaving dimension_groups always empty.
                dimensions = list(_generate_dimensions_helper(schema=field.fields))
                nested_field_view["dimensions"] = [
                    d for d in dimensions if not _is_dimension_group(d)
                ]
                nested_field_view["dimension_groups"] = [
                    d for d in dimensions if _is_dimension_group(d)
                ]
                views = (
                    views
                    + [nested_field_view]
                    + _generate_nested_dimension_views(
                        field.fields, f"{view_name}__{field.name}"
                    )
                )
            else:
                # Non-repeated records do not get their own view; only recurse.
                views = views + _generate_nested_dimension_views(
                    field.fields, f"{view_name}__{field.name}"
                )

    return views
|
||||
|
||||
|
||||
def _is_dimension_group(dimension: dict):
|
||||
"""Determine if a dimension is actually a dimension group."""
|
||||
return "timeframes" in dimension or "intervals" in dimension
|
||||
|
||||
|
||||
def escape_filter_expr(expr: str) -> str:
    """Escape filter expression for special Looker chars."""
    # A leading "-" (negation) and the characters " _ % , ^ are special in
    # Looker filter expressions; prefix each occurrence with "^".
    special = re.compile(r'((?:^-)|["_%,^])')
    return special.sub(r"^\1", expr)
|
||||
|
||||
|
||||
def _is_nested_dimension(dimension: dict):
|
||||
return (
|
||||
"hidden" in dimension
|
||||
and dimension["hidden"]
|
||||
and "nested" in dimension
|
||||
and dimension["nested"]
|
||||
)
|
|
@ -1377,34 +1377,6 @@ def test_lookml_actual_metrics_view(
|
|||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
"dimensions": [
|
||||
{"name": "key", "sql": "${TABLE}.key", "type": "string"},
|
||||
{"name": "value", "sql": "${TABLE}.value", "type": "number"},
|
||||
],
|
||||
"name": "metrics__metrics__custom_distribution__test_custom_distribution__values",
|
||||
},
|
||||
{
|
||||
"dimensions": [
|
||||
{"name": "key", "sql": "${TABLE}.key", "type": "string"},
|
||||
{"name": "value", "sql": "${TABLE}.value", "type": "number"},
|
||||
],
|
||||
"name": "metrics__metrics__labeled_counter_not_in_source",
|
||||
},
|
||||
{
|
||||
"dimensions": [
|
||||
{"name": "key", "sql": "${TABLE}.key", "type": "string"},
|
||||
{"name": "value", "sql": "${TABLE}.value", "type": "number"},
|
||||
],
|
||||
"name": "metrics__metrics__memory_distribution__test_memory_distribution__values",
|
||||
},
|
||||
{
|
||||
"dimensions": [
|
||||
{"name": "key", "sql": "${TABLE}.key", "type": "string"},
|
||||
{"name": "value", "sql": "${TABLE}.value", "type": "number"},
|
||||
],
|
||||
"name": "metrics__metrics__timing_distribution__test_timing_distribution__values",
|
||||
},
|
||||
{
|
||||
"dimensions": [
|
||||
{
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Загрузка…
Ссылка в новой задаче