2023-09-08 22:09:06 +03:00
|
|
|
"""Class to describe a view with metrics from metric-hub."""
|
2024-02-20 21:27:22 +03:00
|
|
|
|
2023-09-08 22:09:06 +03:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
import re
|
2024-02-13 01:55:11 +03:00
|
|
|
from typing import Any, Dict, Iterator, List, Optional, Union
|
2023-09-08 22:09:06 +03:00
|
|
|
|
|
|
|
from generator.metrics_utils import MetricsConfigLoader
|
|
|
|
|
|
|
|
from . import lookml_utils
|
|
|
|
from .view import View, ViewDict
|
|
|
|
|
|
|
|
|
|
|
|
class MetricDefinitionsView(View):
    """A view for metric-hub metrics that come from the same data source."""

    # View-type identifier; used by the generator when (de)serializing
    # view definitions (see from_dict below).
    type: str = "metric_definitions_view"

    def __init__(self, namespace: str, name: str, tables: List[Dict[str, str]]):
        """Get an instance of a MetricDefinitionsView."""
        super().__init__(namespace, name, MetricDefinitionsView.type, tables)
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
def from_db_views(
|
|
|
|
klass,
|
|
|
|
namespace: str,
|
|
|
|
is_glean: bool,
|
|
|
|
channels: List[Dict[str, str]],
|
|
|
|
db_views: dict,
|
|
|
|
) -> Iterator[MetricDefinitionsView]:
|
|
|
|
"""Get Metric Definition Views from db views and app variants."""
|
|
|
|
return iter(())
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
def from_dict(
|
|
|
|
klass, namespace: str, name: str, definition: ViewDict
|
|
|
|
) -> MetricDefinitionsView:
|
|
|
|
"""Get a MetricDefinitionsView from a dict representation."""
|
2024-02-13 01:55:11 +03:00
|
|
|
return klass(namespace, name, definition.get("tables", []))
|
2023-09-08 22:09:06 +03:00
|
|
|
|
2024-07-29 19:24:50 +03:00
|
|
|
    def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
        """Get this view as LookML.

        Builds a derived-table view that aggregates metric-hub metric
        definitions per client per date, plus "Base Field" dimensions derived
        (via dryrun) from the metric data source and its joined data sources.

        :param v1_name: unused here; part of the common to_lookml interface.
        :param dryrun: dryrun helper forwarded to
            lookml_utils._generate_dimensions_from_query.
        :return: {"views": [view_defn]}, or {} when the namespace has no
            definitions, the data source is unknown, or no metrics match.
        """
        namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
            self.namespace
        )
        if namespace_definitions is None:
            return {}

        # get all metric definitions that depend on the data source represented by this view
        data_source_name = re.sub("^metric_definitions_", "", self.name)
        data_source_definition = MetricsConfigLoader.configs.get_data_source_definition(
            data_source_name, self.namespace
        )

        if data_source_definition is None:
            return {}

        # todo: hide deprecated metrics?
        # Render each metric's select expression through the metric-hub Jinja
        # environment; histogram metrics are excluded (not aggregatable here).
        metric_definitions = [
            f"""{
                MetricsConfigLoader.configs.get_env().from_string(metric.select_expression).render()
            } AS {metric_slug},\n"""
            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
            if metric.select_expression
            and metric.data_source.name == data_source_name
            and metric.type != "histogram"
        ]

        if metric_definitions == []:
            return {}

        # Metric definitions are intended to be aggregated by client per date.
        # A derived table is needed to do these aggregations, instead of defining them as measures
        # we want to have them available as dimensions (which don't allow aggregations in their definitions)
        # to allow for custom measures to be later defined in Looker that aggregate these per client metrics.
        view_defn: Dict[str, Any] = {"name": self.name}

        # Fields that must not be duplicated as "Base Fields": identifiers,
        # date columns, and the metric slugs themselves (already dimensions).
        ignore_base_fields = [
            "client_id",
            "submission_date",
            "submission",
            "first_run",
        ] + [
            metric_slug
            for metric_slug, metric in namespace_definitions.metrics.definitions.items()
            if metric.select_expression
            and metric.data_source.name == data_source_name
            and metric.type != "histogram"
        ]

        base_view_dimensions = {}
        joined_data_sources = []

        # check if the metric data source has joins
        # joined data sources are generally used for creating the "Base Fields"
        if data_source_definition.joins:
            # determine the dimensions selected by the joined data sources
            for joined_data_source_slug, join in data_source_definition.joins.items():
                joined_data_source = (
                    MetricsConfigLoader.configs.get_data_source_definition(
                        joined_data_source_slug, self.namespace
                    )
                )

                if joined_data_source.columns_as_dimensions:
                    joined_data_sources.append(joined_data_source)

                    # Constrain the dryrun query to a single date to keep it
                    # cheap; "NULL" marks data sources without a date column.
                    date_filter = None
                    if joined_data_source.submission_date_column != "NULL":
                        date_filter = (
                            None
                            if joined_data_source.submission_date_column is None
                            or joined_data_source.submission_date_column == "NULL"
                            else f"{joined_data_source.submission_date_column} = '2023-01-01'"
                        )

                    # create Looker dimensions by doing a dryrun
                    query = MetricsConfigLoader.configs.get_data_source_sql(
                        joined_data_source_slug,
                        self.namespace,
                        where=date_filter,
                    ).format(dataset=self.namespace)

                    base_view_dimensions[joined_data_source_slug] = (
                        lookml_utils._generate_dimensions_from_query(
                            query, dryrun=dryrun
                        )
                    )

        if (
            data_source_definition.client_id_column == "NULL"
            or data_source_definition.columns_as_dimensions
        ):
            # if the metrics data source doesn't have any joins then use the dimensions
            # of the data source itself as base fields
            date_filter = None
            if data_source_definition.submission_date_column != "NULL":
                date_filter = (
                    "submission_date = '2023-01-01'"
                    if data_source_definition.submission_date_column is None
                    else f"{data_source_definition.submission_date_column} = '2023-01-01'"
                )

            query = MetricsConfigLoader.configs.get_data_source_sql(
                data_source_definition.name,
                self.namespace,
                where=date_filter,
                ignore_joins=True,
            ).format(dataset=self.namespace)

            base_view_dimensions[data_source_definition.name] = (
                lookml_utils._generate_dimensions_from_query(query, dryrun)
            )

        # prepare base field data for query
        # "select_sql" is used in the outer SELECT/GROUP BY; "sql" aliases the
        # (possibly nested, '__'-separated) column inside the subquery.
        base_view_fields = [
            {
                "name": f"{data_source}_{dimension['name']}",
                "select_sql": f"{data_source}_{dimension['name']},\n",
                "sql": f"{data_source}.{dimension['name'].replace('__', '.')} AS {data_source}_{dimension['name']},\n",
            }
            for data_source, dimensions in base_view_dimensions.items()
            for dimension in dimensions
            if dimension["name"] not in ignore_base_fields
            and "hidden" not in dimension
            and dimension["type"] != "time"
        ]

        client_id_field = (
            "NULL"
            if data_source_definition.client_id_column == "NULL"
            else f'{data_source_definition.client_id_column or "client_id"}'
        )

        # filters for date ranges
        where_sql = " AND ".join(
            [
                f"""
                {data_source.name}.{data_source.submission_date_column or "submission_date"}
                BETWEEN
                COALESCE(
                    SAFE_CAST(
                        {{% date_start submission_date %}} AS DATE
                    ), CURRENT_DATE()) AND
                COALESCE(
                    SAFE_CAST(
                        {{% date_end submission_date %}} AS DATE
                    ), CURRENT_DATE())
                """
                for data_source in [data_source_definition] + joined_data_sources
                if data_source.submission_date_column != "NULL"
            ]
        )

        # filter on sample_id if such a field exists
        for field in base_view_fields:
            if field["name"].endswith("_sample_id"):
                where_sql += f"""
                AND
                    {field['name'].split('_sample_id')[0]}.sample_id < {{% parameter sampling %}}
                """
                break

        # Derived table: one row per client per analysis_basis period, with the
        # aggregation period chosen via the aggregate_metrics_by parameter.
        view_defn["derived_table"] = {
            "sql": f"""
            SELECT
                {"".join(metric_definitions)}
                {"".join([field['select_sql'] for field in base_view_fields])}
                {client_id_field} AS client_id,
                {{% if aggregate_metrics_by._parameter_value == 'day' %}}
                {data_source_definition.submission_date_column or "submission_date"} AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'week' %}}
                (FORMAT_DATE(
                    '%F',
                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
                    WEEK(MONDAY)))
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'month' %}}
                (FORMAT_DATE(
                    '%Y-%m',
                    {data_source_definition.submission_date_column or "submission_date"})
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'quarter' %}}
                (FORMAT_DATE(
                    '%Y-%m',
                    DATE_TRUNC({data_source_definition.submission_date_column or "submission_date"},
                    QUARTER))
                ) AS analysis_basis
                {{% elsif aggregate_metrics_by._parameter_value == 'year' %}}
                (EXTRACT(
                    YEAR FROM {data_source_definition.submission_date_column or "submission_date"})
                ) AS analysis_basis
                {{% else %}}
                NULL as analysis_basis
                {{% endif %}}
            FROM
                (
                    SELECT
                        {data_source_name}.*,
                        {"".join([field['sql'] for field in base_view_fields])}
                    FROM
                        {
                            MetricsConfigLoader.configs.get_data_source_sql(
                                data_source_name,
                                self.namespace,
                                select_fields=False
                            ).format(dataset=self.namespace)
                        }
                    WHERE {where_sql}
                )
            GROUP BY
                {"".join([field['select_sql'] for field in base_view_fields])}
                client_id,
                analysis_basis
            """
        }

        view_defn["dimensions"] = self.get_dimensions()
        view_defn["dimension_groups"] = self.get_dimension_groups()

        # add the Looker dimensions
        for data_source, dimensions in base_view_dimensions.items():
            for dimension in dimensions:
                if (
                    dimension["name"] not in ignore_base_fields
                    and dimension.get("type", "") != "time"
                ):
                    dimension["sql"] = (
                        "${TABLE}." + f"{data_source}_{dimension['name']}"
                    )
                    dimension["group_label"] = "Base Fields"
                    if not lookml_utils._is_dimension_group(dimension):
                        view_defn["dimensions"].append(dimension)
                    else:
                        view_defn["dimension_groups"].append(dimension)

                    # avoid duplicate dimensions
                    ignore_base_fields.append(dimension["name"])

        view_defn["measures"] = self.get_measures(
            view_defn["dimensions"],
        )
        view_defn["sets"] = self._get_sets()
        view_defn["parameters"] = self._get_parameters(view_defn["dimensions"])

        return {"views": [view_defn]}
|
|
|
|
|
|
|
|
def get_dimensions(
|
2024-07-29 19:24:50 +03:00
|
|
|
self,
|
|
|
|
_table=None,
|
|
|
|
_v1_name: Optional[str] = None,
|
|
|
|
_dryrun=None,
|
2023-09-08 22:09:06 +03:00
|
|
|
) -> List[Dict[str, Any]]:
|
|
|
|
"""Get the set of dimensions for this view based on the metric definitions in metric-hub."""
|
|
|
|
namespace_definitions = MetricsConfigLoader.configs.get_platform_definitions(
|
|
|
|
self.namespace
|
|
|
|
)
|
|
|
|
metric_definitions = namespace_definitions.metrics.definitions
|
|
|
|
data_source_name = re.sub("^metric_definitions_", "", self.name)
|
|
|
|
|
|
|
|
return [
|
|
|
|
{
|
|
|
|
"name": "client_id",
|
|
|
|
"type": "string",
|
2024-02-13 01:55:11 +03:00
|
|
|
"sql": "SAFE_CAST(${TABLE}.client_id AS STRING)",
|
2023-09-08 22:09:06 +03:00
|
|
|
"label": "Client ID",
|
|
|
|
"primary_key": "yes",
|
2024-02-13 01:55:11 +03:00
|
|
|
"group_label": "Base Fields",
|
2023-09-08 22:09:06 +03:00
|
|
|
"description": "Unique client identifier",
|
|
|
|
},
|
|
|
|
] + [ # add a dimension for each metric definition
|
|
|
|
{
|
|
|
|
"name": metric_slug,
|
2024-02-13 01:55:11 +03:00
|
|
|
"group_label": "Metrics",
|
2023-09-08 22:09:06 +03:00
|
|
|
"label": metric.friendly_name
|
|
|
|
or lookml_utils.slug_to_title(metric_slug),
|
|
|
|
"description": metric.description or "",
|
|
|
|
"type": "number",
|
|
|
|
"sql": "${TABLE}." + metric_slug,
|
|
|
|
}
|
|
|
|
for metric_slug, metric in metric_definitions.items()
|
2024-02-13 01:55:11 +03:00
|
|
|
if metric.select_expression
|
|
|
|
and metric.data_source.name == data_source_name
|
|
|
|
and metric.type != "histogram"
|
2023-09-08 22:09:06 +03:00
|
|
|
]
|
|
|
|
|
|
|
|
def get_dimension_groups(self) -> List[Dict[str, Any]]:
|
|
|
|
"""Get dimension groups for this view."""
|
|
|
|
return [
|
|
|
|
{
|
|
|
|
"name": "submission",
|
|
|
|
"type": "time",
|
2024-02-13 01:55:11 +03:00
|
|
|
"group_label": "Base Fields",
|
|
|
|
"sql": "CAST(${TABLE}.analysis_basis AS TIMESTAMP)",
|
2023-09-08 22:09:06 +03:00
|
|
|
"label": "Submission",
|
|
|
|
"timeframes": [
|
|
|
|
"raw",
|
|
|
|
"date",
|
|
|
|
"week",
|
|
|
|
"month",
|
|
|
|
"quarter",
|
|
|
|
"year",
|
|
|
|
],
|
|
|
|
}
|
|
|
|
]
|
|
|
|
|
|
|
|
def _get_sets(self) -> List[Dict[str, Any]]:
|
|
|
|
"""Generate metric sets."""
|
|
|
|
# group all the metric dimensions into a set
|
|
|
|
dimensions = self.get_dimensions()
|
2024-02-13 01:55:11 +03:00
|
|
|
measures = self.get_measures(dimensions)
|
2023-09-08 22:09:06 +03:00
|
|
|
|
|
|
|
return [
|
|
|
|
{
|
|
|
|
"name": "metrics",
|
|
|
|
"fields": [
|
|
|
|
dimension["name"]
|
|
|
|
for dimension in dimensions
|
|
|
|
if dimension["name"] != "client_id"
|
2024-02-13 01:55:11 +03:00
|
|
|
]
|
|
|
|
+ [measure["name"] for measure in measures],
|
|
|
|
}
|
|
|
|
]
|
|
|
|
|
2024-03-04 21:46:59 +03:00
|
|
|
def _get_parameters(self, dimensions: List[dict]):
|
|
|
|
hide_sampling = "yes"
|
|
|
|
|
|
|
|
for dim in dimensions:
|
|
|
|
if dim["name"] == "sample_id":
|
|
|
|
hide_sampling = "no"
|
|
|
|
break
|
|
|
|
|
2024-02-13 01:55:11 +03:00
|
|
|
return [
|
|
|
|
{
|
|
|
|
"name": "aggregate_metrics_by",
|
|
|
|
"label": "Aggregate Client Metrics Per",
|
|
|
|
"type": "unquoted",
|
|
|
|
"default_value": "day",
|
|
|
|
"allowed_values": [
|
|
|
|
{"label": "Per Day", "value": "day"},
|
|
|
|
{"label": "Per Week", "value": "week"},
|
|
|
|
{"label": "Per Month", "value": "month"},
|
|
|
|
{"label": "Per Quarter", "value": "quarter"},
|
|
|
|
{"label": "Per Year", "value": "year"},
|
|
|
|
{"label": "Overall", "value": "overall"},
|
2023-09-08 22:09:06 +03:00
|
|
|
],
|
2024-03-04 21:46:59 +03:00
|
|
|
},
|
|
|
|
{
|
|
|
|
"name": "sampling",
|
|
|
|
"label": "Sample of source data in %",
|
|
|
|
"type": "unquoted",
|
|
|
|
"default_value": "100",
|
|
|
|
"hidden": hide_sampling,
|
|
|
|
},
|
2023-09-08 22:09:06 +03:00
|
|
|
]
|
2024-02-13 01:55:11 +03:00
|
|
|
|
|
|
|
def get_measures(
|
|
|
|
self, dimensions: List[dict]
|
|
|
|
) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
|
|
|
|
"""Get statistics as measures."""
|
|
|
|
measures = []
|
2024-03-04 21:46:59 +03:00
|
|
|
sampling = "1"
|
|
|
|
|
|
|
|
for dim in dimensions:
|
|
|
|
if dim["name"] == "sample_id":
|
|
|
|
sampling = "100 / {% parameter sampling %}"
|
|
|
|
break
|
|
|
|
|
2024-02-13 01:55:11 +03:00
|
|
|
for dimension in dimensions:
|
|
|
|
metric = MetricsConfigLoader.configs.get_metric_definition(
|
|
|
|
dimension["name"], self.namespace
|
|
|
|
)
|
|
|
|
if metric and metric.statistics:
|
2024-03-04 21:46:59 +03:00
|
|
|
for statistic_slug, statistic_conf in metric.statistics.items():
|
2024-05-22 01:01:53 +03:00
|
|
|
dimension_label = dimension.get("label") or dimension.get("name")
|
2024-03-04 21:46:59 +03:00
|
|
|
if statistic_slug in [
|
|
|
|
"average",
|
|
|
|
"max",
|
|
|
|
"min",
|
|
|
|
"median",
|
|
|
|
]:
|
2024-02-13 01:55:11 +03:00
|
|
|
measures.append(
|
|
|
|
{
|
|
|
|
"name": f"{dimension['name']}_{statistic_slug}",
|
2024-03-04 21:46:59 +03:00
|
|
|
"type": statistic_slug,
|
2024-02-13 01:55:11 +03:00
|
|
|
"sql": "${TABLE}." + dimension["name"],
|
2024-05-22 01:01:53 +03:00
|
|
|
"label": f"{dimension_label} {statistic_slug.title()}",
|
2024-03-04 21:46:59 +03:00
|
|
|
"group_label": "Statistics",
|
2024-05-22 01:01:53 +03:00
|
|
|
"description": f"{statistic_slug.title()} of {dimension_label}",
|
2024-03-04 21:46:59 +03:00
|
|
|
}
|
|
|
|
)
|
|
|
|
elif statistic_slug == "sum":
|
|
|
|
measures.append(
|
|
|
|
{
|
|
|
|
"name": f"{dimension['name']}_{statistic_slug}",
|
|
|
|
"type": "sum",
|
|
|
|
"sql": "${TABLE}." + dimension["name"] + "*" + sampling,
|
2024-05-22 01:01:53 +03:00
|
|
|
"label": f"{dimension_label} Sum",
|
2024-02-13 01:55:11 +03:00
|
|
|
"group_label": "Statistics",
|
2024-05-22 01:01:53 +03:00
|
|
|
"description": f"Sum of {dimension_label}",
|
2024-02-13 01:55:11 +03:00
|
|
|
}
|
|
|
|
)
|
|
|
|
elif statistic_slug == "client_count":
|
|
|
|
measures.append(
|
|
|
|
{
|
2024-03-20 01:43:59 +03:00
|
|
|
"name": (
|
|
|
|
f"{dimension['name']}_{statistic_slug}_sampled"
|
|
|
|
if sampling
|
|
|
|
else f"{dimension['name']}_{statistic_slug}"
|
|
|
|
),
|
2024-02-13 01:55:11 +03:00
|
|
|
"type": "count_distinct",
|
2024-05-22 01:01:53 +03:00
|
|
|
"label": f"{dimension_label} Client Count",
|
2024-02-13 01:55:11 +03:00
|
|
|
"group_label": "Statistics",
|
2024-03-05 00:05:16 +03:00
|
|
|
"sql": "IF(${TABLE}."
|
|
|
|
+ f"{dimension['name']} > 0, "
|
2024-02-13 01:55:11 +03:00
|
|
|
+ "${TABLE}.client_id, SAFE_CAST(NULL AS STRING))",
|
2024-05-22 01:01:53 +03:00
|
|
|
"description": f"Number of clients with {dimension_label}",
|
2024-03-04 21:46:59 +03:00
|
|
|
"hidden": "yes" if sampling else "no",
|
2024-02-13 01:55:11 +03:00
|
|
|
}
|
|
|
|
)
|
|
|
|
|
2024-03-04 21:46:59 +03:00
|
|
|
if sampling:
|
|
|
|
measures.append(
|
|
|
|
{
|
|
|
|
"name": f"{dimension['name']}_{statistic_slug}",
|
|
|
|
"type": "number",
|
2024-05-22 01:01:53 +03:00
|
|
|
"label": f"{dimension_label} Client Count",
|
2024-03-04 21:46:59 +03:00
|
|
|
"group_label": "Statistics",
|
|
|
|
"sql": "${"
|
|
|
|
+ f"{dimension['name']}_{statistic_slug}_sampled"
|
|
|
|
+ "} *"
|
|
|
|
+ sampling,
|
2024-05-22 01:01:53 +03:00
|
|
|
"description": f"Number of clients with {dimension_label}",
|
2024-03-04 21:46:59 +03:00
|
|
|
}
|
|
|
|
)
|
|
|
|
elif statistic_slug == "dau_proportion":
|
|
|
|
if "numerator" in statistic_conf:
|
|
|
|
[numerator, numerator_stat] = statistic_conf[
|
|
|
|
"numerator"
|
|
|
|
].split(".")
|
|
|
|
measures.append(
|
|
|
|
{
|
|
|
|
"name": "DAU_sampled" if sampling else "DAU",
|
|
|
|
"type": "count_distinct",
|
|
|
|
"label": "DAU",
|
|
|
|
"group_label": "Statistics",
|
|
|
|
"sql": "${TABLE}.client_id",
|
|
|
|
"hidden": "yes",
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
if sampling:
|
|
|
|
measures.append(
|
|
|
|
{
|
|
|
|
"name": "DAU",
|
|
|
|
"type": "number",
|
|
|
|
"label": "DAU",
|
|
|
|
"group_label": "Statistics",
|
|
|
|
"sql": "${DAU_sampled} *" + sampling,
|
|
|
|
"hidden": "yes",
|
|
|
|
}
|
|
|
|
)
|
|
|
|
|
|
|
|
measures.append(
|
|
|
|
{
|
|
|
|
"name": f"{dimension['name']}_{statistic_slug}",
|
|
|
|
"type": "number",
|
2024-05-22 01:01:53 +03:00
|
|
|
"label": f"{dimension_label} DAU Proportion",
|
2024-03-04 21:46:59 +03:00
|
|
|
"sql": "SAFE_DIVIDE(${"
|
|
|
|
+ f"{numerator}_{numerator_stat}"
|
|
|
|
+ "}, ${DAU})",
|
|
|
|
"group_label": "Statistics",
|
|
|
|
"description": f"Proportion of daily active users with {dimension['name']}",
|
|
|
|
}
|
|
|
|
)
|
|
|
|
elif statistic_slug == "ratio":
|
|
|
|
if (
|
|
|
|
"numerator" in statistic_conf
|
|
|
|
and "denominator" in statistic_conf
|
|
|
|
):
|
|
|
|
[numerator, numerator_stat] = statistic_conf[
|
|
|
|
"numerator"
|
|
|
|
].split(".")
|
|
|
|
[denominator, denominator_stat] = statistic_conf[
|
|
|
|
"denominator"
|
|
|
|
].split(".")
|
|
|
|
|
|
|
|
measures.append(
|
|
|
|
{
|
|
|
|
"name": f"{dimension['name']}_{statistic_slug}",
|
|
|
|
"type": "number",
|
2024-05-22 01:01:53 +03:00
|
|
|
"label": f"{dimension_label} Ratio",
|
2024-03-04 21:46:59 +03:00
|
|
|
"sql": "SAFE_DIVIDE(${"
|
|
|
|
+ f"{numerator}_{numerator_stat}"
|
|
|
|
+ "}, ${"
|
|
|
|
+ f"{denominator}_{denominator_stat}"
|
|
|
|
+ "})",
|
|
|
|
"group_label": "Statistics",
|
|
|
|
"description": f""""
|
|
|
|
Ratio between {statistic_conf['numerator']} and
|
|
|
|
{statistic_conf['denominator']}""",
|
|
|
|
}
|
|
|
|
)
|
2024-05-29 20:04:28 +03:00
|
|
|
elif statistic_slug == "rolling_average":
|
|
|
|
aggregation = statistic_conf.get("aggregation", "sum")
|
|
|
|
if "window_sizes" in statistic_conf:
|
|
|
|
for window_size in statistic_conf["window_sizes"]:
|
|
|
|
measures.append(
|
|
|
|
{
|
|
|
|
"name": f"{dimension['name']}_{window_size}_day_{statistic_slug}",
|
|
|
|
"type": "number",
|
|
|
|
"label": f"{dimension_label} {window_size} Day Rolling Average",
|
|
|
|
"sql": f"""
|
|
|
|
AVG({aggregation}(${{TABLE}}.{dimension["name"]} * {sampling})) OVER (
|
|
|
|
ROWS {window_size} PRECEDING
|
|
|
|
)""",
|
|
|
|
"group_label": "Statistics",
|
|
|
|
"description": f"{window_size} day rolling average of {dimension_label}",
|
|
|
|
}
|
|
|
|
)
|
2024-03-04 21:46:59 +03:00
|
|
|
|
2024-02-13 01:55:11 +03:00
|
|
|
return measures
|