Add unnested labeled counter to glean ping explores (#139)
* Add unnested label view
* Add hidden dimension for labeled counter (probably unnecessary)
* Add measures to separate view (broken because of circular dependencies)
* Move unnested view logic into glean ping directly
* Add explore logic to expose join views in explore
* Add test for labeled counter view
* Add description to view
* Break gigantic test for lookml into smaller tests
* Remove extra semicolons and description from view
* Remove duplicate values from explore/view
* Update _to_lookml for API changes
* Add looker suggest explores
* Update tests for suggest explores and new labels
* Use client id as primary key for joins
* Join using document_id as primary key
* Update tests for document id as primary key
* Fix description from rebase
* Address review: longer time period for labels
* Remove document id primary key
* Revert "Remove document id primary key"
This reverts commit 242913c241.
* Sort views in the correct order
* Reduce suggest time to 30 days
This commit is contained in:
Родитель
4e4e24f4db
Коммит
d54af3a23a
|
@ -26,12 +26,50 @@ class GleanPingExplore(PingExplore):
|
|||
}
|
||||
# collapse whitespace in the description so the lookml looks a little better
|
||||
ping_description = " ".join(ping_descriptions[self.name].split())
|
||||
# insert the description in
|
||||
lookml = super()._to_lookml(v1_name)
|
||||
lookml[0][
|
||||
"description"
|
||||
] = f"Explore for the {self.name} ping. {ping_description}"
|
||||
return lookml
|
||||
|
||||
views_lookml = self.get_view_lookml(self.views["base_view"])
|
||||
|
||||
# The first view, by convention, is always the base view with the
|
||||
# majority of the dimensions from the top level.
|
||||
base = views_lookml["views"][0]
|
||||
base_name = base["name"]
|
||||
|
||||
joins = []
|
||||
for view in views_lookml["views"][1:]:
|
||||
if view["name"].startswith("suggest__"):
|
||||
continue
|
||||
view_name = view["name"]
|
||||
metric = "__".join(view["name"].split("__")[1:])
|
||||
joins.append(
|
||||
{
|
||||
"name": view_name,
|
||||
"relationship": "one_to_many",
|
||||
"sql": (
|
||||
f"LEFT JOIN UNNEST(${{{base_name}.{metric}}}) AS {view_name} "
|
||||
f"ON ${{{base_name}.document_id}} = ${{{view_name}.document_id}}"
|
||||
),
|
||||
}
|
||||
)
|
||||
|
||||
base_explore = {
|
||||
"name": self.name,
|
||||
# list the base explore first by prefixing with a space
|
||||
"view_label": f" {self.name.title()}",
|
||||
"description": f"Explore for the {self.name} ping. {ping_description}",
|
||||
"view_name": self.views["base_view"],
|
||||
"always_filter": {
|
||||
"filters": self.get_required_filters("base_view"),
|
||||
},
|
||||
"joins": joins,
|
||||
}
|
||||
|
||||
suggests = []
|
||||
for view in views_lookml["views"][1:]:
|
||||
if not view["name"].startswith("suggest__"):
|
||||
continue
|
||||
suggests.append({"name": view["name"], "hidden": "yes"})
|
||||
|
||||
return [base_explore] + suggests
|
||||
|
||||
@staticmethod
|
||||
def from_views(views: List[View]) -> Iterator[PingExplore]:
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
"""Class to describe a Glean Ping View."""
|
||||
import logging
|
||||
from collections import Counter
|
||||
from textwrap import dedent
|
||||
from typing import Any, Dict, Iterable, List, Optional, Union
|
||||
|
||||
import click
|
||||
|
@ -19,6 +20,7 @@ DISTRIBUTION_TYPES = {
|
|||
ALLOWED_TYPES = DISTRIBUTION_TYPES | {
|
||||
"boolean",
|
||||
"counter",
|
||||
"labeled_counter",
|
||||
"datetime",
|
||||
"jwe",
|
||||
"quantity",
|
||||
|
@ -35,6 +37,104 @@ class GleanPingView(PingView):
|
|||
type: str = "glean_ping_view"
|
||||
allow_glean: bool = True
|
||||
|
||||
def to_lookml(self, bq_client, v1_name: Optional[str]) -> Dict[str, Any]:
|
||||
"""Generate LookML for this view.
|
||||
|
||||
The Glean views include labeled metrics, which need to be joined
|
||||
against the view in the explore.
|
||||
"""
|
||||
lookml = super().to_lookml(bq_client, v1_name)
|
||||
|
||||
# iterate over all of the glean metrics and generate views for unnested
|
||||
# fields as necessary. Append them to the list of existing view
|
||||
# definitions.
|
||||
table = next(
|
||||
(table for table in self.tables if table.get("channel") == "release"),
|
||||
self.tables[0],
|
||||
)["table"]
|
||||
dimensions = self.get_dimensions(bq_client, table, v1_name)
|
||||
|
||||
client_id_field = self._get_client_id(dimensions, table)
|
||||
|
||||
view_definitions = []
|
||||
metrics = self._get_glean_metrics(v1_name)
|
||||
for metric in metrics:
|
||||
if metric.type == "labeled_counter":
|
||||
looker_name = self._to_looker_name(metric)
|
||||
view_name = f"{self.name}__{looker_name}"
|
||||
suggest_name = f"suggest__{view_name}"
|
||||
join_view = {
|
||||
"name": view_name,
|
||||
"label": (
|
||||
"_".join(looker_name.split("__")[1:]).replace("_", " ").title()
|
||||
),
|
||||
"dimensions": [
|
||||
{
|
||||
"name": "document_id",
|
||||
"type": "string",
|
||||
"sql": f"${{{self.name}.document_id}}",
|
||||
"primary_key": "yes",
|
||||
"hidden": "yes",
|
||||
},
|
||||
{
|
||||
"name": "key",
|
||||
"type": "string",
|
||||
"sql": "${TABLE}.key",
|
||||
"suggest_explore": suggest_name,
|
||||
"suggest_dimension": f"{suggest_name}.key",
|
||||
},
|
||||
{
|
||||
"name": "value",
|
||||
"type": "number",
|
||||
"sql": "${TABLE}.value",
|
||||
"hidden": "yes",
|
||||
},
|
||||
],
|
||||
"measures": [
|
||||
{
|
||||
"name": "count",
|
||||
"type": "sum",
|
||||
"sql": "${value}",
|
||||
},
|
||||
{
|
||||
"name": "client_count",
|
||||
"type": "count_distinct",
|
||||
"sql": f"case when ${{value}} > 0 then ${{{self.name}.{client_id_field}}} end",
|
||||
},
|
||||
],
|
||||
}
|
||||
suggest_view = {
|
||||
"name": suggest_name,
|
||||
"derived_table": {
|
||||
"sql": dedent(
|
||||
f"""
|
||||
select
|
||||
m.key,
|
||||
count(*) as n
|
||||
from {table} as t,
|
||||
unnest(metrics.{metric.type}.{metric.id.replace(".", "_")}) as m
|
||||
where date(submission_timestamp) > date_sub(current_date, interval 30 day)
|
||||
and sample_id = 0
|
||||
group by key
|
||||
order by n desc
|
||||
"""
|
||||
)
|
||||
},
|
||||
"dimensions": [
|
||||
{"name": "key", "type": "string", "sql": "${TABLE}.key"}
|
||||
],
|
||||
}
|
||||
view_definitions += [join_view, suggest_view]
|
||||
# deduplicate view definitions, because somehow a few entries make it in
|
||||
# twice e.g. metrics__metrics__labeled_counter__media_audio_init_failure
|
||||
view_definitions = sorted(
|
||||
{v["name"]: v for v in view_definitions}.values(), key=lambda x: x["name"] # type: ignore
|
||||
)
|
||||
|
||||
lookml["views"] += view_definitions
|
||||
|
||||
return lookml
|
||||
|
||||
def _get_links(self, dimension: dict) -> List[Dict[str, str]]:
|
||||
"""Get a link annotation given a metric name."""
|
||||
name = self._get_name(dimension)
|
||||
|
@ -83,20 +183,30 @@ class GleanPingView(PingView):
|
|||
|
||||
return ping_probes
|
||||
|
||||
def _to_looker_name(self, metric: GleanProbe, suffix: str = "") -> str:
|
||||
"""Convert a glean probe into a looker name."""
|
||||
*category, name = metric.id.split(".")
|
||||
category = "_".join(category)
|
||||
|
||||
sep = "" if not category else "_"
|
||||
label = name
|
||||
looker_name = f"metrics__{metric.type}__{category}{sep}{label}"
|
||||
if suffix:
|
||||
looker_name = f"{looker_name}__{suffix}"
|
||||
return looker_name
|
||||
|
||||
def _make_dimension(
|
||||
self, metric: GleanProbe, suffix: str, sql_map: Dict[str, Dict[str, str]]
|
||||
) -> Optional[Dict[str, Union[str, List[Dict[str, str]]]]]:
|
||||
*category, name = metric.id.split(".")
|
||||
category = "_".join(category)
|
||||
|
||||
sep = "" if not category else "_"
|
||||
label = name
|
||||
sep = "_"
|
||||
if not category:
|
||||
sep = ""
|
||||
looker_name = f"metrics__{metric.type}__{category}{sep}{name}"
|
||||
if suffix:
|
||||
label = f"{name}_{suffix}"
|
||||
looker_name = f"metrics__{metric.type}__{category}{sep}{name}__{suffix}"
|
||||
looker_name = f"{looker_name}__{suffix}"
|
||||
|
||||
if looker_name not in sql_map:
|
||||
return None
|
||||
|
@ -127,6 +237,13 @@ class GleanPingView(PingView):
|
|||
],
|
||||
}
|
||||
|
||||
# remove some elements from the definition if we're handling a labeled
|
||||
# counter, as an initial join dimension
|
||||
if metric.type == "labeled_counter":
|
||||
# this field is not used since labeled counters are maps
|
||||
del lookml["type"]
|
||||
lookml["hidden"] = "yes"
|
||||
|
||||
if metric.description:
|
||||
lookml["description"] = metric.description
|
||||
|
||||
|
@ -174,11 +291,14 @@ class GleanPingView(PingView):
|
|||
) -> List[Dict[str, Any]]:
|
||||
"""Get the set of dimensions for this view."""
|
||||
all_fields = super().get_dimensions(bq_client, table, v1_name)
|
||||
return self._get_glean_metric_dimensions(all_fields, v1_name) + [
|
||||
fields = self._get_glean_metric_dimensions(all_fields, v1_name) + [
|
||||
self._add_link(d)
|
||||
for d in all_fields
|
||||
if not d["name"].startswith("metrics__")
|
||||
]
|
||||
# later entries will override earlier entries, if there are duplicates
|
||||
field_dict = {f["name"]: f for f in fields}
|
||||
return list(field_dict.values())
|
||||
|
||||
def get_measures(
|
||||
self, dimensions: List[dict], table: str, v1_name: Optional[str]
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from itertools import filterfalse
|
||||
from typing import Any, Dict, Iterator, List, Optional, Union
|
||||
|
||||
import click
|
||||
|
@ -69,12 +68,16 @@ class PingView(View):
|
|||
)["table"]
|
||||
|
||||
dimensions = self.get_dimensions(bq_client, table, v1_name)
|
||||
view_defn["dimensions"] = list(
|
||||
filterfalse(lookml_utils._is_dimension_group, dimensions)
|
||||
)
|
||||
view_defn["dimension_groups"] = list(
|
||||
filter(lookml_utils._is_dimension_group, dimensions)
|
||||
)
|
||||
|
||||
# set document id field as a primary key for joins
|
||||
view_defn["dimensions"] = [
|
||||
d if d["name"] != "document_id" else dict(**d, primary_key="yes")
|
||||
for d in dimensions
|
||||
if not lookml_utils._is_dimension_group(d)
|
||||
]
|
||||
view_defn["dimension_groups"] = [
|
||||
d for d in dimensions if lookml_utils._is_dimension_group(d)
|
||||
]
|
||||
|
||||
# add measures
|
||||
view_defn["measures"] = self.get_measures(dimensions, table, v1_name)
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import contextlib
|
||||
from pathlib import Path
|
||||
from textwrap import dedent
|
||||
from unittest.mock import Mock, patch
|
||||
|
@ -153,6 +154,15 @@ class MockClient:
|
|||
SchemaField("no_category_counter", "INTEGER"),
|
||||
],
|
||||
),
|
||||
SchemaField(
|
||||
"labeled_counter",
|
||||
"RECORD",
|
||||
"REPEATED",
|
||||
fields=[
|
||||
SchemaField("key", "STRING"),
|
||||
SchemaField("value", "INTEGER"),
|
||||
],
|
||||
),
|
||||
SchemaField(
|
||||
"custom_distribution",
|
||||
"RECORD",
|
||||
|
@ -445,6 +455,14 @@ def msg_glean_probes():
|
|||
"test.counter",
|
||||
{"type": "counter", "history": history_with_descr, "name": "test.counter"},
|
||||
),
|
||||
GleanProbe(
|
||||
"test.labeled_counter",
|
||||
{
|
||||
"type": "labeled_counter",
|
||||
"history": history_with_descr,
|
||||
"name": "test.labeled_counter",
|
||||
},
|
||||
),
|
||||
GleanProbe(
|
||||
"no_category_counter",
|
||||
{"type": "counter", "history": history, "name": "no_category_counter"},
|
||||
|
@ -516,9 +534,8 @@ def msg_glean_probes():
|
|||
]
|
||||
|
||||
|
||||
@patch("generator.views.glean_ping_view.GleanPing")
|
||||
@patch("generator.explores.glean_ping_explore.GleanPing")
|
||||
def test_lookml_actual(
|
||||
@contextlib.contextmanager
|
||||
def _prepare_lookml_actual_test(
|
||||
mock_glean_ping_view,
|
||||
mock_glean_ping_explore,
|
||||
runner,
|
||||
|
@ -598,6 +615,27 @@ def test_lookml_actual(
|
|||
with runner.isolated_filesystem():
|
||||
with patch("google.cloud.bigquery.Client", MockClient):
|
||||
_lookml(open(namespaces), glean_apps, "looker-hub/")
|
||||
yield namespaces_text
|
||||
|
||||
|
||||
@patch("generator.views.glean_ping_view.GleanPing")
|
||||
@patch("generator.explores.glean_ping_explore.GleanPing")
|
||||
def test_lookml_actual_baseline_view(
|
||||
mock_glean_ping_view,
|
||||
mock_glean_ping_explore,
|
||||
runner,
|
||||
glean_apps,
|
||||
tmp_path,
|
||||
msg_glean_probes,
|
||||
):
|
||||
with _prepare_lookml_actual_test(
|
||||
mock_glean_ping_view,
|
||||
mock_glean_ping_explore,
|
||||
runner,
|
||||
glean_apps,
|
||||
tmp_path,
|
||||
msg_glean_probes,
|
||||
) as namespaces_text:
|
||||
expected = {
|
||||
"views": [
|
||||
{
|
||||
|
@ -619,6 +657,7 @@ def test_lookml_actual(
|
|||
"name": "document_id",
|
||||
"hidden": "yes",
|
||||
"sql": "${TABLE}.document_id",
|
||||
"primary_key": "yes",
|
||||
},
|
||||
],
|
||||
"measures": [
|
||||
|
@ -640,6 +679,26 @@ def test_lookml_actual(
|
|||
lkml.load(Path("looker-hub/custom/views/baseline.view.lkml").read_text()),
|
||||
)
|
||||
print_and_test(namespaces_text, open(Path("looker-hub/namespaces.yaml")).read())
|
||||
|
||||
|
||||
@patch("generator.views.glean_ping_view.GleanPing")
|
||||
@patch("generator.explores.glean_ping_explore.GleanPing")
|
||||
def test_lookml_actual_baseline_view_parameterized(
|
||||
mock_glean_ping_view,
|
||||
mock_glean_ping_explore,
|
||||
runner,
|
||||
glean_apps,
|
||||
tmp_path,
|
||||
msg_glean_probes,
|
||||
):
|
||||
with _prepare_lookml_actual_test(
|
||||
mock_glean_ping_view,
|
||||
mock_glean_ping_explore,
|
||||
runner,
|
||||
glean_apps,
|
||||
tmp_path,
|
||||
msg_glean_probes,
|
||||
):
|
||||
expected = {
|
||||
"views": [
|
||||
{
|
||||
|
@ -802,6 +861,26 @@ def test_lookml_actual(
|
|||
Path("looker-hub/glean-app/views/baseline.view.lkml").read_text()
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@patch("generator.views.glean_ping_view.GleanPing")
|
||||
@patch("generator.explores.glean_ping_explore.GleanPing")
|
||||
def test_lookml_actual_metrics_view(
|
||||
mock_glean_ping_view,
|
||||
mock_glean_ping_explore,
|
||||
runner,
|
||||
glean_apps,
|
||||
tmp_path,
|
||||
msg_glean_probes,
|
||||
):
|
||||
with _prepare_lookml_actual_test(
|
||||
mock_glean_ping_view,
|
||||
mock_glean_ping_explore,
|
||||
runner,
|
||||
glean_apps,
|
||||
tmp_path,
|
||||
msg_glean_probes,
|
||||
):
|
||||
expected = {
|
||||
"views": [
|
||||
{
|
||||
|
@ -1129,13 +1208,86 @@ def test_lookml_actual(
|
|||
"type": "count_distinct",
|
||||
},
|
||||
],
|
||||
}
|
||||
},
|
||||
{
|
||||
"dimensions": [
|
||||
{
|
||||
"hidden": "yes",
|
||||
"name": "document_id",
|
||||
"primary_key": "yes",
|
||||
"sql": "${metrics.document_id}",
|
||||
"type": "string",
|
||||
},
|
||||
{
|
||||
"name": "key",
|
||||
"sql": "${TABLE}.key",
|
||||
"suggest_dimension": "suggest__metrics__metrics__labeled_counter__test_labeled_counter.key",
|
||||
"suggest_explore": "suggest__metrics__metrics__labeled_counter__test_labeled_counter",
|
||||
"type": "string",
|
||||
},
|
||||
{
|
||||
"hidden": "yes",
|
||||
"name": "value",
|
||||
"sql": "${TABLE}.value",
|
||||
"type": "number",
|
||||
},
|
||||
],
|
||||
"label": "Labeled Counter Test Labeled Counter",
|
||||
"measures": [
|
||||
{"name": "count", "sql": "${value}", "type": "sum"},
|
||||
{
|
||||
"name": "client_count",
|
||||
"sql": "case when ${value} > 0 then "
|
||||
"${metrics.client_info__client_id} end",
|
||||
"type": "count_distinct",
|
||||
},
|
||||
],
|
||||
"name": "metrics__metrics__labeled_counter__test_labeled_counter",
|
||||
},
|
||||
{
|
||||
"derived_table": {
|
||||
"sql": "select\n"
|
||||
" m.key,\n"
|
||||
" count(*) as n\n"
|
||||
"from mozdata.glean_app.metrics as "
|
||||
"t,\n"
|
||||
"unnest(metrics.labeled_counter.test_labeled_counter) as m\n"
|
||||
"where date(submission_timestamp) > date_sub(current_date, interval 30 day)\n"
|
||||
" and sample_id = 0\n"
|
||||
"group by key\n"
|
||||
"order by n desc"
|
||||
},
|
||||
"dimensions": [
|
||||
{"name": "key", "sql": "${TABLE}.key", "type": "string"}
|
||||
],
|
||||
"name": "suggest__metrics__metrics__labeled_counter__test_labeled_counter",
|
||||
},
|
||||
]
|
||||
}
|
||||
print_and_test(
|
||||
expected,
|
||||
lkml.load(Path("looker-hub/glean-app/views/metrics.view.lkml").read_text()),
|
||||
)
|
||||
|
||||
|
||||
@patch("generator.views.glean_ping_view.GleanPing")
|
||||
@patch("generator.explores.glean_ping_explore.GleanPing")
|
||||
def test_lookml_actual_growth_accounting_view(
|
||||
mock_glean_ping_view,
|
||||
mock_glean_ping_explore,
|
||||
runner,
|
||||
glean_apps,
|
||||
tmp_path,
|
||||
msg_glean_probes,
|
||||
):
|
||||
with _prepare_lookml_actual_test(
|
||||
mock_glean_ping_view,
|
||||
mock_glean_ping_explore,
|
||||
runner,
|
||||
glean_apps,
|
||||
tmp_path,
|
||||
msg_glean_probes,
|
||||
):
|
||||
expected = {
|
||||
"views": [
|
||||
{
|
||||
|
@ -1175,6 +1327,25 @@ def test_lookml_actual(
|
|||
),
|
||||
)
|
||||
|
||||
|
||||
@patch("generator.views.glean_ping_view.GleanPing")
|
||||
@patch("generator.explores.glean_ping_explore.GleanPing")
|
||||
def test_lookml_actual_baseline_explore(
|
||||
mock_glean_ping_view,
|
||||
mock_glean_ping_explore,
|
||||
runner,
|
||||
glean_apps,
|
||||
tmp_path,
|
||||
msg_glean_probes,
|
||||
):
|
||||
with _prepare_lookml_actual_test(
|
||||
mock_glean_ping_view,
|
||||
mock_glean_ping_explore,
|
||||
runner,
|
||||
glean_apps,
|
||||
tmp_path,
|
||||
msg_glean_probes,
|
||||
):
|
||||
expected = {
|
||||
"includes": ["/looker-hub/glean-app/views/baseline.view.lkml"],
|
||||
"explores": [
|
||||
|
@ -1182,6 +1353,7 @@ def test_lookml_actual(
|
|||
"name": "baseline",
|
||||
"description": "Explore for the baseline ping. The baseline ping is foo.",
|
||||
"view_name": "baseline",
|
||||
"view_label": " Baseline",
|
||||
"always_filter": {
|
||||
"filters": [
|
||||
{"channel": "mozdata.glean^_app.baseline"},
|
||||
|
@ -1199,6 +1371,25 @@ def test_lookml_actual(
|
|||
),
|
||||
)
|
||||
|
||||
|
||||
@patch("generator.views.glean_ping_view.GleanPing")
|
||||
@patch("generator.explores.glean_ping_explore.GleanPing")
|
||||
def test_lookml_actual_client_counts(
|
||||
mock_glean_ping_view,
|
||||
mock_glean_ping_explore,
|
||||
runner,
|
||||
glean_apps,
|
||||
tmp_path,
|
||||
msg_glean_probes,
|
||||
):
|
||||
with _prepare_lookml_actual_test(
|
||||
mock_glean_ping_view,
|
||||
mock_glean_ping_explore,
|
||||
runner,
|
||||
glean_apps,
|
||||
tmp_path,
|
||||
msg_glean_probes,
|
||||
):
|
||||
expected = {
|
||||
"includes": ["baseline_clients_daily_table.view.lkml"],
|
||||
"views": [
|
||||
|
|
Загрузка…
Ссылка в новой задаче