Generate LookML For growth_accounting
This commit is contained in:
Родитель
8bc8be6b32
Коммит
3154d13dc9
|
@ -26,6 +26,10 @@ class Explore:
|
|||
"""Generate LookML for this explore."""
|
||||
raise NotImplementedError("Only implemented in subclasses")
|
||||
|
||||
def get_dependent_views(self) -> List[str]:
    """Return the views this explore depends on.

    The result holds the values of ``self.views`` in the mapping's
    iteration order; the keys are not used.
    """
    return list(self.views.values())
|
||||
|
||||
@staticmethod
|
||||
def from_dict(name: str, defn: dict) -> Explore:
|
||||
"""Get an instance of an explore from a namespace definition."""
|
||||
|
|
|
@ -9,14 +9,11 @@ import yaml
|
|||
from google.cloud import bigquery
|
||||
|
||||
from .explores import explore_types
|
||||
from .views import GrowthAccountingView, View, ViewDict, view_types
|
||||
from .views import View, ViewDict, view_types
|
||||
|
||||
|
||||
def _generate_views(client, out_dir: Path, views: Iterable[View]) -> Iterable[Path]:
|
||||
for view in views:
|
||||
if view.view_type == GrowthAccountingView.type:
|
||||
continue
|
||||
|
||||
path = out_dir / f"{view.name}.view.lkml"
|
||||
lookml = {"views": view.to_lookml(client)}
|
||||
path.write_text(lkml.dump(lookml))
|
||||
|
@ -27,12 +24,15 @@ def _generate_explores(
|
|||
client, out_dir: Path, namespace: str, explores: dict
|
||||
) -> Iterable[Path]:
|
||||
for explore_name, defn in explores.items():
|
||||
if defn["type"] != "ping_explore":
|
||||
continue
|
||||
|
||||
explore = explore_types[defn["type"]].from_dict(explore_name, defn)
|
||||
file_lookml = {
|
||||
"includes": f"/looker-hub/{namespace}/views/*.view.lkml",
|
||||
# Looker validates all included files,
|
||||
# so if we're not explicit about files here, validation takes
|
||||
# forever as looker re-validates all views for every explore (if we used *).
|
||||
"includes": [
|
||||
f"/looker-hub/{namespace}/views/{view}.view.lkml"
|
||||
for view in explore.get_dependent_views()
|
||||
],
|
||||
"explores": [explore.to_lookml()],
|
||||
}
|
||||
path = out_dir / (explore_name + ".explore.lkml")
|
||||
|
|
|
@ -2,8 +2,9 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from collections import defaultdict
|
||||
from copy import deepcopy
|
||||
from itertools import filterfalse
|
||||
from typing import Any, Dict, Iterator, List, TypedDict
|
||||
from typing import Any, Dict, Iterator, List, TypedDict, Union
|
||||
|
||||
import click
|
||||
|
||||
|
@ -206,6 +207,201 @@ class GrowthAccountingView(View):
|
|||
"""A view for growth accounting measures."""
|
||||
|
||||
type: str = "growth_accounting_view"
|
||||
# Additional hidden yes/no dimension definition kept alongside the defaults.
other_dimensions: List[Dict[str, str]] = [
    {
        "name": "first",
        # LookML's table substitution is spelled ``${TABLE}``; the leading
        # ``$`` is required for the placeholder to be recognised.
        "sql": "${TABLE}.first",
        "type": "yesno",
        "hidden": "yes",
    }
]
|
||||
|
||||
# Hidden yes/no dimensions; every entry shares the same type and visibility,
# so only the name and the SQL expression vary per dimension.
default_dimensions: List[Dict[str, str]] = [
    {"name": dim_name, "sql": dim_sql, "type": "yesno", "hidden": "yes"}
    for dim_name, dim_sql in (
        (
            "active_this_week",
            "mozfun.bits28.active_in_range(days_seen_bits, -6, 7)",
        ),
        (
            "active_last_week",
            "mozfun.bits28.active_in_range(days_seen_bits, -13, 7)",
        ),
        (
            "new_this_week",
            "DATE_DIFF(${submission_date},first_run_date, DAY) BETWEEN 0 AND 6",
        ),
        (
            "new_last_week",
            "DATE_DIFF(${submission_date},first_run_date, DAY) BETWEEN 7 AND 13",
        ),
    )
]
|
||||
|
||||
# Measures for the growth accounting framework.
#
# The "count" measures count rows matching yes/no filters on the hidden
# dimensions (active_this_week, active_last_week, new_this_week,
# new_last_week). The "number" measures are derived from those counts.
default_measures: List[Dict[str, Union[str, Dict[str, str]]]] = [
    # --- filtered row counts -------------------------------------------
    {
        "name": "overall_active_previous",
        "type": "count",
        "filters": {"active_last_week": "yes"},
    },
    {
        "name": "overall_active_current",
        "type": "count",
        "filters": {"active_this_week": "yes"},
    },
    {
        "name": "overall_resurrected",
        "type": "count",
        "filters": {
            "new_last_week": "no",
            "new_this_week": "no",
            "active_last_week": "no",
            "active_this_week": "yes",
        },
    },
    {
        "name": "new_users",
        "type": "count",
        "filters": {"new_this_week": "yes", "active_this_week": "yes"},
    },
    {
        "name": "established_users_returning",
        "type": "count",
        "filters": {
            "new_last_week": "no",
            "new_this_week": "no",
            "active_last_week": "yes",
            "active_this_week": "yes",
        },
    },
    {
        "name": "new_users_returning",
        "type": "count",
        "filters": {
            "new_last_week": "yes",
            "active_last_week": "yes",
            "active_this_week": "yes",
        },
    },
    {
        "name": "new_users_churned_count",
        "type": "count",
        "filters": {
            "new_last_week": "yes",
            "active_last_week": "yes",
            "active_this_week": "no",
        },
    },
    {
        "name": "established_users_churned_count",
        "type": "count",
        "filters": {
            "new_last_week": "no",
            "new_this_week": "no",
            "active_last_week": "yes",
            "active_this_week": "no",
        },
    },
    # --- churn expressed as negative numbers ---------------------------
    {
        "name": "new_users_churned",
        "type": "number",
        "sql": "-1 * ${new_users_churned_count}",
    },
    {
        "name": "established_users_churned",
        "type": "number",
        "sql": "-1 * ${established_users_churned_count}",
    },
    {
        "name": "overall_churned",
        "type": "number",
        "sql": "${new_users_churned} + ${established_users_churned}",
    },
    # --- rates and ratios ----------------------------------------------
    {
        "name": "overall_retention_rate",
        "type": "number",
        "sql": (
            "SAFE_DIVIDE("
            "(${established_users_returning} + ${new_users_returning}),"
            "${overall_active_previous}"
            ")"
        ),
    },
    {
        "name": "established_user_retention_rate",
        "type": "number",
        # SAFE_DIVIDE takes exactly two arguments (numerator, denominator),
        # so nothing may sit between the opening parenthesis and the
        # numerator.
        "sql": (
            "SAFE_DIVIDE("
            "${established_users_returning},"
            "(${established_users_returning} + ${established_users_churned_count})"
            ")"
        ),
    },
    {
        "name": "new_user_retention_rate",
        "type": "number",
        "sql": (
            "SAFE_DIVIDE("
            "${new_users_returning},"
            "(${new_users_returning} + ${new_users_churned_count})"
            ")"
        ),
    },
    {
        "name": "overall_churn_rate",
        "type": "number",
        "sql": (
            "SAFE_DIVIDE("
            "(${established_users_churned_count} + ${new_users_churned_count}),"
            "${overall_active_previous}"
            ")"
        ),
    },
    {
        "name": "fraction_of_active_resurrected",
        "type": "number",
        "sql": "SAFE_DIVIDE(${overall_resurrected}, ${overall_active_current})",
    },
    {
        "name": "fraction_of_active_new",
        "type": "number",
        "sql": "SAFE_DIVIDE(${new_users}, ${overall_active_current})",
    },
    {
        "name": "fraction_of_active_established_returning",
        "type": "number",
        "sql": (
            "SAFE_DIVIDE("
            "${established_users_returning},"
            "${overall_active_current}"
            ")"
        ),
    },
    {
        "name": "fraction_of_active_new_returning",
        "type": "number",
        "sql": "SAFE_DIVIDE(${new_users_returning}, ${overall_active_current})",
    },
    {
        "name": "quick_ratio",
        "type": "number",
        "sql": (
            "SAFE_DIVIDE("
            "(${new_users} + ${overall_resurrected}),"
            "(${established_users_churned_count} + ${new_users_churned_count})"
            ")"
        ),
    },
]
|
||||
|
||||
def __init__(self, tables: List[Dict[str, str]]):
|
||||
"""Get an instance of a GrowthAccountingView."""
|
||||
|
@ -232,7 +428,32 @@ class GrowthAccountingView(View):
|
|||
|
||||
def to_lookml(self, bq_client) -> List[dict]:
    """Generate LookML for this view.

    Dimensions are generated for the first configured table and extended
    with a copy of the class-level default dimensions. The combined list is
    then split into plain dimensions and dimension groups.

    Returns a single-element list holding the view definition.
    """
    lookml_view: Dict[str, Any] = {"name": self.name}
    source_table = self.tables[0]["table"]

    # deepcopy gives this view its own copy of the class-level defaults.
    all_dimensions = lookml_utils._generate_dimensions(
        bq_client, source_table
    ) + deepcopy(GrowthAccountingView.default_dimensions)

    lookml_view.update(
        dimensions=[
            dim
            for dim in all_dimensions
            if not lookml_utils._is_dimension_group(dim)
        ],
        dimension_groups=[
            dim for dim in all_dimensions if lookml_utils._is_dimension_group(dim)
        ],
        measures=self.get_measures(),
        sql_table_name=f"`{source_table}`",
    )

    return [lookml_view]
|
||||
|
||||
def get_measures(self) -> List[Dict[str, Union[str, Dict[str, str]]]]:
    """Return a deep copy of the default growth accounting measures."""
    measures = deepcopy(GrowthAccountingView.default_measures)
    return measures
|
||||
|
||||
|
||||
view_types = {
|
||||
|
|
|
@ -9,6 +9,9 @@ from click.testing import CliRunner
|
|||
from google.cloud import bigquery
|
||||
|
||||
from generator.lookml import lookml
|
||||
from generator.views import GrowthAccountingView
|
||||
|
||||
from .utils import print_and_test
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
@ -31,6 +34,15 @@ class MockClient:
|
|||
bigquery.schema.SchemaField("document_id", "STRING"),
|
||||
],
|
||||
)
|
||||
if table_ref == "mozdata.glean_app.baseline_clients_last_seen":
|
||||
return bigquery.Table(
|
||||
table_ref,
|
||||
schema=[
|
||||
bigquery.schema.SchemaField("client_id", "STRING"),
|
||||
bigquery.schema.SchemaField("country", "STRING"),
|
||||
bigquery.schema.SchemaField("document_id", "STRING"),
|
||||
],
|
||||
)
|
||||
if table_ref == "mozdata.glean_app.baseline":
|
||||
return bigquery.Table(
|
||||
table_ref,
|
||||
|
@ -105,7 +117,7 @@ class MockClient:
|
|||
raise ValueError(f"Table not found: {table_ref}")
|
||||
|
||||
|
||||
def test_lookml(runner, tmp_path):
|
||||
def test_lookml_actual(runner, tmp_path):
|
||||
namespaces = tmp_path / "namespaces.yaml"
|
||||
namespaces.write_text(
|
||||
dedent(
|
||||
|
@ -128,11 +140,19 @@ def test_lookml(runner, tmp_path):
|
|||
table: mozdata.glean_app.baseline
|
||||
- channel: beta
|
||||
table: mozdata.glean_app_beta.baseline
|
||||
growth_accounting:
|
||||
type: growth_accounting_view
|
||||
tables:
|
||||
- table: mozdata.glean_app.baseline_clients_last_seen
|
||||
explores:
|
||||
baseline:
|
||||
type: ping_explore
|
||||
views:
|
||||
base_view: baseline
|
||||
growth_accounting:
|
||||
type: growth_accounting_explore
|
||||
views:
|
||||
base_view: growth_accounting
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
@ -153,7 +173,7 @@ def test_lookml(runner, tmp_path):
|
|||
except Exception as e:
|
||||
# use exception chaining to expose original traceback
|
||||
raise e from result.exception
|
||||
assert {
|
||||
expected = {
|
||||
"views": [
|
||||
{
|
||||
"name": "baseline",
|
||||
|
@ -189,8 +209,12 @@ def test_lookml(runner, tmp_path):
|
|||
],
|
||||
}
|
||||
]
|
||||
} == lkml.load(Path("looker-hub/custom/views/baseline.view.lkml").read_text())
|
||||
assert {
|
||||
}
|
||||
print_and_test(
|
||||
expected,
|
||||
lkml.load(Path("looker-hub/custom/views/baseline.view.lkml").read_text()),
|
||||
)
|
||||
expected = {
|
||||
"views": [
|
||||
{
|
||||
"name": "baseline",
|
||||
|
@ -340,19 +364,67 @@ def test_lookml(runner, tmp_path):
|
|||
],
|
||||
}
|
||||
]
|
||||
} == lkml.load(
|
||||
Path("looker-hub/glean-app/views/baseline.view.lkml").read_text()
|
||||
}
|
||||
|
||||
print_and_test(
|
||||
expected,
|
||||
lkml.load(
|
||||
Path("looker-hub/glean-app/views/baseline.view.lkml").read_text()
|
||||
),
|
||||
)
|
||||
assert {
|
||||
"includes": "/looker-hub/glean-app/views/*.view.lkml",
|
||||
expected = {
|
||||
"views": [
|
||||
{
|
||||
"name": "growth_accounting",
|
||||
"sql_table_name": "`mozdata.glean_app.baseline_clients_last_seen`",
|
||||
"dimensions": [
|
||||
{
|
||||
"name": "client_id",
|
||||
"hidden": "yes",
|
||||
"sql": "${TABLE}.client_id",
|
||||
},
|
||||
{
|
||||
"name": "country",
|
||||
"map_layer_name": "countries",
|
||||
"sql": "${TABLE}.country",
|
||||
"type": "string",
|
||||
},
|
||||
{
|
||||
"name": "document_id",
|
||||
"hidden": "yes",
|
||||
"sql": "${TABLE}.document_id",
|
||||
},
|
||||
]
|
||||
+ GrowthAccountingView.default_dimensions,
|
||||
"measures": GrowthAccountingView.default_measures,
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
# lkml changes the format of lookml, so we need to cycle it through to match
|
||||
print_and_test(
|
||||
lkml.load(lkml.dump(expected)),
|
||||
lkml.load(
|
||||
Path(
|
||||
"looker-hub/glean-app/views/growth_accounting.view.lkml"
|
||||
).read_text()
|
||||
),
|
||||
)
|
||||
|
||||
expected = {
|
||||
"includes": ["/looker-hub/glean-app/views/baseline.view.lkml"],
|
||||
"explores": [
|
||||
{
|
||||
"name": "baseline",
|
||||
"view_name": "baseline",
|
||||
}
|
||||
],
|
||||
} == lkml.load(
|
||||
Path("looker-hub/glean-app/explores/baseline.explore.lkml").read_text()
|
||||
}
|
||||
print_and_test(
|
||||
expected,
|
||||
lkml.load(
|
||||
Path("looker-hub/glean-app/explores/baseline.explore.lkml").read_text()
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
"""Utility functions for tests."""
|
||||
import pprint
|
||||
|
||||
|
||||
def get_differences(expected, result, path="", sep="."):
    """
    Get the differences between two JSON-like python objects.

    For complicated objects, this is a big improvement over pytest -vv.

    Dicts are compared key by key and lists element by element, recursing
    into matching entries; anything else is compared with ``!=``.

    Args:
        expected: The reference object.
        result: The object being checked against ``expected``.
        path: Location of the compared objects within the top-level objects.
        sep: Separator placed before each key or index appended to ``path``.
            It is used at every nesting level.

    Returns:
        A list of ``(message, path)`` string tuples, one per difference.
        The list is empty when no differences are found. Dict keys are
        visited in order of their string form, so the output is stable.
    """
    # A None on either side is reported once; there is nothing to descend into.
    if expected is None and result is None:
        return []
    if result is None:
        return [("Expected exists but not Result", path)]
    if expected is None:
        return [("Result exists but not Expected", path)]

    exp_is_dict, res_is_dict = isinstance(expected, dict), isinstance(result, dict)
    if exp_is_dict and not res_is_dict:
        return [("Expected is dict but not Result", path)]
    if res_is_dict and not exp_is_dict:
        return [("Result is dict but not Expected", path)]

    differences = []

    if exp_is_dict:
        exp_keys, res_keys = set(expected), set(result)

        # Sorting by str() keeps the report order stable and tolerates
        # keys that are not strings or not mutually comparable.
        for k in sorted(exp_keys - res_keys, key=str):
            differences.append(("In Expected, not in Result", f"{path}{sep}{k}"))
        for k in sorted(res_keys - exp_keys, key=str):
            differences.append(("In Result, not in Expected", f"{path}{sep}{k}"))
        for k in sorted(exp_keys & res_keys, key=str):
            differences += get_differences(
                expected[k], result[k], f"{path}{sep}{k}", sep
            )
    elif isinstance(expected, list) and isinstance(result, list):
        for i in range(max(len(expected), len(result))):
            item_path = f"{path}{sep}{i}"
            if i >= len(result):
                differences.append(
                    (f"Result missing element {expected[i]}", item_path)
                )
            elif i >= len(expected):
                differences.append(
                    (f"Result contains extra element {result[i]}", item_path)
                )
            else:
                differences += get_differences(
                    expected[i], result[i], item_path, sep
                )
    elif expected != result:
        differences.append((f"Expected={expected}, Result={result}", path))

    return differences
|
||||
|
||||
|
||||
def print_and_test(expected, result):
    """Print both objects and their differences, then assert they are equal.

    The printed output is for diagnosis; the final assertion decides
    whether the comparison passes.
    """
    printer = pprint.PrettyPrinter(indent=2)

    for heading, obj in (("\nExpected:", expected), ("\nActual:", result)):
        print(heading)
        printer.pprint(obj)

    print("\nDifferences:")
    # Each difference is a tuple of strings, joined into one line.
    diff_lines = [" - ".join(entry) for entry in get_differences(expected, result)]
    print("\n".join(diff_lines))

    assert result == expected
|
Загрузка…
Ссылка в новой задаче