Generate LookML For growth_accounting

Frank Bertsch 2021-04-15 21:42:11 -04:00
Parent 8bc8be6b32
Commit 3154d13dc9
5 changed files with 391 additions and 20 deletions

View file

@@ -26,6 +26,10 @@ class Explore:
"""Generate LookML for this explore."""
raise NotImplementedError("Only implemented in subclasses")
def get_dependent_views(self) -> List[str]:
"""Get views this explore is dependent on."""
return [view for _, view in self.views.items()]
@staticmethod
def from_dict(name: str, defn: dict) -> Explore:
"""Get an instance of an explore from a namespace definition."""

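For orientation, get_dependent_views simply flattens the explore's views mapping into a list of view names; a minimal sketch (illustrative only, with values mirroring the namespace fixture used in the tests below):

# Illustrative only: an explore defined with `views: {base_view: baseline}`
# reports "baseline" as its single dependent view.
views = {"base_view": "baseline"}
dependent_views = [view for _, view in views.items()]
assert dependent_views == ["baseline"]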
View file

@@ -9,14 +9,11 @@ import yaml
from google.cloud import bigquery
from .explores import explore_types
from .views import GrowthAccountingView, View, ViewDict, view_types
from .views import View, ViewDict, view_types
def _generate_views(client, out_dir: Path, views: Iterable[View]) -> Iterable[Path]:
for view in views:
if view.view_type == GrowthAccountingView.type:
continue
path = out_dir / f"{view.name}.view.lkml"
lookml = {"views": view.to_lookml(client)}
path.write_text(lkml.dump(lookml))
@@ -27,12 +24,15 @@ def _generate_explores(
client, out_dir: Path, namespace: str, explores: dict
) -> Iterable[Path]:
for explore_name, defn in explores.items():
if defn["type"] != "ping_explore":
continue
explore = explore_types[defn["type"]].from_dict(explore_name, defn)
file_lookml = {
"includes": f"/looker-hub/{namespace}/views/*.view.lkml",
# Looker validates all included files,
# so if we're not explicit about files here, validation takes
# forever as looker re-validates all views for every explore (if we used *).
"includes": [
f"/looker-hub/{namespace}/views/{view}.view.lkml"
for view in explore.get_dependent_views()
],
"explores": [explore.to_lookml()],
}
path = out_dir / (explore_name + ".explore.lkml")

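To make the change above concrete: with per-view includes, the generated explore file lists only the views the explore actually depends on instead of a wildcard. A rough sketch of the output (names borrowed from the test fixture later in this commit; this is an illustration, not the generator code itself):

import lkml

namespace = "glean-app"
dependent_views = ["baseline"]
file_lookml = {
    "includes": [
        f"/looker-hub/{namespace}/views/{view}.view.lkml"
        for view in dependent_views
    ],
    "explores": [{"name": "baseline", "view_name": "baseline"}],
}
# Produces an explore file that includes baseline.view.lkml rather than *.view.lkml.
print(lkml.dump(file_lookml))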
View file

@@ -2,8 +2,9 @@
from __future__ import annotations
from collections import defaultdict
from copy import deepcopy
from itertools import filterfalse
from typing import Any, Dict, Iterator, List, TypedDict
from typing import Any, Dict, Iterator, List, TypedDict, Union
import click
@@ -206,6 +207,201 @@ class GrowthAccountingView(View):
"""A view for growth accounting measures."""
type: str = "growth_accounting_view"
other_dimensions: List[Dict[str, str]] = [
{
"name": "first",
"sql": "{TABLE}.first",
"type": "yesno",
"hidden": "yes",
}
]
default_dimensions: List[Dict[str, str]] = [
{
"name": "active_this_week",
"sql": "mozfun.bits28.active_in_range(days_seen_bits, -6, 7)",
"type": "yesno",
"hidden": "yes",
},
{
"name": "active_last_week",
"sql": "mozfun.bits28.active_in_range(days_seen_bits, -13, 7)",
"type": "yesno",
"hidden": "yes",
},
{
"name": "new_this_week",
"sql": (
"DATE_DIFF(${submission_date}," "first_run_date, DAY) BETWEEN 0 AND 6"
),
"type": "yesno",
"hidden": "yes",
},
{
"name": "new_last_week",
"sql": (
"DATE_DIFF(${submission_date}," "first_run_date, DAY) BETWEEN 7 AND 13"
),
"type": "yesno",
"hidden": "yes",
},
]
default_measures: List[Dict[str, Union[str, Dict[str, str]]]] = [
{
"name": "overall_active_previous",
"type": "count",
"filters": {"active_last_week": "yes"},
},
{
"name": "overall_active_current",
"type": "count",
"filters": {"active_this_week": "yes"},
},
{
"name": "overall_resurrected",
"type": "count",
"filters": {
"new_last_week": "no",
"new_this_week": "no",
"active_last_week": "no",
"active_this_week": "yes",
},
},
{
"name": "new_users",
"type": "count",
"filters": {"new_this_week": "yes", "active_this_week": "yes"},
},
{
"name": "established_users_returning",
"type": "count",
"filters": {
"new_last_week": "no",
"new_this_week": "no",
"active_last_week": "yes",
"active_this_week": "yes",
},
},
{
"name": "new_users_returning",
"type": "count",
"filters": {
"new_last_week": "yes",
"active_last_week": "yes",
"active_this_week": "yes",
},
},
{
"name": "new_users_churned_count",
"type": "count",
"filters": {
"new_last_week": "yes",
"active_last_week": "yes",
"active_this_week": "no",
},
},
{
"name": "established_users_churned_count",
"type": "count",
"filters": {
"new_last_week": "no",
"new_this_week": "no",
"active_last_week": "yes",
"active_this_week": "no",
},
},
{
"name": "new_users_churned",
"type": "number",
"sql": "-1 * ${new_users_churned_count}",
},
{
"name": "established_users_churned",
"type": "number",
"sql": "-1 * ${established_users_churned_count}",
},
{
"name": "overall_churned",
"type": "number",
"sql": "${new_users_churned} + ${established_users_churned}",
},
{
"name": "overall_retention_rate",
"type": "number",
"sql": (
"SAFE_DIVIDE("
"(${established_users_returning} + ${new_users_returning}),"
"${overall_active_previous}"
")"
),
},
{
"name": "established_user_retention_rate",
"type": "number",
"sql": (
"SAFE_DIVIDE(,"
"${established_users_returning},"
"(${established_users_returning} + ${established_users_churned_count})"
")"
),
},
{
"name": "new_user_retention_rate",
"type": "number",
"sql": (
"SAFE_DIVIDE("
"${new_users_returning},"
"(${new_users_returning} + ${new_users_churned_count})"
")"
),
},
{
"name": "overall_churn_rate",
"type": "number",
"sql": (
"SAFE_DIVIDE("
"(${established_users_churned_count} + ${new_users_churned_count}),"
"${overall_active_previous}"
")"
),
},
{
"name": "fraction_of_active_resurrected",
"type": "number",
"sql": "SAFE_DIVIDE(${overall_resurrected}, ${overall_active_current})",
},
{
"name": "fraction_of_active_new",
"type": "number",
"sql": "SAFE_DIVIDE(${new_users}, ${overall_active_current})",
},
{
"name": "fraction_of_active_established_returning",
"type": "number",
"sql": (
"SAFE_DIVIDE("
"${established_users_returning},"
"${overall_active_current}"
")"
),
},
{
"name": "fraction_of_active_new_returning",
"type": "number",
"sql": "SAFE_DIVIDE(${new_users_returning}, ${overall_active_current})",
},
{
"name": "quick_ratio",
"type": "number",
"sql": (
"SAFE_DIVIDE("
"(${new_users} + ${overall_resurrected}),"
"(${established_users_churned_count} + ${new_users_churned_count})"
")"
),
},
]
def __init__(self, tables: List[Dict[str, str]]):
"""Get an instance of a GrowthAccountingView."""
@@ -232,7 +428,32 @@ class GrowthAccountingView(View):
def to_lookml(self, bq_client) -> List[dict]:
"""Generate LookML for this view."""
pass
view_defn: Dict[str, Any] = {"name": self.name}
table = self.tables[0]["table"]
# add dimensions and dimension groups
dimensions = lookml_utils._generate_dimensions(bq_client, table) + deepcopy(
GrowthAccountingView.default_dimensions
)
view_defn["dimensions"] = list(
filterfalse(lookml_utils._is_dimension_group, dimensions)
)
view_defn["dimension_groups"] = list(
filter(lookml_utils._is_dimension_group, dimensions)
)
# add measures
view_defn["measures"] = self.get_measures()
# SQL Table Name
view_defn["sql_table_name"] = f"`{table}`"
return [view_defn]
def get_measures(self) -> List[Dict[str, Union[str, Dict[str, str]]]]:
"""Generate measures for the Growth Accounting Framework."""
return deepcopy(GrowthAccountingView.default_measures)
view_types = {

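The measures above follow the usual week-over-week growth-accounting identities. As a sanity check on how the ratio measures relate to the count measures, here is a small illustrative calculation with hypothetical counts (not from the source); safe_divide stands in for BigQuery's SAFE_DIVIDE:

# Hypothetical weekly counts, for illustration only.
new_users = 120
overall_resurrected = 30
new_users_returning = 80
established_users_returning = 700
new_users_churned_count = 40
established_users_churned_count = 100
overall_active_previous = 920

def safe_divide(x, y):
    # Mirrors SAFE_DIVIDE: returns None instead of raising on division by zero.
    return None if y == 0 else x / y

overall_retention_rate = safe_divide(
    established_users_returning + new_users_returning, overall_active_previous
)
quick_ratio = safe_divide(
    new_users + overall_resurrected,
    established_users_churned_count + new_users_churned_count,
)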
View file

@@ -9,6 +9,9 @@ from click.testing import CliRunner
from google.cloud import bigquery
from generator.lookml import lookml
from generator.views import GrowthAccountingView
from .utils import print_and_test
@pytest.fixture
@@ -31,6 +34,15 @@ class MockClient:
bigquery.schema.SchemaField("document_id", "STRING"),
],
)
if table_ref == "mozdata.glean_app.baseline_clients_last_seen":
return bigquery.Table(
table_ref,
schema=[
bigquery.schema.SchemaField("client_id", "STRING"),
bigquery.schema.SchemaField("country", "STRING"),
bigquery.schema.SchemaField("document_id", "STRING"),
],
)
if table_ref == "mozdata.glean_app.baseline":
return bigquery.Table(
table_ref,
@@ -105,7 +117,7 @@ class MockClient:
raise ValueError(f"Table not found: {table_ref}")
def test_lookml(runner, tmp_path):
def test_lookml_actual(runner, tmp_path):
namespaces = tmp_path / "namespaces.yaml"
namespaces.write_text(
dedent(
@@ -128,11 +140,19 @@ def test_lookml(runner, tmp_path):
table: mozdata.glean_app.baseline
- channel: beta
table: mozdata.glean_app_beta.baseline
growth_accounting:
type: growth_accounting_view
tables:
- table: mozdata.glean_app.baseline_clients_last_seen
explores:
baseline:
type: ping_explore
views:
base_view: baseline
growth_accounting:
type: growth_accounting_explore
views:
base_view: growth_accounting
"""
)
)
@@ -153,7 +173,7 @@ def test_lookml(runner, tmp_path):
except Exception as e:
# use exception chaining to expose original traceback
raise e from result.exception
assert {
expected = {
"views": [
{
"name": "baseline",
@@ -189,8 +209,12 @@
],
}
]
} == lkml.load(Path("looker-hub/custom/views/baseline.view.lkml").read_text())
assert {
}
print_and_test(
expected,
lkml.load(Path("looker-hub/custom/views/baseline.view.lkml").read_text()),
)
expected = {
"views": [
{
"name": "baseline",
@@ -340,19 +364,67 @@ def test_lookml(runner, tmp_path):
],
}
]
} == lkml.load(
Path("looker-hub/glean-app/views/baseline.view.lkml").read_text()
}
print_and_test(
expected,
lkml.load(
Path("looker-hub/glean-app/views/baseline.view.lkml").read_text()
),
)
assert {
"includes": "/looker-hub/glean-app/views/*.view.lkml",
expected = {
"views": [
{
"name": "growth_accounting",
"sql_table_name": "`mozdata.glean_app.baseline_clients_last_seen`",
"dimensions": [
{
"name": "client_id",
"hidden": "yes",
"sql": "${TABLE}.client_id",
},
{
"name": "country",
"map_layer_name": "countries",
"sql": "${TABLE}.country",
"type": "string",
},
{
"name": "document_id",
"hidden": "yes",
"sql": "${TABLE}.document_id",
},
]
+ GrowthAccountingView.default_dimensions,
"measures": GrowthAccountingView.default_measures,
}
]
}
# lkml changes the format of lookml, so we need to cycle it through to match
print_and_test(
lkml.load(lkml.dump(expected)),
lkml.load(
Path(
"looker-hub/glean-app/views/growth_accounting.view.lkml"
).read_text()
),
)
expected = {
"includes": ["/looker-hub/glean-app/views/baseline.view.lkml"],
"explores": [
{
"name": "baseline",
"view_name": "baseline",
}
],
} == lkml.load(
Path("looker-hub/glean-app/explores/baseline.explore.lkml").read_text()
}
print_and_test(
expected,
lkml.load(
Path("looker-hub/glean-app/explores/baseline.explore.lkml").read_text()
),
)

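On the lkml.load(lkml.dump(expected)) cycle above: the hand-built expected dict (including the "+ GrowthAccountingView.default_dimensions" concatenation) is not guaranteed to be in the exact shape the parser produces, so the expected side is normalized through the serializer before comparing. A minimal sketch of the idea (illustrative, not part of the commit):

import lkml

expected = {
    "views": [
        {
            "name": "growth_accounting",
            "measures": [
                {
                    "name": "new_users",
                    "type": "count",
                    "filters": {"new_this_week": "yes", "active_this_week": "yes"},
                }
            ],
        }
    ]
}
# Round-trip through the serializer so the dict matches what lkml.load() returns
# for a generated file.
normalized = lkml.load(lkml.dump(expected))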
tests/utils.py (new file, 74 lines)

View file

@@ -0,0 +1,74 @@
"""Utility functions for tests."""
import pprint
def get_differences(expected, result, path="", sep="."):
"""
Get the differences between two JSON-like python objects.
For complicated objects, this is a big improvement over pytest -vv.
"""
differences = []
if expected is not None and result is None:
differences.append(("Expected exists but not Result", path))
if expected is None and result is not None:
differences.append(("Result exists but not Expected", path))
if expected is None and result is None:
return differences
exp_is_dict, res_is_dict = isinstance(expected, dict), isinstance(result, dict)
exp_is_list, res_is_list = isinstance(expected, list), isinstance(result, list)
if exp_is_dict and not res_is_dict:
differences.append(("Expected is dict but not Result", path))
elif res_is_dict and not exp_is_dict:
differences.append(("Result is dict but not Expected", path))
elif not exp_is_dict and not res_is_dict:
if exp_is_list and res_is_list:
for i in range(max(len(expected), len(result))):
if i >= len(result):
differences.append(
(f"Result missing element {expected[i]}", path + sep + str(i))
)
elif i >= len(expected):
differences.append(
(
f"Result contains extra element {result[i]}",
path + sep + str(i),
)
)
else:
differences += get_differences(
expected[i], result[i], path + sep + str(i)
)
elif expected != result:
differences.append((f"Expected={expected}, Result={result}", path))
else:
exp_keys, res_keys = set(expected.keys()), set(result.keys())
in_exp_not_res, in_res_not_exp = exp_keys - res_keys, res_keys - exp_keys
for k in in_exp_not_res:
differences.append(("In Expected, not in Result", path + sep + k))
for k in in_res_not_exp:
differences.append(("In Result, not in Expected", path + sep + k))
for k in exp_keys & res_keys:
differences += get_differences(expected[k], result[k], path + sep + k)
return differences
def print_and_test(expected, result):
"""Print objects and differences, then test equality."""
pp = pprint.PrettyPrinter(indent=2)
print("\nExpected:")
pp.pprint(expected)
print("\nActual:")
pp.pprint(result)
print("\nDifferences:")
print("\n".join([" - ".join(v) for v in get_differences(expected, result)]))
assert result == expected
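A small usage sketch for these helpers (hypothetical objects, mirroring the shapes asserted in test_lookml_actual):

from .utils import get_differences, print_and_test

expected = {"views": [{"name": "baseline", "measures": [{"name": "clients", "type": "count"}]}]}
result = {"views": [{"name": "baseline", "measures": [{"name": "clients", "type": "sum"}]}]}

# The mismatch is reported together with its path, e.g.
# ('Expected=count, Result=sum', '.views.0.measures.0.type')
print(get_differences(expected, result))

# Identical objects print both sides and an empty differences list, then pass the assertion.
print_and_test(expected, expected)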