Generate LookML for growth_accounting

2021-04-15 21:42:11 -04:00 · 2021-04-15 21:42:11 -04:00 · 3154d13dc9
--- a/generator/explores.py
+++ b/generator/explores.py
@ -26,6 +26,10 @@ class Explore:
        """Generate LookML for this explore."""
        raise NotImplementedError("Only implemented in subclasses")

+    def get_dependent_views(self) -> List[str]:
+        """Return the views this explore depends on, in `self.views` order."""
+        return list(self.views.values())
+
    @staticmethod
    def from_dict(name: str, defn: dict) -> Explore:
        """Get an instance of an explore from a namespace definition."""
--- a/generator/lookml.py
+++ b/generator/lookml.py
@ -9,14 +9,11 @@ import yaml
 from google.cloud import bigquery

 from .explores import explore_types
-from .views import GrowthAccountingView, View, ViewDict, view_types
+from .views import View, ViewDict, view_types


 def _generate_views(client, out_dir: Path, views: Iterable[View]) -> Iterable[Path]:
    for view in views:
-        if view.view_type == GrowthAccountingView.type:
-            continue
-
        path = out_dir / f"{view.name}.view.lkml"
        lookml = {"views": view.to_lookml(client)}
        path.write_text(lkml.dump(lookml))
@ -27,12 +24,15 @@ def _generate_explores(
    client, out_dir: Path, namespace: str, explores: dict
 ) -> Iterable[Path]:
    for explore_name, defn in explores.items():
-        if defn["type"] != "ping_explore":
-            continue
-
        explore = explore_types[defn["type"]].from_dict(explore_name, defn)
        file_lookml = {
-            "includes": f"/looker-hub/{namespace}/views/*.view.lkml",
+            # Looker validates every included file. Listing only the views this
+            # explore depends on (instead of a `*` glob) keeps validation fast:
+            # with a glob, Looker re-validates all views for every explore.
+            "includes": [
+                f"/looker-hub/{namespace}/views/{view}.view.lkml"
+                for view in explore.get_dependent_views()
+            ],
            "explores": [explore.to_lookml()],
        }
        path = out_dir / (explore_name + ".explore.lkml")
--- a/generator/views.py
+++ b/generator/views.py
@ -2,8 +2,9 @@
 from __future__ import annotations

 from collections import defaultdict
+from copy import deepcopy
 from itertools import filterfalse
-from typing import Any, Dict, Iterator, List, TypedDict
+from typing import Any, Dict, Iterator, List, TypedDict, Union

 import click

@ -206,6 +207,201 @@ class GrowthAccountingView(View):
    """A view for growth accounting measures."""

    type: str = "growth_accounting_view"
+    # Extra dimension definitions kept separate from `default_dimensions`.
+    # NOTE(review): nothing visible in this change reads this attribute - confirm usage.
+    other_dimensions: List[Dict[str, str]] = [
+        {
+            "name": "first",
+            # LookML's table substitution is `${TABLE}`; a bare `{TABLE}` is not
+            # substituted and would end up verbatim in the generated SQL.
+            "sql": "${TABLE}.first",
+            "type": "yesno",
+            "hidden": "yes",
+        }
+    ]
+
+    # Hidden yes/no helper dimensions that the growth accounting measures filter on.
+    # Every entry has the same shape, so only the name and the SQL are spelled out.
+    default_dimensions: List[Dict[str, str]] = [
+        {"name": name, "sql": sql, "type": "yesno", "hidden": "yes"}
+        for name, sql in (
+            (
+                "active_this_week",
+                "mozfun.bits28.active_in_range(days_seen_bits, -6, 7)",
+            ),
+            (
+                "active_last_week",
+                "mozfun.bits28.active_in_range(days_seen_bits, -13, 7)",
+            ),
+            (
+                "new_this_week",
+                "DATE_DIFF(${submission_date},first_run_date, DAY) BETWEEN 0 AND 6",
+            ),
+            (
+                "new_last_week",
+                "DATE_DIFF(${submission_date},first_run_date, DAY) BETWEEN 7 AND 13",
+            ),
+        )
+    ]
+
+    # Measures emitted for every growth accounting view. The `count` measures are
+    # filtered on the yes/no dimensions in `default_dimensions`; the `number`
+    # measures are derived from those counts.
+    default_measures: List[Dict[str, Union[str, Dict[str, str]]]] = [
+        {
+            "name": "overall_active_previous",
+            "type": "count",
+            "filters": {"active_last_week": "yes"},
+        },
+        {
+            "name": "overall_active_current",
+            "type": "count",
+            "filters": {"active_this_week": "yes"},
+        },
+        {
+            "name": "overall_resurrected",
+            "type": "count",
+            "filters": {
+                "new_last_week": "no",
+                "new_this_week": "no",
+                "active_last_week": "no",
+                "active_this_week": "yes",
+            },
+        },
+        {
+            "name": "new_users",
+            "type": "count",
+            "filters": {"new_this_week": "yes", "active_this_week": "yes"},
+        },
+        {
+            "name": "established_users_returning",
+            "type": "count",
+            "filters": {
+                "new_last_week": "no",
+                "new_this_week": "no",
+                "active_last_week": "yes",
+                "active_this_week": "yes",
+            },
+        },
+        {
+            "name": "new_users_returning",
+            "type": "count",
+            "filters": {
+                "new_last_week": "yes",
+                "active_last_week": "yes",
+                "active_this_week": "yes",
+            },
+        },
+        {
+            "name": "new_users_churned_count",
+            "type": "count",
+            "filters": {
+                "new_last_week": "yes",
+                "active_last_week": "yes",
+                "active_this_week": "no",
+            },
+        },
+        {
+            "name": "established_users_churned_count",
+            "type": "count",
+            "filters": {
+                "new_last_week": "no",
+                "new_this_week": "no",
+                "active_last_week": "yes",
+                "active_this_week": "no",
+            },
+        },
+        # The two `*_churned` measures below are the churn counts negated.
+        {
+            "name": "new_users_churned",
+            "type": "number",
+            "sql": "-1 * ${new_users_churned_count}",
+        },
+        {
+            "name": "established_users_churned",
+            "type": "number",
+            "sql": "-1 * ${established_users_churned_count}",
+        },
+        {
+            "name": "overall_churned",
+            "type": "number",
+            "sql": "${new_users_churned} + ${established_users_churned}",
+        },
+        {
+            "name": "overall_retention_rate",
+            "type": "number",
+            "sql": (
+                "SAFE_DIVIDE("
+                "(${established_users_returning} + ${new_users_returning}),"
+                "${overall_active_previous}"
+                ")"
+            ),
+        },
+        {
+            "name": "established_user_retention_rate",
+            "type": "number",
+            # SAFE_DIVIDE takes exactly two arguments; no comma may follow the
+            # opening parenthesis.
+            "sql": (
+                "SAFE_DIVIDE("
+                "${established_users_returning},"
+                "(${established_users_returning} + ${established_users_churned_count})"
+                ")"
+            ),
+        },
+        {
+            "name": "new_user_retention_rate",
+            "type": "number",
+            "sql": (
+                "SAFE_DIVIDE("
+                "${new_users_returning},"
+                "(${new_users_returning} + ${new_users_churned_count})"
+                ")"
+            ),
+        },
+        {
+            "name": "overall_churn_rate",
+            "type": "number",
+            "sql": (
+                "SAFE_DIVIDE("
+                "(${established_users_churned_count} + ${new_users_churned_count}),"
+                "${overall_active_previous}"
+                ")"
+            ),
+        },
+        {
+            "name": "fraction_of_active_resurrected",
+            "type": "number",
+            "sql": "SAFE_DIVIDE(${overall_resurrected}, ${overall_active_current})",
+        },
+        {
+            "name": "fraction_of_active_new",
+            "type": "number",
+            "sql": "SAFE_DIVIDE(${new_users}, ${overall_active_current})",
+        },
+        {
+            "name": "fraction_of_active_established_returning",
+            "type": "number",
+            "sql": (
+                "SAFE_DIVIDE("
+                "${established_users_returning},"
+                "${overall_active_current}"
+                ")"
+            ),
+        },
+        {
+            "name": "fraction_of_active_new_returning",
+            "type": "number",
+            "sql": "SAFE_DIVIDE(${new_users_returning}, ${overall_active_current})",
+        },
+        {
+            "name": "quick_ratio",
+            "type": "number",
+            "sql": (
+                "SAFE_DIVIDE("
+                "(${new_users} + ${overall_resurrected}),"
+                "(${established_users_churned_count} + ${new_users_churned_count})"
+                ")"
+            ),
+        },
+    ]

    def __init__(self, tables: List[Dict[str, str]]):
        """Get an instance of a GrowthAccountingView."""
@ -232,7 +428,32 @@ class GrowthAccountingView(View):

    def to_lookml(self, bq_client) -> List[dict]:
        """Generate LookML for this view."""
-        pass
+        view_name = self.name
+        # Only the first configured table is consulted for schema and table name.
+        table_name = self.tables[0]["table"]
+
+        # Schema-derived dimensions first, then a private copy of the class defaults
+        # so later mutation of the result cannot alter the shared class attribute.
+        generated = lookml_utils._generate_dimensions(bq_client, table_name)
+        all_dimensions = generated + deepcopy(GrowthAccountingView.default_dimensions)
+
+        # Split the combined list into plain dimensions and dimension groups.
+        is_group = lookml_utils._is_dimension_group
+        plain_dimensions = [d for d in all_dimensions if not is_group(d)]
+        grouped_dimensions = [d for d in all_dimensions if is_group(d)]
+
+        # Keys are inserted in the order: name, dimensions, dimension_groups,
+        # measures, sql_table_name.
+        return [
+            {
+                "name": view_name,
+                "dimensions": plain_dimensions,
+                "dimension_groups": grouped_dimensions,
+                "measures": self.get_measures(),
+                "sql_table_name": f"`{table_name}`",
+            }
+        ]
+
+    def get_measures(self) -> List[Dict[str, Union[str, Dict[str, str]]]]:
+        """Return an independent deep copy of the class-level default measures."""
+        measures = deepcopy(GrowthAccountingView.default_measures)
+        return measures


 view_types = {
--- a/tests/test_lookml.py
+++ b/tests/test_lookml.py
@ -9,6 +9,9 @@ from click.testing import CliRunner
 from google.cloud import bigquery

 from generator.lookml import lookml
+from generator.views import GrowthAccountingView
+
+from .utils import print_and_test


@pytest.fixture
@ -31,6 +34,15 @@ class MockClient:
                    bigquery.schema.SchemaField("document_id", "STRING"),
                ],
            )
+        if table_ref == "mozdata.glean_app.baseline_clients_last_seen":
+            return bigquery.Table(
+                table_ref,
+                schema=[
+                    bigquery.schema.SchemaField("client_id", "STRING"),
+                    bigquery.schema.SchemaField("country", "STRING"),
+                    bigquery.schema.SchemaField("document_id", "STRING"),
+                ],
+            )
        if table_ref == "mozdata.glean_app.baseline":
            return bigquery.Table(
                table_ref,
@ -105,7 +117,7 @@ class MockClient:
        raise ValueError(f"Table not found: {table_ref}")


-def test_lookml(runner, tmp_path):
+def test_lookml_actual(runner, tmp_path):
    namespaces = tmp_path / "namespaces.yaml"
    namespaces.write_text(
        dedent(
@ -128,11 +140,19 @@ def test_lookml(runner, tmp_path):
                    table: mozdata.glean_app.baseline
                  - channel: beta
                    table: mozdata.glean_app_beta.baseline
+                growth_accounting:
+                  type: growth_accounting_view
+                  tables:
+                  - table: mozdata.glean_app.baseline_clients_last_seen
              explores:
                baseline:
                  type: ping_explore
                  views:
                    base_view: baseline
+                growth_accounting:
+                  type: growth_accounting_explore
+                  views:
+                    base_view: growth_accounting
            """
        )
    )
@ -153,7 +173,7 @@ def test_lookml(runner, tmp_path):
        except Exception as e:
            # use exception chaining to expose original traceback
            raise e from result.exception
-        assert {
+        expected = {
            "views": [
                {
                    "name": "baseline",
@ -189,8 +209,12 @@ def test_lookml(runner, tmp_path):
                    ],
                }
            ]
-        } == lkml.load(Path("looker-hub/custom/views/baseline.view.lkml").read_text())
-        assert {
+        }
+        print_and_test(
+            expected,
+            lkml.load(Path("looker-hub/custom/views/baseline.view.lkml").read_text()),
+        )
+        expected = {
            "views": [
                {
                    "name": "baseline",
@ -340,19 +364,67 @@ def test_lookml(runner, tmp_path):
                    ],
                }
            ]
-        } == lkml.load(
-            Path("looker-hub/glean-app/views/baseline.view.lkml").read_text()
+        }
+
+        print_and_test(
+            expected,
+            lkml.load(
+                Path("looker-hub/glean-app/views/baseline.view.lkml").read_text()
+            ),
        )
-        assert {
-            "includes": "/looker-hub/glean-app/views/*.view.lkml",
+        expected = {
+            "views": [
+                {
+                    "name": "growth_accounting",
+                    "sql_table_name": "`mozdata.glean_app.baseline_clients_last_seen`",
+                    "dimensions": [
+                        {
+                            "name": "client_id",
+                            "hidden": "yes",
+                            "sql": "${TABLE}.client_id",
+                        },
+                        {
+                            "name": "country",
+                            "map_layer_name": "countries",
+                            "sql": "${TABLE}.country",
+                            "type": "string",
+                        },
+                        {
+                            "name": "document_id",
+                            "hidden": "yes",
+                            "sql": "${TABLE}.document_id",
+                        },
+                    ]
+                    + GrowthAccountingView.default_dimensions,
+                    "measures": GrowthAccountingView.default_measures,
+                }
+            ]
+        }
+
+        # lkml changes the format of lookml, so we need to cycle it through to match
+        print_and_test(
+            lkml.load(lkml.dump(expected)),
+            lkml.load(
+                Path(
+                    "looker-hub/glean-app/views/growth_accounting.view.lkml"
+                ).read_text()
+            ),
+        )
+
+        expected = {
+            "includes": ["/looker-hub/glean-app/views/baseline.view.lkml"],
            "explores": [
                {
                    "name": "baseline",
                    "view_name": "baseline",
                }
            ],
-        } == lkml.load(
-            Path("looker-hub/glean-app/explores/baseline.explore.lkml").read_text()
+        }
+        print_and_test(
+            expected,
+            lkml.load(
+                Path("looker-hub/glean-app/explores/baseline.explore.lkml").read_text()
+            ),
        )


--- a/tests/utils.py
+++ b/tests/utils.py
@ -0,0 +1,74 @@
+"""Utility functions for tests."""
+import pprint
+
+
+def get_differences(expected, result, path="", sep="."):
+    """
+    Get the differences between two JSON-like python objects.
+
+    For complicated objects, this is a big improvement over pytest -vv.
+
+    Dicts are compared key by key and lists index by index, recursing into
+    nested values; anything else is compared with ``!=``.
+
+    :param expected: the reference object
+    :param result: the object under test
+    :param path: location of these objects within the top-level objects
+    :param sep: separator placed before each path component
+    :return: list of ``(description, path)`` tuples; empty when nothing differs
+    """
+    # A value present on only one side is reported once; there is nothing
+    # further to descend into.
+    if expected is None and result is None:
+        return []
+    if result is None:
+        return [("Expected exists but not Result", path)]
+    if expected is None:
+        return [("Result exists but not Expected", path)]
+
+    exp_is_dict, res_is_dict = isinstance(expected, dict), isinstance(result, dict)
+    if exp_is_dict and not res_is_dict:
+        return [("Expected is dict but not Result", path)]
+    if res_is_dict and not exp_is_dict:
+        return [("Result is dict but not Expected", path)]
+
+    differences = []
+    if exp_is_dict:
+        exp_keys, res_keys = set(expected), set(result)
+        # Sort by string form so the report order is stable between runs.
+        # Paths are built with f-strings so non-string keys do not raise TypeError.
+        for k in sorted(exp_keys - res_keys, key=str):
+            differences.append(("In Expected, not in Result", f"{path}{sep}{k}"))
+        for k in sorted(res_keys - exp_keys, key=str):
+            differences.append(("In Result, not in Expected", f"{path}{sep}{k}"))
+        for k in sorted(exp_keys & res_keys, key=str):
+            # Pass sep along so nested paths keep the caller's separator.
+            differences += get_differences(
+                expected[k], result[k], f"{path}{sep}{k}", sep
+            )
+    elif isinstance(expected, list) and isinstance(result, list):
+        for i in range(max(len(expected), len(result))):
+            item_path = f"{path}{sep}{i}"
+            if i >= len(result):
+                differences.append(
+                    (f"Result missing element {expected[i]}", item_path)
+                )
+            elif i >= len(expected):
+                differences.append(
+                    (f"Result contains extra element {result[i]}", item_path)
+                )
+            else:
+                differences += get_differences(expected[i], result[i], item_path, sep)
+    elif expected != result:
+        differences.append((f"Expected={expected}, Result={result}", path))
+
+    return differences
+
+
+def print_and_test(expected, result):
+    """Pretty-print both objects and their differences, then assert they are equal."""
+    printer = pprint.PrettyPrinter(indent=2)
+
+    for heading, obj in (("Expected", expected), ("Actual", result)):
+        print(f"\n{heading}:")
+        printer.pprint(obj)
+
+    print("\nDifferences:")
+    report = (" - ".join(entry) for entry in get_differences(expected, result))
+    print("\n".join(report))
+
+    assert result == expected