- Fix test cases for namespaces
- Add new file for testing funnels
This commit is contained in:
Frank Bertsch 2021-05-25 13:20:12 -04:00
Родитель 212ed9d056
Коммит 0783b7c46e
7 изменённых файлов: 323 добавлений и 16 удалений

Просмотреть файл

@ -23,9 +23,11 @@ class FunnelAnalysisExplore(Explore):
"""
for view in views:
if view.name == "funnel_analysis":
tables = view.tables
dict_views = {
f"joined_event_type_{n}": f"event_type_{n}"
for n in range(1, FunnelAnalysisExplore.n_funnel_steps + 1)
f"joined_{name}": name
for name in tables[0].keys()
if name.startswith("event_type_")
}
dict_views["base_view"] = "funnel_analysis"

Просмотреть файл

@ -1,7 +1,7 @@
"""Class to describe a Funnel Analysis View."""
from __future__ import annotations
from typing import Dict, Iterator, List
from typing import Any, Dict, Iterator, List, Optional
from .view import View
@ -10,6 +10,7 @@ class FunnelAnalysisView(View):
"""A view for Client Counting measures."""
type: str = "funnel_analysis_view"
num_funnel_steps: int = 4
def __init__(self, namespace: str, tables: List[Dict[str, str]]):
"""Get an instance of a FunnelAnalysisView."""
@ -22,6 +23,7 @@ class FunnelAnalysisView(View):
is_glean: bool,
channels: List[Dict[str, str]],
db_views: dict,
num_funnel_steps: int = num_funnel_steps,
) -> Iterator[FunnelAnalysisView]:
"""Get Client Count Views from db views and app variants."""
# We can guarantee there will always be at least one channel,
@ -33,16 +35,121 @@ class FunnelAnalysisView(View):
)["dataset"]
necessary_views = {"events_daily", "event_types"}
actual_views = {}
for view_id, references in db_views[dataset].items():
necessary_views -= {view_id}
if view_id in necessary_views:
actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`"
if len(necessary_views) == 0:
if len(actual_views) == 2:
tables = {
"funnel_analysis": "events_daily_table",
"event_types": actual_views["event_types"],
}
tables.update(
{
f"event_type_{i}": "event_types"
for i in range(1, num_funnel_steps + 1)
}
)
yield FunnelAnalysisView(
namespace,
[
[tables],
)
def to_lookml(self, bq_client, v1_name: Optional[str]) -> Dict[str, Any]:
    """Render this view as a LookML dictionary.

    ``bq_client`` and ``v1_name`` are accepted but are not read here.

    Returns a dict with two keys: ``includes`` (the ``.view.lkml`` file of the
    table registered under ``funnel_analysis``) and ``views`` (the funnel view
    followed by the event-type views).
    """
    base_table = self.tables[0]["funnel_analysis"]
    include_path = f"{base_table}.view.lkml"
    funnel_views = self._funnel_analysis_lookml()
    event_type_views = self._event_types_lookml()
    return {
        "includes": [include_path],
        "views": funnel_views + event_type_views,
    }
def n_events(self) -> int:
    """Return how many funnel steps this view supports.

    The count is the number of keys in the first table mapping whose name
    starts with ``event_type_``.
    """
    return sum(1 for key in self.tables[0] if key.startswith("event_type_"))
def _funnel_analysis_lookml(self) -> List[Dict[str, Any]]:
    """Build the ``funnel_analysis`` view definition.

    For every step ``1..n_events()`` the view gets a ``completed_event_<n>``
    yes/no dimension, a ``count_user_days_event_<n>`` measure filtered on all
    steps up to and including ``n``, and a ``fraction_user_days_event_<n>``
    measure dividing that count by the count for step 1.

    Returns a single-element list holding the view, which extends
    ``events_daily_table``.
    """
    steps = range(1, self.n_events() + 1)

    dimensions: List[Dict[str, Any]] = []
    count_measures: List[Dict[str, Any]] = []
    fractional_measures: List[Dict[str, Any]] = []

    for step in steps:
        regex_sql = (
            "REGEXP_CONTAINS(${TABLE}.events, mozfun.event_analysis.create_funnel_regex(["
            f"${{event_type_{step}.match_string}}],"
            "True))"
        )
        dimensions.append(
            {
                "name": f"completed_event_{step}",
                "type": "yesno",
                "sql": regex_sql,
            }
        )

        # A step only counts when every earlier step was completed as well.
        completed_so_far = [
            {f"completed_event_{earlier}": "yes"} for earlier in range(1, step + 1)
        ]
        count_measures.append(
            {
                "name": f"count_user_days_event_{step}",
                "type": "count",
                "filters": completed_so_far,
            }
        )

        ratio_sql = (
            f"SAFE_DIVIDE(${{count_user_days_event_{step}}}, "
            "${count_user_days_event_1})"
        )
        fractional_measures.append(
            {
                "name": f"fraction_user_days_event_{step}",
                "type": "number",
                "sql": ratio_sql,
            }
        )

    view = {
        "name": "funnel_analysis",
        "extends": ["events_daily_table"],
        "dimensions": dimensions,
        "measures": count_measures + fractional_measures,
    }
    return [view]
def _event_types_lookml(self) -> List[Dict[str, Any]]:
"""Build the ``event_types`` view and one extending view per funnel step.

The first entry is a derived-table view named ``event_types`` whose SQL
aggregates a ``match_string`` from the table registered under
``self.tables[0]['event_types']``; it declares two string filters and a
hidden ``match_string`` dimension. It is followed by ``event_type_<n>``
views, each extending ``event_types``, for ``n`` in ``1..self.n_events()``.
"""
events = [
{
"name": "event_types",
"derived_table": {
"sql": (
"SELECT "
"mozfun.event_analysis.aggregate_match_strings( "
"ARRAY_AGG( "
"mozfun.event_analysis.event_index_to_match_string(index))) AS match_string "
"FROM "
f"{self.tables[0]['event_types']} "
# NOTE(review): the conditions below reference `message_id` and
# `event_type`, but the filters declared in this view are named
# "category" and "name" -- confirm these are meant to match.
"WHERE "
"{% condition message_id %} event_types.category {% endcondition %} "
"AND {% condition event_type %} event_types.event {% endcondition %}"
)
},
"filters": [
{
# NOTE(review): the next two keys and the lone ")" further down look
# like removed-line residue from the diff rendering -- confirm.
"events_daily_view": "events_daily_table",
"event_types_view": "event_types_table",
"name": "category",
"type": "string",
"suggest_explore": "event_names",
"suggest_dimension": "event_names.category",
},
{
"name": "name",
"type": "string",
"suggest_explore": "event_names",
"suggest_dimension": "event_names.name",
},
],
)
"dimensions": [
{
"name": "match_string",
"hidden": "yes",
"sql": "${TABLE}.match_string",
}
],
}
] + [
# One thin view per funnel step, each inheriting everything from event_types.
{
"name": f"event_type_{n}",
"extends": ["event_types"],
}
for n in range(1, self.n_events() + 1)
]
return events

Просмотреть файл

@ -64,7 +64,7 @@ class View(object):
def __str__(self):
    """Stringify.

    Returns a one-line description holding the view's name, type, tables and
    namespace. The earlier namespace-less ``return`` is dropped because it
    made the namespace-aware one unreachable.
    """
    return (
        f"name: {self.name}, type: {self.type}, "
        f"table: {self.tables}, namespace: {self.namespace}"
    )
def __eq__(self, other) -> bool:
"""Check for equality with other View."""

Просмотреть файл

@ -0,0 +1,171 @@
from unittest.mock import Mock
import pytest
from generator.explores import FunnelAnalysisExplore
from generator.views import FunnelAnalysisView
from .utils import print_and_test
@pytest.fixture()
def funnel_analysis_view():
    """Provide a two-step funnel view for the ``glean_app`` namespace."""
    table_mapping = {
        "funnel_analysis": "events_daily_table",
        "event_types": "`mozdata.glean_app.event_types`",
        "event_type_1": "event_types",
        "event_type_2": "event_types",
    }
    return FunnelAnalysisView("glean_app", [table_mapping])
@pytest.fixture()
def funnel_analysis_explore():
    """Provide the explore expected for the two-step funnel view."""
    explore_views = {
        "base_view": "funnel_analysis",
        "joined_event_type_1": "event_type_1",
        "joined_event_type_2": "event_type_2",
    }
    return FunnelAnalysisExplore("funnel_analysis", explore_views)
def test_view_from_db_views(funnel_analysis_view):
    """Check the first view yielded by ``from_db_views`` for two funnel steps."""
    events_daily_refs = [
        ["moz-fx-data-shared-prod", "glean_app_derived", "events_daily_v1"]
    ]
    event_types_refs = [
        ["moz-fx-data-shared-prod", "glean_app_derived", "event_types_v1"]
    ]
    db_views = {
        "glean_app": {
            "events_daily": events_daily_refs,
            "event_types": event_types_refs,
        }
    }

    release = {"channel": "release", "dataset": "glean_app"}
    beta = {"channel": "beta", "dataset": "glean_app_beta"}

    generated = FunnelAnalysisView.from_db_views(
        "glean_app", True, [release, beta], db_views, 2
    )
    first_view = next(generated)

    assert first_view == funnel_analysis_view
def test_explore_from_views(funnel_analysis_view, funnel_analysis_explore):
    """Check the first explore derived from a single funnel view."""
    generated = FunnelAnalysisExplore.from_views([funnel_analysis_view])
    first_explore = next(generated)
    assert first_explore == funnel_analysis_explore
def test_view_lookml(funnel_analysis_view):
    """Check the full LookML produced for the two-step funnel view."""
    # The funnel view: one yes/no dimension per step, then count and
    # fraction measures per step.
    funnel_view = {
        "name": "funnel_analysis",
        "extends": ["events_daily_table"],
        "dimensions": [
            {
                "name": "completed_event_1",
                "type": "yesno",
                "sql": (
                    "REGEXP_CONTAINS(${TABLE}.events, mozfun.event_analysis.create_funnel_regex(["
                    "${event_type_1.match_string}],"
                    "True))"
                ),
            },
            {
                "name": "completed_event_2",
                "type": "yesno",
                "sql": (
                    "REGEXP_CONTAINS(${TABLE}.events, mozfun.event_analysis.create_funnel_regex(["
                    "${event_type_2.match_string}],"
                    "True))"
                ),
            },
        ],
        "measures": [
            {
                "name": "count_user_days_event_1",
                "type": "count",
                "filters": [
                    {"completed_event_1": "yes"},
                ],
            },
            {
                "name": "count_user_days_event_2",
                "type": "count",
                "filters": [
                    {"completed_event_1": "yes"},
                    {"completed_event_2": "yes"},
                ],
            },
            {
                "name": "fraction_user_days_event_1",
                "type": "number",
                "sql": "SAFE_DIVIDE(${count_user_days_event_1}, ${count_user_days_event_1})",
            },
            {
                "name": "fraction_user_days_event_2",
                "type": "number",
                "sql": "SAFE_DIVIDE(${count_user_days_event_2}, ${count_user_days_event_1})",
            },
        ],
    }

    # The shared derived-table view that every step view extends.
    event_types_view = {
        "name": "event_types",
        "derived_table": {
            "sql": (
                "SELECT "
                "mozfun.event_analysis.aggregate_match_strings( "
                "ARRAY_AGG( "
                "mozfun.event_analysis.event_index_to_match_string(index))) AS match_string "
                "FROM "
                "`mozdata.glean_app.event_types` "
                "WHERE "
                "{% condition message_id %} event_types.category {% endcondition %} "
                "AND {% condition event_type %} event_types.event {% endcondition %}"
            )
        },
        "filters": [
            {
                "name": "category",
                "type": "string",
                "suggest_explore": "event_names",
                "suggest_dimension": "event_names.category",
            },
            {
                "name": "name",
                "type": "string",
                "suggest_explore": "event_names",
                "suggest_dimension": "event_names.name",
            },
        ],
        "dimensions": [
            {
                "name": "match_string",
                "hidden": "yes",
                "sql": "${TABLE}.match_string",
            }
        ],
    }

    step_views = [
        {
            "name": "event_type_1",
            "extends": ["event_types"],
        },
        {
            "name": "event_type_2",
            "extends": ["event_types"],
        },
    ]

    expected = {
        "includes": ["events_daily_table.view.lkml"],
        "views": [funnel_view, event_types_view, *step_views],
    }

    actual = funnel_analysis_view.to_lookml(Mock(), None)
    print_and_test(expected=expected, actual=actual)

Просмотреть файл

@ -390,6 +390,25 @@ class MockClient:
return bigquery.Table(
table_ref, schema=[SchemaField("context_id", "STRING")]
)
if table_ref == "mozdata.glean_app.events_daily":
return bigquery.Table(
table_ref,
schema=[
SchemaField("client_id", "STRING"),
SchemaField("submission_date", "DATE"),
SchemaField("country", "STRING"),
SchemaField("events", "STRING"),
],
)
if table_ref == "mozdata.glean_app.event_types":
return bigquery.Table(
table_ref,
schema=[
SchemaField("category", "STRING"),
SchemaField("event", "STRING"),
SchemaField("index", "STRING"),
],
)
raise ValueError(f"Table not found: {table_ref}")

Просмотреть файл

@ -289,7 +289,6 @@ def test_get_looker_views(glean_apps, generated_sql_uri):
namespace,
[
{
"channel": "release",
"table": "mozdata.glean_app.baseline_clients_daily",
}
],
@ -306,7 +305,6 @@ def test_get_looker_views(glean_apps, generated_sql_uri):
namespace,
[
{
"channel": "release",
"table": "mozdata.glean_app.baseline_clients_last_seen",
}
],
@ -375,8 +373,12 @@ def test_get_funnel_view(glean_apps, tmp_path):
namespace,
[
{
"events_daily_view": "events_daily_table",
"event_types_view": "event_types_table",
"funnel_analysis": "events_daily_table",
"event_types": "`mozdata.glean_app.event_types`",
"event_type_1": "event_types",
"event_type_2": "event_types",
"event_type_3": "event_types",
"event_type_4": "event_types",
}
],
),
@ -386,6 +388,7 @@ def test_get_funnel_view(glean_apps, tmp_path):
[
{
"table": "mozdata.glean_app.events_daily",
"channel": "release",
},
],
),
@ -393,7 +396,10 @@ def test_get_funnel_view(glean_apps, tmp_path):
namespace,
"event_types_table",
[
{"table": "mozdata.glean_app.event_types"},
{
"table": "mozdata.glean_app.event_types",
"channel": "release",
},
],
),
]

Просмотреть файл

@ -58,9 +58,11 @@ def get_differences(expected, result, path="", sep="."):
return differences
def print_and_test(expected, result):
def print_and_test(expected, result=None, actual=None):
"""Print objects and differences, then test equality."""
pp = pprint.PrettyPrinter(indent=2)
if actual is not None:
result = actual
print("\nExpected:")
pp.pprint(expected)