289 строки
12 KiB
Python
289 строки
12 KiB
Python
"""Class to describe a Funnel Analysis View.
|
|
|
|
We create a single View file and a single Explore file.
|
|
|
|
The View file has many Looker views defined within it:
|
|
funnel_analysis: Based on events_daily, has the `events` string and user dimensions (e.g. country)
|
|
event_names: The names of events. Used for suggestions.
|
|
event_N: For each possible funnel step, a single view. This is used to define what that funnel step is.
|
|
|
|
The Explore's job is to take this generated file an link all those event_N's to the funnel_analysis.
|
|
We join them via cross join, because event_N views only have 1 row and 1 column - the match_string
|
|
to use for a regex_match on the `events` string in funnel_analysis.
|
|
|
|
For example, say we filter event_1 on `event`: `WHERE event in ("session-start, "session-end")`
|
|
Then we join that with funnel_analysis: `FROM funnel_analysis CROSS JOIN event_1`
|
|
That lets us find out whether the user completed those funnel steps:
|
|
`SELECT REGEXP_CONTAINS(funnel_analysis.events, event_1.match_string) AS completed_step_1`
|
|
|
|
The `funnel_analysis` view has some nice dimensions to hide these details from the end user,
|
|
e.g. `completed_funnel_step_N`. We can then count those users across dimensions.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from textwrap import dedent
|
|
from typing import Any, Dict, Iterator, List, Optional
|
|
|
|
from .view import View, ViewDict
|
|
|
|
DEFAULT_NUM_FUNNEL_STEPS: int = 4
|
|
|
|
|
|
class FunnelAnalysisView(View):
|
|
"""A view for doing Funnel Analysis."""
|
|
|
|
type: str = "funnel_analysis_view"
|
|
|
|
def __init__(self, namespace: str, tables: List[Dict[str, str]]):
|
|
"""Get an instance of a FunnelAnalysisView."""
|
|
super().__init__(namespace, "funnel_analysis", FunnelAnalysisView.type, tables)
|
|
|
|
@classmethod
|
|
def from_db_views(
|
|
klass,
|
|
namespace: str,
|
|
is_glean: bool,
|
|
channels: List[Dict[str, str]],
|
|
db_views: dict,
|
|
num_funnel_steps: int = DEFAULT_NUM_FUNNEL_STEPS,
|
|
) -> Iterator[FunnelAnalysisView]:
|
|
"""Get Client Count Views from db views and app variants.
|
|
|
|
We only create a FunnelAnalysisView if we have the two necessary db tables:
|
|
- events_daily
|
|
- event_types
|
|
"""
|
|
# We can guarantee there will always be at least one channel,
|
|
# because this comes from the associated _get_glean_repos in
|
|
# namespaces.py
|
|
dataset = next(
|
|
(channel for channel in channels if channel.get("channel") == "release"),
|
|
channels[0],
|
|
)["dataset"]
|
|
|
|
necessary_views = {"events_daily", "event_types"}
|
|
actual_views = {}
|
|
for view_id, references in db_views[dataset].items():
|
|
if view_id in necessary_views:
|
|
actual_views[view_id] = f"`mozdata.{dataset}.{view_id}`"
|
|
|
|
if len(actual_views) == 2:
|
|
# Only create an instance if we have the two necessary tables
|
|
tables = {
|
|
"funnel_analysis": "events_daily_table",
|
|
"event_types": actual_views["event_types"],
|
|
}
|
|
tables.update(
|
|
{f"step_{i}": "event_types" for i in range(1, num_funnel_steps + 1)}
|
|
)
|
|
yield FunnelAnalysisView(
|
|
namespace,
|
|
[tables],
|
|
)
|
|
|
|
@classmethod
|
|
def from_dict(
|
|
klass, namespace: str, name: str, _dict: ViewDict
|
|
) -> FunnelAnalysisView:
|
|
"""Get a FunnalAnalysisView from a dict representation."""
|
|
return FunnelAnalysisView(namespace, _dict["tables"])
|
|
|
|
def to_lookml(self, v1_name: Optional[str], dryrun) -> Dict[str, Any]:
|
|
"""Get this view as LookML."""
|
|
return {
|
|
"includes": [f"{self.tables[0]['funnel_analysis']}.view.lkml"],
|
|
"views": self._funnel_analysis_lookml() + self._event_types_lookml(),
|
|
}
|
|
|
|
def n_events(self) -> int:
|
|
"""Get the number of events allowed in this funnel."""
|
|
return len([k for k in self.tables[0] if k.startswith("step_")])
|
|
|
|
def _funnel_analysis_lookml(self) -> List[Dict[str, Any]]:
|
|
dimensions = [
|
|
{
|
|
"name": f"completed_step_{n}",
|
|
"type": "yesno",
|
|
"description": f"Whether the user completed step {n} on the associated day.",
|
|
"sql": dedent(
|
|
f"""
|
|
REGEXP_CONTAINS(
|
|
${{TABLE}}.events, mozfun.event_analysis.create_funnel_regex(
|
|
[{", ".join([
|
|
f'${{step_{ni}.match_string}}' for ni in range(1, n + 1)
|
|
])}],
|
|
True
|
|
)
|
|
)
|
|
"""
|
|
),
|
|
}
|
|
for n in range(1, self.n_events() + 1)
|
|
]
|
|
|
|
count_measures: List[Dict[str, Any]] = [
|
|
{
|
|
"name": f"count_completed_step_{n}",
|
|
"description": (
|
|
f"The number of times that step {n} was completed. "
|
|
"Grouping by day makes this a count of users who completed "
|
|
f"step {n} on each day."
|
|
),
|
|
"type": "count",
|
|
"filters": [{f"completed_step_{ni}": "yes"} for ni in range(1, n + 1)],
|
|
}
|
|
for n in range(1, self.n_events() + 1)
|
|
]
|
|
|
|
fractional_measures: List[Dict[str, Any]] = [
|
|
{
|
|
"name": f"fraction_completed_step_{n}",
|
|
"description": f"Of the user-days that completed Step 1, the fraction that completed step {n}.",
|
|
"type": "number",
|
|
"value_format": "0.00%",
|
|
"sql": f"SAFE_DIVIDE(${{count_completed_step_{n}}}, ${{count_completed_step_1}})",
|
|
}
|
|
for n in range(1, self.n_events() + 1)
|
|
]
|
|
return [
|
|
{
|
|
"name": "funnel_analysis",
|
|
"extends": ["events_daily_table"],
|
|
"dimensions": dimensions,
|
|
"measures": count_measures + fractional_measures,
|
|
}
|
|
]
|
|
|
|
def _event_types_lookml(self) -> List[Dict[str, Any]]:
|
|
events = (
|
|
[
|
|
{
|
|
"name": "event_types",
|
|
"derived_table": {
|
|
"sql": dedent(
|
|
f"""
|
|
SELECT
|
|
mozfun.event_analysis.aggregate_match_strings(
|
|
ARRAY_AGG(
|
|
DISTINCT
|
|
{{% if _filters['property_value'] -%}}
|
|
mozfun.event_analysis.event_property_value_to_match_string(
|
|
et.index,
|
|
properties.index,
|
|
property_value.value
|
|
)
|
|
{{% elsif _filters['property_name'] -%}}
|
|
mozfun.event_analysis.event_property_index_to_match_string(
|
|
et.index,
|
|
properties.index
|
|
)
|
|
{{% else -%}}
|
|
mozfun.event_analysis.event_index_to_match_string(et.index)
|
|
{{% endif -%}}
|
|
IGNORE NULLS
|
|
)
|
|
) AS match_string
|
|
FROM
|
|
{self.tables[0]['event_types']} AS et
|
|
LEFT JOIN
|
|
UNNEST(COALESCE(event_properties, [])) AS properties
|
|
LEFT JOIN
|
|
UNNEST(properties.value) AS property_value
|
|
WHERE
|
|
{{% condition category %}} category {{% endcondition %}}
|
|
AND {{% condition event %}} event {{% endcondition %}}
|
|
AND {{% condition property_name %}} properties.key {{% endcondition %}}
|
|
AND {{% condition property_value %}} property_value.key {{% endcondition %}}
|
|
"""
|
|
),
|
|
},
|
|
"filters": [
|
|
{
|
|
"name": "category",
|
|
"description": "The event category, as defined in metrics.yaml.",
|
|
"type": "string",
|
|
"suggest_explore": "event_names",
|
|
"suggest_dimension": "event_names.category",
|
|
},
|
|
{
|
|
"name": "event",
|
|
"description": "The event name.",
|
|
"type": "string",
|
|
"suggest_explore": "event_names",
|
|
"suggest_dimension": "event_names.event",
|
|
},
|
|
{
|
|
"name": "property_name",
|
|
"description": "The event property name.",
|
|
"type": "string",
|
|
"suggest_explore": "event_names",
|
|
"suggest_dimension": "event_names.property_name",
|
|
},
|
|
{
|
|
"name": "property_value",
|
|
"description": "The event property value.",
|
|
"type": "string",
|
|
"suggest_explore": "event_names",
|
|
"suggest_dimension": "event_names.property_value",
|
|
},
|
|
],
|
|
"dimensions": [
|
|
{
|
|
"name": "match_string",
|
|
"hidden": "yes",
|
|
"sql": "${TABLE}.match_string",
|
|
}
|
|
],
|
|
}
|
|
]
|
|
+ [
|
|
{
|
|
"name": f"step_{n}",
|
|
"extends": ["event_types"],
|
|
}
|
|
for n in range(1, self.n_events() + 1)
|
|
]
|
|
+ [
|
|
{
|
|
"name": "event_names",
|
|
"derived_table": {
|
|
"sql": (
|
|
"SELECT category, "
|
|
" event, "
|
|
" property.key AS property_name, "
|
|
" property_value.key AS property_value, "
|
|
" property_value.index as property_index "
|
|
f"FROM {self.tables[0]['event_types']} "
|
|
"LEFT JOIN UNNEST(event_properties) AS property "
|
|
"LEFT JOIN UNNEST(property.value) AS property_value "
|
|
)
|
|
},
|
|
"dimensions": [
|
|
{
|
|
"name": "category",
|
|
"type": "string",
|
|
"sql": "${TABLE}.category",
|
|
},
|
|
{
|
|
"name": "event",
|
|
"type": "string",
|
|
"sql": "${TABLE}.event",
|
|
},
|
|
{
|
|
"name": "property_name",
|
|
"type": "string",
|
|
"sql": "${TABLE}.property_name",
|
|
},
|
|
{
|
|
"name": "property_value",
|
|
"type": "string",
|
|
"sql": "${TABLE}.property_value",
|
|
},
|
|
],
|
|
}
|
|
]
|
|
)
|
|
|
|
return events
|