Add dashboarding files and tests.

This commit is contained in:
Marina Samuel 2018-02-05 14:28:03 -05:00
Parent 17b22ef4e6
Commit 894694ccd3
18 changed files: 2253 additions and 0 deletions

stmoab/ActivityStreamExperimentDashboard.py Normal file

@@ -0,0 +1,205 @@
import logging
from redash_client.constants import VizWidth
from stmoab.SummaryDashboard import SummaryDashboard
class ActivityStreamExperimentDashboard(SummaryDashboard):
# Each entry is either a string naming both the measurement and the
# event being measured, or a key-value pair: {<measurement_name>: <events>}
DEFAULT_EVENTS = ["CLICK", "SEARCH", "BLOCK", "DELETE",
{
"event_name": "Positive Interactions",
"event_list": ["CLICK", "BOOKMARK_ADD", "SEARCH"]}]
UT_EVENTS = [
"scalar_parent_browser_engagement_unique_domains_count",
"scalar_parent_browser_engagement_active_ticks",
"scalar_parent_browser_engagement_tab_open_event_count",
"scalar_parent_browser_engagement_max_concurrent_tab_count",
"scalar_parent_browser_engagement_unfiltered_uri_count"]
UT_HOURLY_EVENTS = [
"scalar_parent_browser_engagement_unique_domains_count",
"scalar_parent_browser_engagement_tab_open_event_count",
"scalar_parent_browser_engagement_max_concurrent_tab_count",
"scalar_parent_browser_engagement_unfiltered_uri_count"]
UT_MAPPED_HOURLY_EVENTS = [
"scalar_parent_browser_engagement_navigation_searchbar",
"scalar_parent_browser_engagement_navigation_about_newtab"
"scalar_parent_browser_engagement_navigation_about_home"]
MAPPED_UT_EVENTS = [
"scalar_parent_browser_engagement_navigation_searchbar",
"scalar_parent_browser_engagement_navigation_about_newtab",
"scalar_parent_browser_engagement_navigation_about_home"]
DEFAULT_EVENTS_TABLE = "assa_events_daily"
URL_FETCHER_DATA_SOURCE_ID = 28
DISABLE_TITLE = "Disable Rate"
RETENTION_DIFF_TITLE = "Daily Retention Difference (Experiment - Control)"
def __init__(self, redash_client, project_name, dash_name, exp_id,
start_date=None, end_date=None):
DASH_TITLE = "{project}: {dash}".format(
project=project_name, dash=dash_name)
super(ActivityStreamExperimentDashboard, self).__init__(
redash_client,
DASH_TITLE,
self.DEFAULT_EVENTS_TABLE,
start_date, end_date)
logging.basicConfig()
self._logger = logging.getLogger()
self._logger.setLevel(logging.INFO)
self._experiment_id = exp_id
self._params["experiment_id"] = self._experiment_id
self._logger.info((
"ActivityStreamExperimentDashboard: {name} "
"Initialization Complete".format(name=dash_name)))
def _get_title(self, template_name):
title = template_name.title().split(": ")
# Take the part after the "<prefix>: " separator, falling back to the
# whole string when there is none (avoids returning a list).
title = title[1] if len(title) > 1 else title[0]
return title
def _get_event_title_description(self, template, event):
if type(event) == str:
event_name = event.capitalize()
event_string = "('{}')".format(event)
else:
event_name = event["event_name"]
events = []
# Use a distinct loop variable so the event argument (stored in
# _params below) is not clobbered.
for event_item in event["event_list"]:
events.append("'{}'".format(event_item))
event_string = "(" + ", ".join(events) + ")"
self._params["event"] = event
self._params["event_string"] = event_string
title = description = self._get_title(template["name"]).replace(
"Event", event_name)
if template["description"]:
description = template["description"].lower().replace(
"event", event_name).capitalize()
event_data = {
"title": title,
"description": description
}
return event_data
def _create_options(self):
options = {
"parameters": []
}
for param in self._params:
param_obj = {
"title": param,
"name": param,
"type": "text",
"value": self._params[param],
"global": False
}
options["parameters"].append(param_obj)
return options
def _apply_non_event_template(self, template, chart_data, values=None):
title = description = self._get_title(template["name"])
if template["description"]:
description = template["description"]
self._add_template_to_dashboard(
template,
chart_data,
title,
VizWidth.WIDE,
description
)
def _apply_event_template(self, template, chart_data,
events_list, events_table, title=None):
for event in events_list:
event_data = self._get_event_title_description(template, event)
self._add_template_to_dashboard(
template,
chart_data,
event_data["title"],
VizWidth.REGULAR,
event_data["description"],
)
def _add_template_to_dashboard(self, template, chart_data, title,
viz_width, description):
# Remove graphs if they already exist.
if title in chart_data:
self._logger.info(("ActivityStreamExperimentDashboard: "
"{template} graph exists and is being removed"
.format(template=title)))
query_id = chart_data[title]["query_id"]
widget_id = chart_data[title]["widget_id"]
self.remove_graph_from_dashboard(widget_id, query_id)
self._logger.info(("ActivityStreamExperimentDashboard: "
"New {title} graph is being added"
.format(title=title)))
self._add_forked_query_to_dashboard(
title,
template["id"],
self._params,
viz_width,
template["options"],
template["type"],
description
)
def _apply_functions_to_templates(
self, template_keyword, events_list, events_table,
events_function, general_function=None, title=None
):
if events_table is None:
events_table = self._events_table
self._params["events_table"] = events_table
templates = self.redash.search_queries(template_keyword)
chart_data = self.get_query_ids_and_names()
for template in templates:
if "event" in template["name"].lower():
self._logger.info((
"ActivityStreamExperimentDashboard: "
"Processing template '{template_name}'"
.format(template_name=template["name"])))
events_function(
template,
chart_data,
events_list,
events_table,
title)
else:
self._logger.info((
"ActivityStreamExperimentDashboard: "
"Processing template '{template_name}'"
.format(template_name=template["name"])))
general_function(template, chart_data)
def add_graph_templates(self, template_keyword,
events_list=None, events_table=None):
self._logger.info(
"ActivityStreamExperimentDashboard: Adding templates.")
if events_list is None:
events_list = self.DEFAULT_EVENTS
self._apply_functions_to_templates(
template_keyword,
events_list,
events_table,
self._apply_event_template,
self._apply_non_event_template
)
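
A note on the templating mechanics above: _add_forked_query_to_dashboard (defined in SummaryDashboard.py below) and _apply_ttable_event_template (next file) reduce Redash-style {{{param}}} placeholders to Python format fields and fill them from _params. A minimal standalone sketch, with an invented template string:

params = {
    "experiment_id": "exp-014-screenshotsasync",
    "start_date": "2017-11-14",
    "events_table": "assa_events_daily",
}
# Invented template text; the real templates live in Redash and are
# found via search_queries().
template = ("SELECT date, COUNT(*) FROM {{{events_table}}} "
            "WHERE experiment_id = '{{{experiment_id}}}' "
            "AND date >= '{{{start_date}}}' GROUP BY 1")
# {{{x}}} collapses to {x}, then str.format fills in the values.
adjusted = template.replace("{{{", "{").replace("}}}", "}")
sql_query = adjusted.format(**params)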

stmoab/StatisticalDashboard.py Normal file

@@ -0,0 +1,224 @@
import math
import statistics
from scipy import stats
import statsmodels.stats.power as smp
from redash_client.constants import VizWidth
from stmoab.utils import upload_as_json, create_boto_transfer
from stmoab.constants import TTableSchema
from stmoab.ActivityStreamExperimentDashboard import (
ActivityStreamExperimentDashboard)
class StatisticalDashboard(ActivityStreamExperimentDashboard):
TTABLE_DESCRIPTION = (
"Smaller p-values (e.g. <= 0.05) indicate a high "
"probability that the variants have different distributions. Alpha "
"error indicates the probability a difference is observed when one "
does not exist. Larger power (e.g. >= 0.7) indicates a high 
"probability that an observed difference is correct. Beta error "
"(1 - power) indicates the probability that no difference is observed "
"when indeed one exists.")
ALPHA_ERROR = 0.005
TTABLE_TEMPLATE = {"columns": TTableSchema, "rows": []}
def __init__(
self, redash_client, aws_access_key, aws_secret_key,
s3_bucket_id, project_name, dash_name, exp_id,
start_date=None, end_date=None
):
super(StatisticalDashboard, self).__init__(
redash_client,
project_name,
dash_name,
exp_id,
start_date,
end_date)
self._ttables = {}
self._s3_bucket = s3_bucket_id
self._transfer = create_boto_transfer(aws_access_key, aws_secret_key)
def _copy_ttable_template(self):
template_copy = self.TTABLE_TEMPLATE.copy()
template_copy["rows"] = []
return template_copy
def _compute_pooled_stddev(self, control_std, exp_std,
control_vals, exp_vals):
control_len_sub_1 = len(control_vals) - 1
exp_len_sub_1 = len(exp_vals) - 1
pooled_stddev_num = (pow(control_std, 2) * control_len_sub_1 +
pow(exp_std, 2) * exp_len_sub_1)
pooled_stddev_denom = control_len_sub_1 + exp_len_sub_1
pooled_stddev = math.sqrt(pooled_stddev_num / float(pooled_stddev_denom))
return pooled_stddev
def _power_and_ttest(self, control_vals, exp_vals):
control_mean = statistics.mean(control_vals)
control_std = statistics.stdev(control_vals)
exp_mean = statistics.mean(exp_vals)
exp_std = statistics.stdev(exp_vals)
pooled_stddev = self._compute_pooled_stddev(
control_std, exp_std, control_vals, exp_vals)
power = 0
percent_diff = None
if control_mean != 0 and pooled_stddev != 0:
percent_diff = (control_mean - exp_mean) / float(control_mean)
effect_size = (abs(percent_diff) * float(control_mean)) / float(pooled_stddev)
power = smp.TTestIndPower().solve_power(
effect_size,
nobs1=len(control_vals),
ratio=len(exp_vals) / float(len(control_vals)),
alpha=self.ALPHA_ERROR, alternative='two-sided')
ttest_result = stats.ttest_ind(control_vals, exp_vals, equal_var=False)
p_val = ""
if len(ttest_result) >= 2 and not math.isnan(ttest_result[1]):
p_val = ttest_result[1]
mean_diff = exp_mean - control_mean
if p_val <= self.ALPHA_ERROR and mean_diff < 0:
significance = "Negative"
elif p_val <= self.ALPHA_ERROR and mean_diff > 0:
significance = "Positive"
else:
significance = "Neutral"
return {
"power": power,
"p_val": p_val,
"control_mean": control_mean,
"mean_diff": mean_diff,
"percent_diff": 0 if percent_diff is None else percent_diff * -100,
"significance": significance,
}
def _get_ttable_data_for_query(self, label, query_string,
column_name, data_source_id):
data = self.redash.get_query_results(
query_string, data_source_id)
if data is None or len(data) <= 3 or (column_name not in data[0]):
return {}
control_vals = []
exp_vals = []
for row in data:
if "type" in row and row["type"].find("control") == -1:
exp_vals.append(row[column_name])
elif "type" in row:
control_vals.append(row[column_name])
else:
return {}
results = self._power_and_ttest(control_vals, exp_vals)
return {
"Metric": label,
"Alpha Error": self.ALPHA_ERROR,
"Power": results["power"],
"Two-Tailed P-value (ttest)": results["p_val"],
"Control Mean": results["control_mean"],
"Experiment Mean - Control Mean": results["mean_diff"],
"Percent Difference in Means": results["percent_diff"],
"Significance": results["significance"]
}
def _apply_ttable_event_template(self, template, chart_data, events_list,
events_table, title):
if title not in self._ttables:
self._ttables[title] = self._copy_ttable_template()
self._params["events_table"] = events_table
for event in events_list:
event_data = self._get_event_title_description(template, event)
options = self._create_options()
adjusted_string = template["query"].replace(
"{{{", "{").replace("}}}", "}")
query_string = adjusted_string.format(**self._params)
self.redash.update_query(
template["id"],
template["name"],
template["query"],
template["data_source_id"],
event_data["description"],
options
)
ttable_row = self._get_ttable_data_for_query(
event_data["title"],
query_string,
"count",
template["data_source_id"])
if len(ttable_row) == 0:
self._logger.info((
"StatisticalTester: "
"Query '{name}' has no relevant data and will not be "
"included in T-Table.".format(name=event_data["title"])))
continue
self._ttables[title]["rows"].append(ttable_row)
def add_ttable_data(self, template_keyword, title,
events_list=None, events_table=None):
self._logger.info((
"StatisticalTester: Adding data for "
"{keyword}").format(keyword=template_keyword))
if events_list is None:
events_list = self.DEFAULT_EVENTS
events_table = self._events_table
# Create the t-table
self._apply_functions_to_templates(
template_keyword,
events_list,
events_table,
self._apply_ttable_event_template,
None,
title)
def add_ttable(self, title):
self._logger.info((
"StatisticalTester: Creating a T-Table with "
"title {title}").format(title=title))
FILENAME = '{exp_id}_{title}'.format(exp_id=self._experiment_id, title=title)
chart_data = self.get_query_ids_and_names()
# Remove a table if it already exists
if title in chart_data:
self._logger.info((
"StatisticalTester: "
"Stale T-Table exists and will be removed"))
query_id = chart_data[title]["query_id"]
widget_id = chart_data[title]["widget_id"]
self.remove_graph_from_dashboard(widget_id, query_id)
query_string = upload_as_json(
"experiments",
FILENAME,
self._transfer,
self._s3_bucket,
self._ttables[title],
)
query_id, table_id = self.redash.create_new_query(
title,
query_string,
self.URL_FETCHER_DATA_SOURCE_ID,
self.TTABLE_DESCRIPTION,
)
self.redash.add_visualization_to_dashboard(
self._dash_id, table_id, VizWidth.WIDE)
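
The statistics in _power_and_ttest can be sanity-checked in isolation. A minimal sketch with invented sample values, using the same scipy/statsmodels calls as the class above:

import math
import statistics
from scipy import stats
import statsmodels.stats.power as smp

control_vals = [4, 6, 8, 4, 6, 8]
exp_vals = [1, 2, 3, 1, 2, 3]

# Pooled stddev: sqrt(((n1-1)*s1^2 + (n2-1)*s2^2) / (n1 + n2 - 2))
s_c = statistics.stdev(control_vals)
s_e = statistics.stdev(exp_vals)
pooled = math.sqrt(
    ((len(control_vals) - 1) * s_c ** 2 +
     (len(exp_vals) - 1) * s_e ** 2) /
    float(len(control_vals) + len(exp_vals) - 2))

# Cohen's-d-style effect size; algebraically the same as
# (abs(percent_diff) * control_mean) / pooled_stddev above.
effect = abs(statistics.mean(control_vals) -
             statistics.mean(exp_vals)) / pooled
power = smp.TTestIndPower().solve_power(
    effect,
    nobs1=len(control_vals),
    ratio=len(exp_vals) / float(len(control_vals)),
    alpha=0.005, alternative='two-sided')

# Welch's t-test (equal_var=False), as in the class above.
t_stat, p_val = stats.ttest_ind(control_vals, exp_vals, equal_var=False)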

stmoab/SummaryDashboard.py Normal file

@@ -0,0 +1,262 @@
import time
from redash_client.constants import (
VizWidth, VizType, ChartType, TimeInterval)
from stmoab.templates import retention, all_events_weekly, active_users
from stmoab.constants import RetentionType
class SummaryDashboard(object):
TILES_DATA_SOURCE_ID = 5
DAILY_RETENTION_TITLE = "Daily Retention"
WEEKLY_RETENTION_TITLE = "Weekly Retention"
EVENTS_WEEKLY_TITLE = "Weekly Events"
MAU_DAU_TITLE = "Engagement"
MAU_DAU_SERIES_OPTIONS = {
"mau": {
"type": ChartType.AREA,
"yAxis": 0,
"zIndex": 0,
"index": 0
},
"wau": {
"type": ChartType.AREA,
"yAxis": 0,
"zIndex": 1,
"index": 0
},
"dau": {
"type": ChartType.AREA,
"yAxis": 0,
"zIndex": 2,
"index": 0
},
}
class SummaryDashboardException(Exception):
pass
def __init__(self, redash_client, dash_name, events_table_name,
start_date, end_date=None):
self._dash_name = dash_name
self._events_table = events_table_name
self._start_date = start_date
self._end_date = end_date if end_date else time.strftime("%Y-%m-%d")
self._params = {
"start_date": self._start_date,
"end_date": self._end_date
}
self.redash = redash_client
self._dash_id = self.redash.create_new_dashboard(self._dash_name)
self.redash.publish_dashboard(self._dash_id)
self.public_url = self.redash.get_public_url(self._dash_id)
def update_refresh_schedule(self, seconds_to_refresh):
widgets = self.redash.get_widget_from_dash(self._dash_name)
for widget in widgets:
widget_id = widget.get(
"visualization", {}).get("query", {}).get("id", None)
if not widget_id:
continue
self.redash.update_query_schedule(widget_id, seconds_to_refresh)
def get_query_ids_and_names(self):
widgets = self.redash.get_widget_from_dash(self._dash_name)
data = {}
for widget in widgets:
widget_id = widget.get("id", None)
query_id = widget.get(
"visualization", {}).get("query", {}).get("id", None)
widget_name = widget.get(
"visualization", {}).get("query", {}).get("name", None)
widget_query = widget.get(
"visualization", {}).get("query", {}).get("query", None)
if not widget_name:
continue
data[widget_name] = {
"query_id": query_id,
"widget_id": widget_id,
"query": widget_query,
}
return data
def remove_graph_from_dashboard(self, widget_id, query_id):
if widget_id is not None:
self.redash.remove_visualization(widget_id)
if query_id is not None:
self.redash.delete_query(query_id)
def remove_all_graphs(self):
widgets = self.get_query_ids_and_names()
for widget_name in widgets:
widget = widgets[widget_name]
widget_id = widget.get("widget_id", None)
query_id = widget.get("query_id", None)
self.remove_graph_from_dashboard(widget_id, query_id)
def _get_mau_dau_column_mappings(self, query_fields):
mau_dau_column_mapping = {
# Date
query_fields[0]: "x",
# DAU
query_fields[1]: "y",
# WAU
query_fields[2]: "y",
# MAU
query_fields[3]: "y",
}
engagement_ratio_column_mapping = {
# Date
query_fields[0]: "x",
# Weekly Engagement
query_fields[4]: "y",
# Monthly Engagement
query_fields[5]: "y",
}
return mau_dau_column_mapping, engagement_ratio_column_mapping
def _add_forked_query_to_dashboard(
self, query_title, parent_query_id, query_params, visualization_width,
options, visualization_type=VizType.CHART, visualization_name="Chart"
):
fork = self.redash.fork_query(parent_query_id)
adjusted_string = fork["query"].replace("{{{", "{").replace("}}}", "}")
sql_query = adjusted_string.format(**query_params)
self.redash.update_query(
fork["id"],
query_title,
sql_query,
fork["data_source_id"],
"",
)
viz_id = self.redash.make_new_visualization_request(
fork["id"],
visualization_type,
options,
visualization_name,
)
self.redash.add_visualization_to_dashboard(
self._dash_id, viz_id, visualization_width)
def _add_query_to_dashboard(self, query_title, query_string,
data_source, visualization_width,
visualization_type=VizType.CHART,
visualization_name="", chart_type=None,
column_mapping=None, series_options=None,
time_interval=None, stacking=True):
query_id, table_id = self.redash.create_new_query(
query_title, query_string, data_source)
viz_id = self.redash.create_new_visualization(
query_id,
visualization_type,
visualization_name,
chart_type,
column_mapping,
series_options,
time_interval,
stacking,
)
self.redash.add_visualization_to_dashboard(
self._dash_id, viz_id, visualization_width)
def add_mau_dau(self, where_clause=""):
if self.MAU_DAU_TITLE in self.get_query_ids_and_names():
return
query_string, fields = active_users(
self._events_table, self._start_date, where_clause)
mau_dau_mapping, er_mapping = self._get_mau_dau_column_mappings(fields)
# Make the MAU/WAU/DAU graph
self._add_query_to_dashboard(
self.MAU_DAU_TITLE,
query_string,
self.TILES_DATA_SOURCE_ID,
VizWidth.WIDE,
VizType.CHART,
"",
ChartType.AREA,
mau_dau_mapping,
series_options=self.MAU_DAU_SERIES_OPTIONS,
)
# Make the engagement ratio graph
self._add_query_to_dashboard(
self.MAU_DAU_TITLE,
query_string,
self.TILES_DATA_SOURCE_ID,
VizWidth.WIDE,
VizType.CHART,
"",
ChartType.LINE,
er_mapping,
)
def add_retention_graph(self, retention_type, where_clause=""):
time_interval = TimeInterval.WEEKLY
graph_title = self.WEEKLY_RETENTION_TITLE
if retention_type == RetentionType.DAILY:
time_interval = TimeInterval.DAILY
graph_title = self.DAILY_RETENTION_TITLE
current_charts = self.get_query_ids_and_names()
if graph_title in current_charts:
return
query_string, fields = retention(
self._events_table, retention_type, self._start_date, where_clause)
self._add_query_to_dashboard(
graph_title,
query_string,
self.TILES_DATA_SOURCE_ID,
VizWidth.WIDE,
VizType.COHORT,
time_interval=time_interval,
)
def add_events_weekly(self, where_clause="", event_column="event_type"):
if self.EVENTS_WEEKLY_TITLE in self.get_query_ids_and_names():
return
query_string, fields = all_events_weekly(
self._events_table, self._start_date, where_clause, event_column)
column_mapping = {
fields[0]: "x",
fields[1]: "y",
fields[2]: "series",
}
self._add_query_to_dashboard(
self.EVENTS_WEEKLY_TITLE,
query_string,
self.TILES_DATA_SOURCE_ID,
VizWidth.WIDE,
VizType.CHART,
"",
ChartType.BAR,
column_mapping,
stacking=True
)
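
Most methods above hinge on the dict returned by get_query_ids_and_names(), which maps each widget's query name to its identifiers. Its shape, with invented values:

chart_data = {
    "Engagement": {
        "query_id": 42,        # Redash query ID
        "widget_id": 7,        # dashboard widget ID
        "query": "SELECT 1",   # the widget's SQL text
    },
}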

stmoab/__init__.py Normal file

@@ -0,0 +1 @@

stmoab/constants.py Normal file

@@ -0,0 +1,38 @@
class RetentionType:
DAILY = "day"
WEEKLY = "week"
TTableSchema = [
{
"name": "Metric",
"type": "string",
"friendly_name": "Metric"
}, {
"name": "Alpha Error",
"type": "float",
"friendly_name": "Alpha Error"
}, {
"name": "Power",
"type": "float",
"friendly_name": "Power"
}, {
"name": "Two-Tailed P-value (ttest)",
"type": "float",
"friendly_name": "Two-Tailed P-value (ttest)"
}, {
"name": "Control Mean",
"type": "float",
"friendly_name": "Control Mean"
}, {
"name": "Experiment Mean - Control Mean",
"type": "float",
"friendly_name": "Experiment Mean - Control Mean"
}, {
"name": "Percent Difference in Means",
"type": "float",
"friendly_name": "Percent Difference in Means"
}, {
"name": "Significance",
"type": "string",
"friendly_name": "Significance"
}]
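
Each row that StatisticalDashboard._get_ttable_data_for_query builds carries one key per column of this schema. An illustrative row (values invented):

ttable_row = {
    "Metric": "Clicks",
    "Alpha Error": 0.005,
    "Power": 0.92,
    "Two-Tailed P-value (ttest)": 0.003,
    "Control Mean": 6.0,
    "Experiment Mean - Control Mean": -4.0,
    "Percent Difference in Means": -66.7,
    "Significance": "Negative",
}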


@@ -0,0 +1,26 @@
import os
from redash_client.client import RedashClient
from stmoab.constants import RetentionType
from stmoab.SummaryDashboard import SummaryDashboard
if __name__ == '__main__':
api_key = os.environ["REDASH_API_KEY"]
redash_client = RedashClient(api_key)
dash = SummaryDashboard(
redash_client,
"Firefox iOS: Metrics Summary",
"activity_stream_mobile_events_daily",
"02/17/2017"
)
dash._events_table = "activity_stream_mobile_stats_daily"
dash.add_mau_dau()
dash._events_table = "activity_stream_mobile_events_daily"
dash.add_retention_graph(RetentionType.DAILY)
dash.add_events_weekly(event_column="event")
dash.update_refresh_schedule(3600)
#dash.remove_all_graphs()


@@ -0,0 +1,39 @@
import os
from redash_client.client import RedashClient
from stmoab.utils import read_experiment_definition, format_date, is_old_date
from stmoab.ActivityStreamExperimentDashboard import (
ActivityStreamExperimentDashboard)
DIRECTORY_NAME = "experiments/json_definitions"
URL = (
"https://experimenter.dev.mozaws.net/api/v1/"
"activity-stream/experiments.json?format=json")
def handler(json_input, context):
api_key = os.environ["REDASH_API_KEY"]
redash_client = RedashClient(api_key)
experiments = read_experiment_definition(URL)
for experiment in experiments:
end_date = None
if "end_date" in experiment and experiment["end_date"] is not None:
if is_old_date(experiment["end_date"]):
continue
end_date = format_date(experiment["end_date"])
dash = ActivityStreamExperimentDashboard(
redash_client,
experiment["name"],
experiment["slug"],
experiment["addon_versions"],
format_date(experiment["start_date"]),
end_date,
)
dash.add_graph_templates("AS Template:")
dash.add_ttable("TTests Template:")
dash.update_refresh_schedule(43200)
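
handler() assumes each entry of the fetched experiments.json exposes at least the fields read above, with dates given as epoch milliseconds. A sketch of one entry (values invented):

experiment = {
    "name": "Some Experiment",
    "slug": "exp-000-some-experiment",
    "addon_versions": ["1.8.0", "1.9.0"],
    "start_date": 1493671545000.0,  # epoch milliseconds
    "end_date": None,               # None while the experiment is running
}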


@@ -0,0 +1,222 @@
import os
from redash_client.client import RedashClient
from stmoab.StatisticalDashboard import StatisticalDashboard
'''
"Deduped Combined Frecency",
"exp-006-deduped-combined-frecency",
['1.2.0', '1.3.0', '1.4.0', '1.4.1'],
"01/18/17"
'''
'''
"Original Newtab Sites",
"exp-008-original-newtab-sites",
['1.3.0', '1.4.0', '1.4.1'],
"02/02/17"
'''
'''
"Locally Fetch Metadata",
"exp-007-locally-fetch-metadata",
['1.3.0', '1.4.0'],
"02/02/17"
'''
'''
"Locally Fetch Metadata",
"exp-010-locally-fetch-metadata",
['1.4.1', '1.5.0', '1.6.0'],
"02/15/17"
'''
'''
"Screenshots",
"exp-009-screenshots",
['1.5.0', '1.6.0'],
"02/23/17"
'''
'''
"Async Screenshots",
"exp-012-screenshotsasync",
['1.7.0'],
"03/20/17"
'''
'''
"Bookmark Screenshots",
"exp-013-bookmark-screenshots",
['1.8.0'],
"04/06/17"
'''
'''
"Metadata Long Cache",
"exp-015-metadatalongcache",
['1.8.0', '1.9.0'],
"04/06/17"
'''
'''
"Screenshots Long Cache",
"exp-014-screenshotsasync",
['1.8.0'],
"04/06/17"
'''
'''
"Pocket",
"exp-021-pocketstories",
['1.10.1'],
"05/02/17"
'''
'''
"Metadata No Service",
"exp-018-metadata-no-service",
['1.10.0', '1.10.1'],
"05/01/17"
'''
'''
"Metadata Local Refresh",
"exp-019-metadata-local-refresh",
['1.10.0', '1.10.1'],
"05/01/17"
'''
'''
"Activity Stream System Addon Experiment",
"v1 Nightly Pocket User Personalization",
"as-nightly-personalization-1400890",
start_date="2017-09-27",
'''
'''
"Activity Stream System Addon Experiment",
"v2 Beta",
"pref-flip-activity-stream-beta-1389722-v2",
start_date="2017-08-30",
end_date="2017-09-09"
'''
'''
"Activity Stream System Addon Experiment",
"v1 Release",
"pref-flip-activity-stream-56-release-bug-1405332",
start_date="2017-10-05",
end_date="2017-10-20"
'''
'''
"Activity Stream System Addon Experiment",
"v1 About Home",
"pref-flip-activity-stream-56-beta-about-home-bug-1405334",
start_date="2017-10-05",
'''
'''
"Activity Stream System Addon Experiment",
"v2 Pocket Personalization",
"pref-flip-activity-stream-58-nightly-pocket-personalization-bug-1400890",
start_date="2017-10-06",
'''
'''
"Activity Stream System Addon Experiment",
"Beta Revisited",
"pref-flip-activity-stream-beta-1389722-v2",
start_date="2017-08-30",
end_date="2017-09-08"
'''
'''
"Activity Stream System Addon Experiment",
"Release enUS",
"pref-flip-activity-stream-56-release-bug-1405332",
start_date="2017-10-05",
end_date="2017-10-20"
'''
'''
"Activity Stream System Addon Experiment",
"Beta Post Bug Fix",
"pref-flip-activity-stream-beta-1389722-v2-round2",
start_date="2017-09-19",
end_date="2017-09-24"
'''
'''
"Activity Stream System Addon Experiment",
"Beta All Pocket Geos Post Bug Fix",
"pref-flip-activity-stream-beta-1389722-v2-2-round2",
start_date="2017-09-20",
end_date="2017-09-28"
'''
'''
"Activity Stream System Addon Experiment",
"Beta 57 Study",
"pref-flip-activity-stream-57-beta-enabled-bug-1410535",
start_date="2017-10-25",
'''
'''
"Activity Stream System Addon Experiment",
"Beta 57 Two Rows of Topsites",
"pref-flip-activity-stream-57-beta-two-rows-bug-1411695",
start_date=" ",
'''
'''
"Activity Stream System Addon Experiment",
"Beta 57 Two Rows v2",
"pref-flip-activity-stream-57-beta-two-rows-user-pref-bug-1411695",
start_date="2017-10-31",
'''
'''
"Activity Stream System Addon Experiment",
"v3 Pocket Personalization",
"pref-flip-activity-stream-58-nightly-optimized-pocket-personalization-bug-1410483",
start_date="2017-10-31",
'''
'''
"Activity Stream System Addon Experiment",
"57 Release",
"pref-flip-activity-stream-57-release-enabled-existing-users-bug-1415966",
start_date="2017-11-14"
'''
'''
"Activity Stream System Addon Experiment",
"57 Release New Users",
"pref-flip-activity-stream-57-release-enabled-new-users-bug-1415967",
start_date="2017-11-14"
'''
if __name__ == '__main__':
api_key = os.environ["REDASH_API_KEY"]
aws_access_key = os.environ['AWS_ACCESS_KEY']
aws_secret_key = os.environ['AWS_SECRET_KEY']
s3_bucket_id_stats = os.environ['S3_BUCKET_ID_STATS']
redash_client = RedashClient(api_key)
PING_CENTRE_TTABLE = "Statistical Analysis - Ping Centre"
UT_TTABLE = "Statistical Analysis - UT"
UT_HOURLY_TTABLE = "Statistical Analysis (Per Active Hour) - UT"
dash = StatisticalDashboard(
redash_client,
"Activity Stream System Addon Experiment",
"57 Release",
"pref-flip-activity-stream-57-release-enabled-existing-users-bug-1415966",
start_date="2017-11-14"
)
# Average Events per Day UT
#dash.add_graph_templates("AS Template UT One:", dash.UT_EVENTS)
#dash.add_graph_templates("AS Template UT Mapped Two:", dash.MAPPED_UT_EVENTS)
# Average Events per Active Hour UT
dash.add_graph_templates("AS Template UT Three:", dash.UT_HOURLY_EVENTS)
dash.add_graph_templates("AS Template UT Mapped Four:", dash.MAPPED_UT_EVENTS)
# Average Events per Day Ping Centre
#dash.add_graph_templates("ASSA Template:", dash.DEFAULT_EVENTS)
#dash.add_ttable_data("TTests Template UT Four:", UT_TTABLE, dash.UT_EVENTS)
#dash.add_ttable_data("TTests Template Mapped UT Six:", UT_TTABLE, dash.MAPPED_UT_EVENTS)
#dash.add_ttable(UT_TTABLE)
# Events per Hour TTests
#dash.add_ttable_data("TTests Template Per Hour UT Five:", UT_HOURLY_TTABLE, dash.UT_HOURLY_EVENTS)
#dash.add_ttable_data("TTests Template Per Hour Mapped UT:", UT_HOURLY_TTABLE, dash.MAPPED_UT_EVENTS)
#dash.add_ttable(UT_HOURLY_TTABLE)
#dash.add_ttable_data("TTests Template:", PING_CENTRE_TTABLE, dash.DEFAULT_EVENTS)
#dash.add_ttable(PING_CENTRE_TTABLE)
#dash.update_refresh_schedule(86400)
#dash.remove_all_graphs()


@@ -0,0 +1,35 @@
import os
from redash_client.client import RedashClient
from stmoab.constants import RetentionType
from stmoab.SummaryDashboard import SummaryDashboard
if __name__ == '__main__':
api_key = os.environ["REDASH_API_KEY"]
redash_client = RedashClient(api_key)
test_pilot_experiments = {
"Summary": "@testpilot-addon",
"Min Vid": "@min-vid",
"Cliqz": "testpilot@cliqz.com",
"Pulse": "pulse@mozilla.com",
"Snooze Tabs": "snoozetabs@mozilla.com"
}
for exp_name in test_pilot_experiments:
where_clause = "AND addon_id = '{0}'".format(test_pilot_experiments[exp_name])
dash = SummaryDashboard(
redash_client,
"Test Pilot: {0}".format(exp_name),
"ping_centre_test_pilot"
"02/13/2017"
)
dash.add_mau_dau(where_clause)
dash.add_retention_graph(RetentionType.WEEKLY, where_clause)
dash.add_events_weekly(where_clause)
dash.update_refresh_schedule(3600)
#dash.remove_all_graphs()

stmoab/templates.py Normal file

@@ -0,0 +1,123 @@
def retention(events_table, retention_type, start_date, where_clause):
return """
WITH population AS
(SELECT client_id AS unique_id, DATE_TRUNC('{1}', date) AS cohort_date, COUNT(*)
FROM {0}
WHERE 1 = 1
{3}
GROUP BY 1, 2),
activity AS
(SELECT DATE_TRUNC('{1}', date) AS activity_date, client_id AS unique_id, cohort_date
FROM {0}
JOIN population
ON population.unique_id = client_id
WHERE DATE_TRUNC('{1}', date) >= (CURRENT_DATE - INTERVAL '91 days')
AND DATE_TRUNC('{1}', cohort_date) >= (CURRENT_DATE - INTERVAL '91 days')
{3}),
population_agg AS
(SELECT DATE_TRUNC('{1}', date) AS cohort_date, COUNT(DISTINCT client_id) AS total
FROM {0}
WHERE 1 = 1
{3}
GROUP BY 1)
SELECT * FROM
(SELECT date, day as week_number, value, total, MAX(day) over (PARTITION BY date) AS max_week_num
FROM
(SELECT activity.cohort_date AS date,
DATE_DIFF('{1}', activity.cohort_date, activity_date) AS day,
total,
COUNT(DISTINCT unique_id) AS value
FROM activity
JOIN population_agg
ON activity.cohort_date = population_agg.cohort_date
WHERE activity_date >= activity.cohort_date
AND activity.cohort_date > '{2}'
GROUP BY 1, 2, 3))
WHERE week_number < max_week_num
ORDER BY date, week_number""".format(events_table, retention_type, start_date, where_clause), []
def all_events_weekly(events_table, start_date, where_clause, event_column):
return """
WITH weekly_events AS
(SELECT DATE_TRUNC('week', date) AS week, COUNT(*)
FROM {0}
WHERE DATE_TRUNC('week', date) >= '{1}'
{2}
GROUP BY 1),
event_counts AS
(SELECT week, {3}, count FROM
(SELECT *, RANK() over (PARTITION BY week ORDER BY count) AS rank FROM
(SELECT DATE_TRUNC('week', date) AS week, {3}, COUNT(*)
FROM {0}
WHERE DATE_TRUNC('week', date) >= '{1}'
{2}
GROUP BY 1, 2
ORDER BY 1, 2))
WHERE rank <= 20)
SELECT weekly_events.week, event_counts.{3}, event_counts.count / weekly_events.count::FLOAT * 100 AS rate
FROM weekly_events
LEFT JOIN event_counts
ON weekly_events.week = event_counts.week""".format(events_table, start_date, where_clause, event_column), ["week", "rate", event_column]
def active_users(events_table, start_date, where_clause=""):
return """
WITH weekly AS
(SELECT day, COUNT(DISTINCT client_id) AS dist_clients
FROM
(SELECT DISTINCT date
FROM {0}
WHERE date >= '{1}'
{2}
ORDER BY date) AS g(day)
LEFT JOIN {0}
ON {0}.date BETWEEN g.day - 7 AND g.day
AND {0}.date >= '{1}'
{2}
GROUP BY day
ORDER BY day),
monthly AS
(SELECT day, count(DISTINCT client_id) AS dist_clients
FROM
(SELECT DISTINCT date
FROM {0}
WHERE date >= '{1}'
{2}
ORDER BY date) AS g(day)
LEFT JOIN {0}
ON {0}.date BETWEEN g.day - 28 AND g.day
AND {0}.date >= '{1}'
{2}
GROUP BY day
ORDER BY day),
daily AS
(SELECT date, COUNT(DISTINCT a.client_id) AS dau
FROM {0} AS a WHERE date >= '{1}' {2} GROUP BY date),
smoothed_daily AS
(SELECT date as day,
dau,
AVG(dau) OVER(order by date ROWS BETWEEN 7 PRECEDING AND 0 FOLLOWING) as dist_clients
FROM daily
ORDER BY day desc)
SELECT
date,
d.dist_clients as dau,
w.dist_clients as wau,
m.dist_clients as mau,
(d.dist_clients::FLOAT / w.dist_clients) * 100.0 as weekly_engagement,
(d.dist_clients::FLOAT / m.dist_clients) * 100.0 as monthly_engagement
FROM {0} a
JOIN smoothed_daily d on d.day = date
JOIN weekly w on w.day = date
JOIN monthly m on m.day = date
WHERE date < current_date and date >= '2016-05-10'
GROUP BY date, d.dist_clients, wau, mau
ORDER BY date, dau, wau, mau""".format(events_table, start_date, where_clause), ["date", "dau", "wau", "mau", "weekly_engagement", "monthly_engagement"]
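
Each template function returns a (query_string, fields) pair, where fields feeds the column mappings in SummaryDashboard. A usage sketch (table name and date invented):

from stmoab.templates import active_users, retention

query_string, fields = active_users("assa_events_daily", "2017-11-14")
# fields == ["date", "dau", "wau", "mau",
#            "weekly_engagement", "monthly_engagement"]

retention_query, _ = retention(
    "assa_events_daily", "week", "2017-11-14", "")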

stmoab/tests/__init__.py Normal file

stmoab/tests/base.py Normal file

@@ -0,0 +1,76 @@
import mock
import json
import unittest
from redash_client.client import RedashClient
from stmoab.SummaryDashboard import SummaryDashboard
class AppTest(unittest.TestCase):
def post_server(self, url, data):
EXPECTED_QUERY_ID = "query_id123"
EXPECTED_QUERY_STRING = "select some_stuff from table"
QUERY_ID_RESPONSE = {
"id": EXPECTED_QUERY_ID
}
FORK_RESPONSE = {
"id": EXPECTED_QUERY_ID,
"query": EXPECTED_QUERY_STRING,
"data_source_id": 3
}
response = self.get_mock_response(
content=json.dumps(QUERY_ID_RESPONSE))
if "fork" in url:
response = self.get_mock_response(
content=json.dumps(FORK_RESPONSE))
self.server_calls += 1
return response
def get_dashboard(self, api_key):
EVENTS_TABLE_NAME = "activity_stream_mobile_events_daily"
START_DATE = "02/17/2017"
DASH_NAME = "Firefox iOS: Metrics Summary"
self.mock_requests_get.return_value = self.get_mock_response()
self.mock_requests_post.return_value = self.get_mock_response()
dashboard = SummaryDashboard(
self.redash,
DASH_NAME,
EVENTS_TABLE_NAME,
START_DATE,
)
return dashboard
def setUp(self):
API_KEY = "test_key"
self.redash = RedashClient(API_KEY)
mock_requests_post_patcher = mock.patch(
"redash_client.client.requests.post")
self.mock_requests_post = mock_requests_post_patcher.start()
self.addCleanup(mock_requests_post_patcher.stop)
mock_requests_get_patcher = mock.patch(
"redash_client.client.requests.get")
self.mock_requests_get = mock_requests_get_patcher.start()
self.addCleanup(mock_requests_get_patcher.stop)
mock_requests_delete_patcher = mock.patch(
"redash_client.client.requests.delete")
self.mock_requests_delete = mock_requests_delete_patcher.start()
self.addCleanup(mock_requests_delete_patcher.stop)
self.dash = self.get_dashboard(API_KEY)
def get_mock_response(self, status=200, content='{}'):
mock_response = mock.Mock()
mock_response.status_code = status
mock_response.content = content
return mock_response


@@ -0,0 +1,116 @@
import json
import time
from stmoab.tests.base import AppTest
from stmoab.ActivityStreamExperimentDashboard import (
ActivityStreamExperimentDashboard)
class TestActivityStreamExperimentDashboard(AppTest):
START_DATE = "2017-17-02"
END_DATE = time.strftime("%Y-%m-%d")
DASH_PROJECT = "Activity Stream Experiment"
DASH_NAME = "Screenshots Long Cache"
EXPERIMENT_ID = "exp-014-screenshotsasync"
def get_dashboard(self, api_key):
self.mock_requests_get.return_value = self.get_mock_response()
self.mock_requests_post.return_value = self.get_mock_response()
dashboard = ActivityStreamExperimentDashboard(
self.redash,
self.DASH_PROJECT,
self.DASH_NAME,
self.EXPERIMENT_ID,
self.START_DATE,
)
return dashboard
def test_correct_values_at_initialization(self):
self.assertEqual(self.dash._experiment_id, self.EXPERIMENT_ID)
self.assertEqual(
self.dash._dash_name,
"{project}: {dash}".format(
project=self.DASH_PROJECT, dash=self.DASH_NAME))
self.assertEqual(self.dash._start_date, self.START_DATE)
self.assertEqual(self.dash._end_date, self.END_DATE)
# 2 posts to create the dashboard and make it public
self.assertEqual(self.mock_requests_post.call_count, 2)
self.assertEqual(self.mock_requests_get.call_count, 1)
self.assertEqual(self.mock_requests_delete.call_count, 0)
def test_add_templates_makes_correct_calls(self):
self.get_calls = 0
QUERIES_IN_SEARCH = [{
"id": 5,
"description": "SomeQuery",
"name": "AS Template: Query Title Event",
"data_source_id": 5
}, {
"id": 6,
"description": "SomeQuery2",
"name": "AS Template: Query Title",
"data_source_id": 5
}]
VISUALIZATIONS_FOR_QUERY = {
"visualizations": [
{"options": {}},
{"options": {}}
]
}
WIDGETS_RESPONSE = {
"widgets": [[{
"id": "the_widget_id",
"visualization": {
"query": {
"id": "some_id",
"name": "Query Title Click"
},
},
}]]
}
def get_server(url):
response = self.get_mock_response()
if self.get_calls == 0:
response = self.get_mock_response(
content=json.dumps(QUERIES_IN_SEARCH))
elif self.get_calls <= 2:
response = self.get_mock_response(
content=json.dumps(VISUALIZATIONS_FOR_QUERY))
else:
response = self.get_mock_response(
content=json.dumps(WIDGETS_RESPONSE))
self.get_calls += 1
return response
self.server_calls = 0
self.mock_requests_delete.return_value = self.get_mock_response()
self.mock_requests_post.side_effect = self.post_server
self.mock_requests_get.side_effect = get_server
self.dash.add_graph_templates("Template:")
# GET calls:
# 1) Create dashboard
# 2) Get dashboard widgets
# 3) Search queries
# 4) Get two existing visualizations
# POST calls:
# 1) Create dashboard
# 2) Search queries
# 3) Fork query
# 4) Update query
# 5) Create visualization
# 6) Append visualization to dashboard
# 7) Repeat 2-6 six times
# 8) Make dashboard public
# DELETE calls:
# One existing graph is removed from dashboard
# and deleted (2 calls)
self.assertEqual(self.mock_requests_post.call_count, 32)
self.assertEqual(self.mock_requests_get.call_count, 5)
self.assertEqual(self.mock_requests_delete.call_count, 2)


@@ -0,0 +1,401 @@
import math
import mock
import json
import time
import statistics
from stmoab.tests.base import AppTest
from stmoab.StatisticalDashboard import (
StatisticalDashboard)
class TestStatisticalDashboard(AppTest):
START_DATE = "02/17/2017"
END_DATE = time.strftime("%m/%d/%y")
DASH_PROJECT = "Activity Stream Experiment"
DASH_NAME = "Screenshots Long Cache"
EXPERIMENT_ID = "exp-014-screenshotsasync"
AWS_ACCESS_KEY = "access"
AWS_SECRET_KEY = "secret"
AWS_BUCKET_ID = "bucket"
def get_dashboard(self, api_key):
self.mock_requests_get.return_value = self.get_mock_response()
self.mock_requests_post.return_value = self.get_mock_response()
mock_boto_transfer_patcher = mock.patch(
"stmoab.utils.S3Transfer")
mock_boto_transfer_patcher.start()
self.addCleanup(mock_boto_transfer_patcher.stop)
dashboard = StatisticalDashboard(
self.redash,
self.AWS_ACCESS_KEY,
self.AWS_SECRET_KEY,
self.AWS_BUCKET_ID,
self.DASH_PROJECT,
self.DASH_NAME,
self.EXPERIMENT_ID,
self.START_DATE,
)
return dashboard
def test_pooled_stddev(self):
exp_vals = [1, 2, 3]
control_vals = [4, 6, 8]
EXPECTED_POOLED_STDDEV = math.sqrt(10 / float(4))
exp_std = statistics.stdev(exp_vals)
control_std = statistics.stdev(control_vals)
pooled_stddev = self.dash._compute_pooled_stddev(
control_std, exp_std, control_vals, exp_vals)
self.assertEqual(pooled_stddev, EXPECTED_POOLED_STDDEV)
def test_power_and_ttest_negative_results(self):
exp_vals = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]
control_vals = [4, 6, 8, 4, 6, 8, 4, 6, 8, 4, 6, 8]
MEAN_DIFFERENCE = -4
results = self.dash._power_and_ttest(
control_vals, exp_vals)
self.assertEqual(results["mean_diff"], MEAN_DIFFERENCE)
self.assertEqual(results["significance"], "Negative")
self.assertTrue(0 <= results["p_val"] <= 0.05)
self.assertTrue(0.5 <= results["power"] <= 1)
def test_power_and_ttest_positive_results(self):
exp_vals = [4, 6, 8, 4, 6, 8, 4, 6, 8, 4, 6, 8]
control_vals = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]
MEAN_DIFFERENCE = 4
results = self.dash._power_and_ttest(
control_vals, exp_vals)
self.assertEqual(results["mean_diff"], MEAN_DIFFERENCE)
self.assertEqual(results["significance"], "Positive")
self.assertTrue(0 <= results["p_val"] <= 0.05)
self.assertTrue(0.5 <= results["power"] <= 1)
def test_power_and_ttest_neutral_results(self):
exp_vals = [4, 6, 8, 4, 6, 8, 4, 6, 8, 4, 6, 8]
control_vals = [4, 6, 8, 4, 6, 8, 4, 6, 8, 4, 6, 8]
MEAN_DIFFERENCE = 0
results = self.dash._power_and_ttest(
control_vals, exp_vals)
self.assertEqual(results["mean_diff"], MEAN_DIFFERENCE)
self.assertEqual(results["significance"], "Neutral")
self.assertEqual(results["p_val"], 1)
self.assertTrue(0 <= results["power"] <= 0.1)
def test_get_ttable_data_for_non_existent_query(self):
QUERY_RESULTS_RESPONSE = {}
self.mock_requests_post.return_value = self.get_mock_response(
content=json.dumps(QUERY_RESULTS_RESPONSE))
ttable_row = self.dash._get_ttable_data_for_query(
"beep", "meep", "boop", 5)
self.assertEqual(ttable_row, {})
def test_ttable_not_made_for_non_matching_graph(self):
BAD_ROW = []
for i in range(5):
BAD_ROW.append({
"some_weird_row": "beep",
"count": 5
})
QUERY_RESULTS_RESPONSE = {
"query_result": {
"data": {
"rows": BAD_ROW
}
}
}
self.mock_requests_post.return_value = self.get_mock_response(
content=json.dumps(QUERY_RESULTS_RESPONSE))
ttable_row = self.dash._get_ttable_data_for_query(
"beep", "meep", "count", 5)
self.assertEqual(len(ttable_row), 0)
def test_ttable_row_data_is_correct(self):
EXPECTED_LABEL = "beep"
EXPECTED_ROWS = []
EXPECTED_MEAN_DIFFERENCE = -4
for i in range(12):
EXPECTED_ROWS.append({
"date": 123,
"count": (i % 3) + 1,
"type": "experiment"
})
EXPECTED_ROWS.append({
"date": 123,
"count": ((i * 2) % 6) + 4, # 4, 6, 8
"type": "control"
})
QUERY_RESULTS_RESPONSE = {
"query_result": {
"data": {
"rows": EXPECTED_ROWS
}
}
}
self.mock_requests_post.return_value = self.get_mock_response(
content=json.dumps(QUERY_RESULTS_RESPONSE))
ttable_row = self.dash._get_ttable_data_for_query(
EXPECTED_LABEL, "meep", "count", 5)
self.assertEqual(len(ttable_row), 8)
self.assertEqual(ttable_row["Metric"], EXPECTED_LABEL)
self.assertEqual(ttable_row["Alpha Error"], self.dash.ALPHA_ERROR)
self.assertTrue(0.5 <= ttable_row["Power"] <= 1)
self.assertTrue(0 <= ttable_row["Two-Tailed P-value (ttest)"] <= 0.05)
self.assertEqual(
ttable_row["Experiment Mean - Control Mean"], EXPECTED_MEAN_DIFFERENCE)
def test_add_ttable_makes_correct_calls(self):
self.get_calls = 0
self.server_calls = 0
QUERIES_IN_SEARCH = [{
"id": 5,
"description": "SomeQuery",
"name": "AS Template: Query Title Event",
"data_source_id": 5,
"query": "SELECT stuff FROM things"
}]
VISUALIZATIONS_FOR_QUERY = {
"visualizations": [
{"options": {}},
]
}
WIDGETS_RESPONSE = {
"widgets": [[{
"visualization": {
"query": {
"name": "Some table",
},
},
}]]
}
EXPECTED_ROWS = [{
"count": 123,
"type": "experiment",
}, {
"count": 789,
"type": "control",
}, {
"count": 1233,
"type": "experiment",
}, {
"count": 7819,
"type": "control",
}]
QUERY_RESULTS_RESPONSE = {
"query_result": {
"data": {
"rows": EXPECTED_ROWS
}
}
}
def get_server(url):
if self.get_calls == 0:
response = self.get_mock_response(
content=json.dumps(QUERIES_IN_SEARCH))
elif self.get_calls <= 2 and self.get_calls > 0:
response = self.get_mock_response(
content=json.dumps(VISUALIZATIONS_FOR_QUERY))
else:
response = self.get_mock_response(
content=json.dumps(WIDGETS_RESPONSE))
self.get_calls += 1
return response
self.mock_requests_get.side_effect = get_server
self.mock_requests_post.return_value = self.get_mock_response(
content=json.dumps(QUERY_RESULTS_RESPONSE))
TABLE_NAME = "Table Name"
self.dash.add_ttable_data(
"Template:", TABLE_NAME, self.dash.DEFAULT_EVENTS)
self.dash.add_ttable(TABLE_NAME)
# GET calls:
# 1) Create dashboard
# 2) Get dashboard widgets (2 times)
# 3) Search for templates
# 4) Get template
# POST calls:
# 1) Create dashboard
# 2) Update queries (5 events * 2 requests each: update + refresh)
# 3) Get Ttable query results for 5 rows
# 4) Create query (doesn't return ID, so no refresh)
# 5) Add query to dashboard
# 6) Make dashboard public
self.assertEqual(self.mock_requests_post.call_count, 19)
self.assertEqual(self.mock_requests_get.call_count, 5)
self.assertEqual(self.mock_requests_delete.call_count, 0)
def test_ttable_with_no_rows(self):
self.get_calls = 0
self.server_calls = 0
QUERIES_IN_SEARCH = [{
"id": 5,
"description": "SomeQuery",
"name": "AS Template: Query Title Event",
"data_source_id": 5,
"query": "SELECT stuff FROM things"
}]
VISUALIZATIONS_FOR_QUERY = {
"visualizations": [
{"options": {}},
{"options": {}}
]
}
WIDGETS_RESPONSE = {
"widgets": [[{
"visualization": {
"query": {
"name": "Some Graph",
},
},
}]]
}
def get_server(url):
response = self.get_mock_response()
if self.get_calls == 0:
response = self.get_mock_response(
content=json.dumps(QUERIES_IN_SEARCH))
elif self.get_calls <= 2 and self.get_calls > 0:
response = self.get_mock_response(
content=json.dumps(VISUALIZATIONS_FOR_QUERY))
else:
response = self.get_mock_response(
content=json.dumps(WIDGETS_RESPONSE))
self.get_calls += 1
return response
mock_json_uploader = mock.patch(
("stmoab.StatisticalDashboard.upload_as_json"))
upload_file_patch = mock_json_uploader.start()
upload_file_patch.return_value = ""
self.mock_requests_get.side_effect = get_server
self.mock_requests_post.side_effect = self.post_server
TABLE_NAME = "Table Name"
self.dash.add_ttable_data(
"Template:", TABLE_NAME, self.dash.DEFAULT_EVENTS)
self.dash.add_ttable(TABLE_NAME)
# GET calls:
# 1) Create dashboard
# 2) Get dashboard widgets (2 times)
# 3) Search for templates
# 4) Get templates (2 calls)
# POST calls:
# 1) Create dashboard
# 2) Update queries (5 events * 2 requests each: update + refresh)
# 3) Get Ttable query results for 5 rows
# 4) Create query (create + refresh)
# 5) Add query to dashboard
# 6) Make dashboard public
self.assertEqual(self.mock_requests_post.call_count, 20)
self.assertEqual(self.mock_requests_get.call_count, 6)
self.assertEqual(self.mock_requests_delete.call_count, 0)
# The ttable has no rows
args, kwargs = upload_file_patch.call_args
# data is the fifth positional argument to upload_as_json.
self.assertEqual(len(args[4]["rows"]), 0)
mock_json_uploader.stop()
def test_statistical_analysis_graph_exist_deletes_and_creates_new(self):
self.get_calls = 0
TABLE_NAME = "Table Name"
QUERIES_IN_SEARCH = [{
"id": 5,
"description": "SomeQuery",
"name": "AS Template: Query Title Event",
"data_source_id": 5,
"query": "SELECT stuff FROM things"
}]
VISUALIZATIONS_FOR_QUERY = {
"visualizations": [
{"options": {}},
{"options": {}}
]
}
WIDGETS_RESPONSE = {
"widgets": [[{
"id": "123",
"visualization": {
"query": {
"name": TABLE_NAME,
"id": "abc"
},
},
}]]
}
def get_server(url):
response = self.get_mock_response()
if self.get_calls == 0:
response = self.get_mock_response(
content=json.dumps(QUERIES_IN_SEARCH))
elif self.get_calls <= 2 and self.get_calls > 0:
response = self.get_mock_response(
content=json.dumps(VISUALIZATIONS_FOR_QUERY))
else:
response = self.get_mock_response(
content=json.dumps(WIDGETS_RESPONSE))
self.get_calls += 1
return response
mock_json_uploader = mock.patch(
("stmoab.StatisticalDashboard.upload_as_json"))
upload_file_patch = mock_json_uploader.start()
upload_file_patch.return_value = ""
self.mock_requests_delete.return_value = self.get_mock_response()
self.mock_requests_get.side_effect = get_server
self.dash.add_ttable_data(
"Template:", TABLE_NAME)
self.dash.add_ttable(TABLE_NAME)
# GET calls:
# 1) Create dashboard
# 2) Get dashboard widgets (2 times)
# 3) Search for templates
# 4) Get template
# POST calls:
# 1) Create dashboard
# 2) Update queries (5 events * 2 requests each: update + refresh)
# 3) Get Ttable query results for 5 rows
# 4) Create query (doesn't return ID, so no refresh)
# 5) Add query to dashboard
# 6) Make dashboard public
self.assertEqual(self.mock_requests_post.call_count, 19)
self.assertEqual(self.mock_requests_get.call_count, 5)
self.assertEqual(self.mock_requests_delete.call_count, 2)
mock_json_uploader.stop()


@@ -0,0 +1,295 @@
import json
from stmoab.constants import RetentionType
from stmoab.tests.base import AppTest
from stmoab.templates import active_users
class TestSummaryDashboard(AppTest):
def test_update_refresh_schedule_success(self):
EXPECTED_QUERY_ID = "query_id123"
WIDGETS_RESPONSE = {
"widgets": [[{
"visualization": {
"query": {
"nope": "fail"
}
}}],
[{"visualization": {
"query": {
"id": EXPECTED_QUERY_ID
}
}},
{"visualization": {
"query": {
"muhahaha": "you can't catch me!"
}
}}
]]
}
self.mock_requests_get.return_value = self.get_mock_response(
content=json.dumps(WIDGETS_RESPONSE))
self.mock_requests_post.return_value = self.get_mock_response()
self.dash.update_refresh_schedule(86400)
# 2 posts to create the dashboard and make it public
# 1 post for refreshing the one valid visualization ID
# 2 gets for creating the dashboard and looking up chart names
self.assertEqual(self.mock_requests_post.call_count, 3)
self.assertEqual(self.mock_requests_get.call_count, 2)
self.assertEqual(self.mock_requests_delete.call_count, 0)
def test_get_chart_data_success(self):
EXPECTED_QUERY_NAME = "query_name123"
EXPECTED_QUERY_NAME2 = "query_name456"
EXPECTED_QUERY_NAME3 = "query_name789"
WIDGETS_RESPONSE = {
"widgets": [[{
"visualization": {
"query": {
"name": EXPECTED_QUERY_NAME,
"id": 1
}
},
"id": 4}],
[{
"visualization": {
"query": {
"not_a_name": EXPECTED_QUERY_NAME2,
"id": 2
}
},
"id": 5
}, {
"visualization": {
"query": {
"name": EXPECTED_QUERY_NAME3,
"id": 3
}
},
"id": 6
}
]]
}
EXPECTED_NAMES = [EXPECTED_QUERY_NAME, EXPECTED_QUERY_NAME3]
EXPECTED_QUERY_IDS = [1, 3]
EXPECTED_WIDGET_IDS = [4, 6]
self.mock_requests_get.return_value = self.get_mock_response(
content=json.dumps(WIDGETS_RESPONSE))
data_dict = self.dash.get_query_ids_and_names()
self.assertEqual(len(data_dict), 2)
for name in data_dict:
self.assertEqual(len(data_dict[name]), 3)
self.assertTrue(name in EXPECTED_NAMES)
self.assertTrue(data_dict[name]["query_id"] in EXPECTED_QUERY_IDS)
self.assertTrue(data_dict[name]["widget_id"] in EXPECTED_WIDGET_IDS)
def test_remove_all_graphs_success(self):
EXPECTED_QUERY_ID = "query_id123"
EXPECTED_QUERY_ID2 = "query_id456"
EXPECTED_QUERY_ID3 = "query_id789"
WIDGETS_RESPONSE = {
"widgets": [[{
"id": EXPECTED_QUERY_ID,
"visualization": {
"query": {
"id": EXPECTED_QUERY_ID,
"name": "A"
}
}}], [{
"id": EXPECTED_QUERY_ID2,
"visualization": {
"query": {
"id": EXPECTED_QUERY_ID2,
"name": "B"
}
}
}, {
"id": EXPECTED_QUERY_ID3,
"visualization": {
"query": {
"id": EXPECTED_QUERY_ID3,
"name": "C"
}
}
}
]]
}
self.mock_requests_delete.return_value = self.get_mock_response()
self.mock_requests_get.return_value = self.get_mock_response(
content=json.dumps(WIDGETS_RESPONSE))
self.dash.remove_all_graphs()
# 2 posts to create the dashboard and make it public
# 2 gets for creating the dashboard and looking up chart names
self.assertEqual(self.mock_requests_post.call_count, 2)
self.assertEqual(self.mock_requests_get.call_count, 2)
self.assertEqual(self.mock_requests_delete.call_count, 6)
def test_mau_dau_column_mapping_returns_correct_mappings(self):
EXPECTED_MAU_DAU_MAPPING = {
"date": "x",
"dau": "y",
"wau": "y",
"mau": "y",
}
EXPECTED_ENGAGEMENT_RATIO_MAPPING = {
"date": "x",
"weekly_engagement": "y",
"monthly_engagement": "y",
}
query_string, fields = active_users(
self.dash._events_table, self.dash._start_date)
mau_mapping, er_mapping = self.dash._get_mau_dau_column_mappings(fields)
self.assertEqual(mau_mapping, EXPECTED_MAU_DAU_MAPPING)
self.assertEqual(er_mapping, EXPECTED_ENGAGEMENT_RATIO_MAPPING)
def test_mau_dau_graphs_exist_makes_no_request(self):
WIDGETS_RESPONSE = {
"widgets": [[{
"visualization": {
"query": {
"name": self.dash.MAU_DAU_TITLE,
},
},
}]]
}
self.mock_requests_get.return_value = self.get_mock_response(
content=json.dumps(WIDGETS_RESPONSE))
self.dash.add_mau_dau()
# 2 posts to create the dashboard and make it public
# 2 gets for creating the dashboard and looking up chart names
self.assertEqual(self.mock_requests_post.call_count, 2)
self.assertEqual(self.mock_requests_get.call_count, 2)
self.assertEqual(self.mock_requests_delete.call_count, 0)
def test_mau_dau_graphs_make_expected_calls(self):
self.server_calls = 0
self.mock_requests_get.return_value = self.get_mock_response()
self.mock_requests_post.side_effect = self.post_server
self.dash.add_mau_dau()
# GET calls:
# 1) Create dashboard
# 2) Get dashboard widgets
# 3) Get first table ID
# 4) Get second table ID
# POST calls:
# 1) Create dashboard
# 2) Create first query
# 3) Refresh first query
# 4) Create second query
# 5) Refresh second query
# 6) Create first visualization
# 7) Append first visualization to dashboard
# 8) Create second visualization
# 9) Append second visualization to dashboard
# 10) Make dashboard public
self.assertEqual(self.mock_requests_post.call_count, 10)
self.assertEqual(self.mock_requests_get.call_count, 4)
self.assertEqual(self.mock_requests_delete.call_count, 0)
def test_retention_graphs_exist_makes_no_request(self):
WIDGETS_RESPONSE = {
"widgets": [[{
"visualization": {
"query": {
"name": self.dash.WEEKLY_RETENTION_TITLE,
},
},
}]]
}
self.mock_requests_get.return_value = self.get_mock_response(
content=json.dumps(WIDGETS_RESPONSE))
self.dash.add_retention_graph(RetentionType.WEEKLY)
# 2 posts to create the dashboard and make it public
# 2 gets for creating the dashboard and looking up chart names
self.assertEqual(self.mock_requests_post.call_count, 2)
self.assertEqual(self.mock_requests_get.call_count, 2)
self.assertEqual(self.mock_requests_delete.call_count, 0)
def test_retention_graph_makes_expected_calls(self):
self.server_calls = 0
self.mock_requests_get.return_value = self.get_mock_response()
self.mock_requests_post.side_effect = self.post_server
self.dash.add_retention_graph(RetentionType.DAILY)
# GET calls:
# 1) Create dashboard
# 2) Get dashboard widgets
# 3) Get table ID
# POST calls:
# 1) Create dashboard
# 2) Create query
# 3) Refresh query
# 4) Create visualization
# 5) Append visualization to dashboard
# 6) Make dashboard public
self.assertEqual(self.mock_requests_post.call_count, 6)
self.assertEqual(self.mock_requests_get.call_count, 3)
self.assertEqual(self.mock_requests_delete.call_count, 0)
def test_weekly_events_graph_exist_makes_no_request(self):
WIDGETS_RESPONSE = {
"widgets": [[{
"visualization": {
"query": {
"name": self.dash.EVENTS_WEEKLY_TITLE,
},
},
}]]
}
self.mock_requests_get.return_value = self.get_mock_response(
content=json.dumps(WIDGETS_RESPONSE))
self.dash.add_events_weekly()
# 2 posts to create the dashboard and make it public
# 2 gets for creating the dashboard and looking up chart names
self.assertEqual(self.mock_requests_post.call_count, 2)
self.assertEqual(self.mock_requests_get.call_count, 2)
self.assertEqual(self.mock_requests_delete.call_count, 0)
def test_weekly_events_graph_makes_expected_calls(self):
self.server_calls = 0
self.mock_requests_get.return_value = self.get_mock_response()
self.mock_requests_post.side_effect = self.post_server
self.dash.add_events_weekly()
# GET calls:
# 1) Create dashboard
# 2) Get dashboard widgets
# 3) Get table ID
# POST calls:
# 1) Create dashboard
# 2) Create query
# 3) Refresh query
# 4) Create visualization
# 5) Append visualization to dashboard
# 6) Make dashboard public
self.assertEqual(self.mock_requests_post.call_count, 6)
self.assertEqual(self.mock_requests_get.call_count, 3)
self.assertEqual(self.mock_requests_delete.call_count, 0)

stmoab/tests/test_utils.py Normal file

@@ -0,0 +1,118 @@
import mock
import json
import tempfile
import calendar
from datetime import datetime, timedelta
from stmoab.tests.base import AppTest
from stmoab.constants import TTableSchema
from stmoab.utils import (
upload_as_json, read_experiment_definition, create_boto_transfer,
read_experiment_definition_s3, format_date, is_old_date)
class TestUtils(AppTest):
def test_upload_as_json_return_val(self):
DIRECTORY_NAME = "experiments"
FILENAME = "test_file_name"
ACCESS_KEY = "key"
SECRET_KEY = "secret"
BUCKET_ID = "bucket"
DATA = {"columns": TTableSchema, "rows": []}
EXPECTED_S3_KEY = "activity-stream/" + DIRECTORY_NAME + "/" + FILENAME
EXPECTED_BASE_URL = "https://analysis-output.telemetry.mozilla.org/"
mock_boto_transfer_patcher = mock.patch("stmoab.utils.S3Transfer")
mock_boto_transfer_patcher.start()
transfer = create_boto_transfer(ACCESS_KEY, SECRET_KEY)
query_string = upload_as_json(DIRECTORY_NAME, FILENAME, transfer, BUCKET_ID, DATA)
self.assertEqual(query_string, EXPECTED_BASE_URL + EXPECTED_S3_KEY)
mock_boto_transfer_patcher.stop()
def test_download_experiment_definition_json_non_json_return_val(self):
mock_boto_transfer_patcher = mock.patch("stmoab.utils.s3.get_object")
mock_transfer = mock_boto_transfer_patcher.start()
mock_transfer.return_value = "fail"
json_result = read_experiment_definition_s3("beep")
self.assertEqual(json_result, {})
mock_boto_transfer_patcher.stop()
def test_download_experiment_definition_non_json_return_val(self):
mock_boto_transfer_patcher = mock.patch("stmoab.utils.urllib.urlopen")
mock_transfer = mock_boto_transfer_patcher.start()
mock_transfer.return_value = "fail"
json_result = read_experiment_definition("beep")
self.assertEqual(json_result, {})
mock_boto_transfer_patcher.stop()
def test_download_experiment_definition_json_return_val(self):
EXPECTED_JSON = json.dumps({"experiment1": "some_value"})
download_patcher = mock.patch("stmoab.utils.urllib.urlopen")
mock_download = download_patcher.start()
# Make a temp file for returning
temp_file = tempfile.mkstemp()
file_handle = open(temp_file[1], "w+")
file_handle.write(EXPECTED_JSON)
file_handle.seek(0)
mock_download.return_value = file_handle
json_result = read_experiment_definition("boop")
self.assertEqual(json_result, json.loads(EXPECTED_JSON))
download_patcher.stop()
file_handle.close()
def test_download_experiment_definition_s3_json_return_val(self):
EXPECTED_JSON = json.dumps({"experiment1": "some_value"})
mock_boto_download_patcher = mock.patch("stmoab.utils.s3.get_object")
mock_download = mock_boto_download_patcher.start()
# Make a temp file for returning
temp_file = tempfile.mkstemp()
file_handle = open(temp_file[1], "w+")
file_handle.write(EXPECTED_JSON)
file_handle.seek(0)
mock_download.return_value = {"Body": file_handle}
json_result = read_experiment_definition_s3("boop")
self.assertEqual(json_result, json.loads(EXPECTED_JSON))
mock_boto_download_patcher.stop()
file_handle.close()
def test_date_format(self):
MS_DATE = 1493671545000.0
EXPECTED_FORMAT = '05/01/17'
formatted_date = format_date(MS_DATE)
self.assertEqual(formatted_date, EXPECTED_FORMAT)
def test_is_old_date(self):
new_datetime = datetime.today() - timedelta(days=1)
MS_DATE_OLD = 1493671545000.0
MS_DATE_NEW = calendar.timegm(new_datetime.utctimetuple()) * 1000.0
is_old = is_old_date(MS_DATE_OLD)
self.assertEqual(is_old, True)
is_old = is_old_date(MS_DATE_NEW)
self.assertEqual(is_old, False)

stmoab/utils.py Normal file

@@ -0,0 +1,72 @@
import json
import boto3
import urllib
from boto3.s3.transfer import S3Transfer
from datetime import datetime, timedelta
s3 = boto3.client("s3")
def create_boto_transfer(access_key, secret_key):
client = boto3.client(
"s3",
region_name="us-west-2",
aws_access_key_id=access_key,
aws_secret_access_key=secret_key)
transfer = S3Transfer(client)
return transfer
def upload_as_json(directory_name, filename, transfer, bucket_id, data):
path = "activity-stream/" + directory_name + "/"
s3_key = path + filename
json_data = json.dumps(data)
file_path = "/tmp/{filename}".format(filename=filename)
with open(file_path, 'w') as f:
f.write(json_data)
transfer.upload_file(
file_path,
bucket_id,
s3_key,
extra_args={"ContentType": "application/json"})
return "https://analysis-output.telemetry.mozilla.org/" + s3_key
def read_experiment_definition(url):
try:
response = urllib.urlopen(url)
return json.loads(response.read())
except:
return {}
def read_experiment_definition_s3(filename):
DIRECTORY_NAME = "experiments/json_definitions"
path = "activity-stream/" + DIRECTORY_NAME + "/"
s3_key = path + filename
obj = s3.get_object(Bucket="telemetry-public-analysis-2", Key=s3_key)
try:
experiments_string = obj["Body"].read()
return json.loads(experiments_string)
except:
return {}
def format_date(date):
date_epoch = datetime.fromtimestamp(date / 1000.0)
date = date_epoch.strftime("%m/%d/%y")
return date
# A date is considered "old" if it's > 3 days earlier than today.
def is_old_date(date):
three_days_ago = datetime.today() - timedelta(days=3)
input_date_epoch = datetime.fromtimestamp(date / 1000.0)
return input_date_epoch < three_days_ago
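
Both date helpers take epoch milliseconds. A quick sketch (timestamp invented; format_date renders in the machine's local timezone):

from stmoab.utils import format_date, is_old_date

ms_date = 1493671545000.0
print(format_date(ms_date))  # e.g. "05/01/17" in US timezones
print(is_old_date(ms_date))  # True: more than 3 days before today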