diff --git a/stmoab/ActivityStreamExperimentDashboard.py b/stmoab/ActivityStreamExperimentDashboard.py
new file mode 100644
index 0000000..8647109
--- /dev/null
+++ b/stmoab/ActivityStreamExperimentDashboard.py
@@ -0,0 +1,205 @@
+import logging
+
+from redash_client.constants import VizWidth
+
+from stmoab.SummaryDashboard import SummaryDashboard
+
+
+class ActivityStreamExperimentDashboard(SummaryDashboard):
+  # Each entry is either a string naming both the measurement and the event
+  # being measured, or a dict: {"event_name": <name>, "event_list": [<events>]}
+  DEFAULT_EVENTS = ["CLICK", "SEARCH", "BLOCK", "DELETE",
+                    {
+                        "event_name": "Positive Interactions",
+                        "event_list": ["CLICK", "BOOKMARK_ADD", "SEARCH"]}]
+  UT_EVENTS = [
+      "scalar_parent_browser_engagement_unique_domains_count",
+      "scalar_parent_browser_engagement_active_ticks",
+      "scalar_parent_browser_engagement_tab_open_event_count",
+      "scalar_parent_browser_engagement_max_concurrent_tab_count",
+      "scalar_parent_browser_engagement_unfiltered_uri_count"]
+  UT_HOURLY_EVENTS = [
+      "scalar_parent_browser_engagement_unique_domains_count",
+      "scalar_parent_browser_engagement_tab_open_event_count",
+      "scalar_parent_browser_engagement_max_concurrent_tab_count",
+      "scalar_parent_browser_engagement_unfiltered_uri_count"]
+  UT_MAPPED_HOURLY_EVENTS = [
+      "scalar_parent_browser_engagement_navigation_searchbar",
+      "scalar_parent_browser_engagement_navigation_about_newtab",
+      "scalar_parent_browser_engagement_navigation_about_home"]
+  MAPPED_UT_EVENTS = [
+      "scalar_parent_browser_engagement_navigation_searchbar",
+      "scalar_parent_browser_engagement_navigation_about_newtab",
+      "scalar_parent_browser_engagement_navigation_about_home"]
+
+  DEFAULT_EVENTS_TABLE = "assa_events_daily"
+  URL_FETCHER_DATA_SOURCE_ID = 28
+  DISABLE_TITLE = "Disable Rate"
+  RETENTION_DIFF_TITLE = "Daily Retention Difference (Experiment - Control)"
+
+  def __init__(self, redash_client, project_name, dash_name, exp_id,
+               start_date=None, end_date=None):
+    DASH_TITLE = "{project}: {dash}".format(
+        project=project_name, dash=dash_name)
+    super(ActivityStreamExperimentDashboard, self).__init__(
+        redash_client,
+        DASH_TITLE,
+        self.DEFAULT_EVENTS_TABLE,
+        start_date, end_date)
+
+    logging.basicConfig()
+    self._logger = logging.getLogger()
+    self._logger.setLevel(logging.INFO)
+    self._experiment_id = exp_id
+
+    self._params["experiment_id"] = self._experiment_id
+    self._logger.info((
+        "ActivityStreamExperimentDashboard: {name} "
+        "Initialization Complete".format(name=dash_name)))
+
+  def _get_title(self, template_name):
+    title = template_name.title().split(": ")
+    if len(title) > 1:
+      title = title[1]
+    return title
+
+  def _get_event_title_description(self, template, event):
+    if type(event) == str:
+      event_name = event.capitalize()
+      event_string = "('{}')".format(event)
+    else:
+      event_name = event["event_name"]
+      events = []
+      for event in event["event_list"]:
+        events.append("'{}'".format(event))
+      event_string = "(" + ", ".join(events) + ")"
+
+    self._params["event"] = event
+    self._params["event_string"] = event_string
+    title = description = self._get_title(template["name"]).replace(
+        "Event", event_name)
+
+    if template["description"]:
+      description = template["description"].lower().replace(
+          "event", event_name).capitalize()
+
+    event_data = {
+        "title": title,
+        "description": description
+    }
+    return event_data
+
+  def _create_options(self):
+    options = {
+        "parameters": []
+    }
+
+    for param in self._params:
+      param_obj = {
+          "title": param,
+          "name": param,
+          "type": "text",
+          "value":
self._params[param], + "global": False + } + options["parameters"].append(param_obj) + + return options + + def _apply_non_event_template(self, template, chart_data, values=None): + title = description = self._get_title(template["name"]) + + if template["description"]: + description = template["description"] + + self._add_template_to_dashboard( + template, + chart_data, + title, + VizWidth.WIDE, + description + ) + + def _apply_event_template(self, template, chart_data, + events_list, events_table, title=None): + for event in events_list: + event_data = self._get_event_title_description(template, event) + + self._add_template_to_dashboard( + template, + chart_data, + event_data["title"], + VizWidth.REGULAR, + event_data["description"], + ) + + def _add_template_to_dashboard(self, template, chart_data, title, + viz_width, description): + # Remove graphs if they already exist. + if title in chart_data: + self._logger.info(("ActivityStreamExperimentDashboard: " + "{template} graph exists and is being removed" + .format(template=title))) + + query_id = chart_data[title]["query_id"] + widget_id = chart_data[title]["widget_id"] + self.remove_graph_from_dashboard(widget_id, query_id) + + self._logger.info(("ActivityStreamExperimentDashboard: " + "New {title} graph is being added" + .format(title=title))) + self._add_forked_query_to_dashboard( + title, + template["id"], + self._params, + viz_width, + template["options"], + template["type"], + description + ) + + def _apply_functions_to_templates( + self, template_keyword, events_list, events_table, + events_function, general_function=None, title=None + ): + if events_table is None: + events_table = self._events_table + self._params["events_table"] = events_table + + templates = self.redash.search_queries(template_keyword) + chart_data = self.get_query_ids_and_names() + + for template in templates: + if "event" in template["name"].lower(): + self._logger.info(( + "ActivityStreamExperimentDashboard: " + "Processing template '{template_name}'" + .format(template_name=template["name"]))) + events_function( + template, + chart_data, + events_list, + events_table, + title) + else: + self._logger.info(( + "ActivityStreamExperimentDashboard: " + "Processing template '{template_name}'" + .format(template_name=template["name"]))) + general_function(template, chart_data) + + def add_graph_templates(self, template_keyword, + events_list=None, events_table=None): + self._logger.info( + "ActivityStreamExperimentDashboard: Adding templates.") + + if events_list is None: + events_list = self.DEFAULT_EVENTS + + self._apply_functions_to_templates( + template_keyword, + events_list, + events_table, + self._apply_event_template, + self._apply_non_event_template + ) diff --git a/stmoab/StatisticalDashboard.py b/stmoab/StatisticalDashboard.py new file mode 100644 index 0000000..ed103a2 --- /dev/null +++ b/stmoab/StatisticalDashboard.py @@ -0,0 +1,224 @@ +import math +import statistics +from scipy import stats +import statsmodels.stats.power as smp + +from redash_client.constants import VizWidth + +from stmoab.utils import upload_as_json, create_boto_transfer +from stmoab.constants import TTableSchema +from stmoab.ActivityStreamExperimentDashboard import ( + ActivityStreamExperimentDashboard) + + +class StatisticalDashboard(ActivityStreamExperimentDashboard): + TTABLE_DESCRIPTION = ( + "Smaller p-values (e.g. <= 0.05) indicate a high " + "probability that the variants have different distributions. 
Alpha " + "error indicates the probability a difference is observed when one " + "does not exists. Larger power (e.g. >= 0.7) indicates a high " + "probability that an observed difference is correct. Beta error " + "(1 - power) indicates the probability that no difference is observed " + "when indeed one exists.") + ALPHA_ERROR = 0.005 + TTABLE_TEMPLATE = {"columns": TTableSchema, "rows": []} + + + def __init__( + self, redash_client, aws_access_key, aws_secret_key, + s3_bucket_id, project_name, dash_name, exp_id, + start_date=None, end_date=None + ): + super(StatisticalDashboard, self).__init__( + redash_client, + project_name, + dash_name, + exp_id, + start_date, + end_date) + + self._ttables = {} + self._s3_bucket = s3_bucket_id + self._transfer = create_boto_transfer(aws_access_key, aws_secret_key) + + def _copy_ttable_tempalte(self): + template_copy = self.TTABLE_TEMPLATE.copy() + template_copy["rows"] = [] + return template_copy + + def _compute_pooled_stddev(self, control_std, exp_std, + control_vals, exp_vals): + + control_len_sub_1 = len(control_vals) - 1 + exp_len_sub_1 = len(exp_vals) - 1 + + pooled_stddev_num = (pow(control_std, 2) * control_len_sub_1 + + pow(exp_std, 2) * exp_len_sub_1) + pooled_stddev_denom = control_len_sub_1 + exp_len_sub_1 + + pooled_stddev = math.sqrt(pooled_stddev_num / float(pooled_stddev_denom)) + return pooled_stddev + + def _power_and_ttest(self, control_vals, exp_vals): + control_mean = statistics.mean(control_vals) + control_std = statistics.stdev(control_vals) + exp_mean = statistics.mean(exp_vals) + exp_std = statistics.stdev(exp_vals) + + pooled_stddev = self._compute_pooled_stddev( + control_std, exp_std, control_vals, exp_vals) + + power = 0 + percent_diff = None + if control_mean != 0 and pooled_stddev != 0: + percent_diff = (control_mean - exp_mean) / float(control_mean) + effect_size = (abs(percent_diff) * float(control_mean)) / float(pooled_stddev) + power = smp.TTestIndPower().solve_power( + effect_size, + nobs1=len(control_vals), + ratio=len(exp_vals) / float(len(control_vals)), + alpha=self.ALPHA_ERROR, alternative='two-sided') + + ttest_result = stats.ttest_ind(control_vals, exp_vals, equal_var=False) + p_val = "" + if len(ttest_result) >= 2 and not math.isnan(ttest_result[1]): + p_val = ttest_result[1] + + mean_diff = exp_mean - control_mean + + if p_val <= self.ALPHA_ERROR and mean_diff < 0: + significance = "Negative" + elif p_val <= self.ALPHA_ERROR and mean_diff > 0: + significance = "Positive" + else: + significance = "Neutral" + + return { + "power": power, + "p_val": p_val, + "control_mean": control_mean, + "mean_diff": mean_diff, + "percent_diff": 0 if percent_diff is None else percent_diff * -100, + "significance": significance, + } + + def _get_ttable_data_for_query(self, label, query_string, + column_name, data_source_id): + data = self.redash.get_query_results( + query_string, data_source_id) + + if data is None or len(data) <= 3 or (column_name not in data[0]): + return {} + + control_vals = [] + exp_vals = [] + for row in data: + if "type" in row and row["type"].find("control") == -1: + exp_vals.append(row[column_name]) + elif "type" in row: + control_vals.append(row[column_name]) + else: + return {} + + results = self._power_and_ttest(control_vals, exp_vals) + return { + "Metric": label, + "Alpha Error": self.ALPHA_ERROR, + "Power": results["power"], + "Two-Tailed P-value (ttest)": results["p_val"], + "Control Mean": results["control_mean"], + "Experiment Mean - Control Mean": results["mean_diff"], + "Percent Difference 
in Means": results["percent_diff"], + "Significance": results["significance"] + } + + def _apply_ttable_event_template(self, template, chart_data, events_list, + events_table, title): + + if title not in self._ttables: + self._ttables[title] = self._copy_ttable_tempalte() + + self._params["events_table"] = events_table + for event in events_list: + event_data = self._get_event_title_description(template, event) + options = self._create_options() + + adjusted_string = template["query"].replace( + "{{{", "{").replace("}}}", "}") + query_string = adjusted_string.format(**self._params) + + self.redash.update_query( + template["id"], + template["name"], + template["query"], + template["data_source_id"], + event_data["description"], + options + ) + ttable_row = self._get_ttable_data_for_query( + event_data["title"], + query_string, + "count", + template["data_source_id"]) + + if len(ttable_row) == 0: + self._logger.info(( + "StatisticalTester: " + "Query '{name}' has no relevant data and will not be " + "included in T-Table.".format(name=event_data["title"]))) + continue + + self._ttables[title]["rows"].append(ttable_row) + + def add_ttable_data(self, template_keyword, title, + events_list=None, events_table=None): + self._logger.info(( + "StatisticalTester: Adding data for " + "{keyword}").format(keyword=template_keyword)) + + if events_list is None: + events_list = self.DEFAULT_EVENTS + events_table = self._events_table + + # Create the t-table + self._apply_functions_to_templates( + template_keyword, + events_list, + events_table, + self._apply_ttable_event_template, + None, + title) + + def add_ttable(self, title): + self._logger.info(( + "StatisticalTester: Creating a T-Table with " + "title {title}").format(title=title)) + + FILENAME = '{exp_id}_{title}'.format(exp_id=self._experiment_id, title=title) + + chart_data = self.get_query_ids_and_names() + + # Remove a table if it already exists + if title in chart_data: + self._logger.info(( + "StatisticalTester: " + "Stale T-Table exists and will be removed")) + query_id = chart_data[title]["query_id"] + widget_id = chart_data[title]["widget_id"] + self.remove_graph_from_dashboard(widget_id, query_id) + + query_string = upload_as_json( + "experiments", + FILENAME, + self._transfer, + self._s3_bucket, + self._ttables[title], + ) + query_id, table_id = self.redash.create_new_query( + title, + query_string, + self.URL_FETCHER_DATA_SOURCE_ID, + self.TTABLE_DESCRIPTION, + ) + self.redash.add_visualization_to_dashboard( + self._dash_id, table_id, VizWidth.WIDE) diff --git a/stmoab/SummaryDashboard.py b/stmoab/SummaryDashboard.py new file mode 100644 index 0000000..0f8d115 --- /dev/null +++ b/stmoab/SummaryDashboard.py @@ -0,0 +1,262 @@ +import time + +from redash_client.constants import ( + VizWidth, VizType, ChartType, TimeInterval) + +from stmoab.templates import retention, all_events_weekly, active_users +from stmoab.constants import RetentionType + + +class SummaryDashboard(object): + TILES_DATA_SOURCE_ID = 5 + DAILY_RETENTION_TITLE = "Daily Retention" + WEEKLY_RETENTION_TITLE = "Weekly Retention" + EVENTS_WEEKLY_TITLE = "Weely Events" + MAU_DAU_TITLE = "Engagement" + MAU_DAU_SERIES_OPTIONS = { + "mau": { + "type": ChartType.AREA, + "yAxis": 0, + "zIndex": 0, + "index": 0 + }, + "wau": { + "type": ChartType.AREA, + "yAxis": 0, + "zIndex": 1, + "index": 0 + }, + "dau": { + "type": ChartType.AREA, + "yAxis": 0, + "zIndex": 2, + "index": 0 + }, + } + + class SummaryDashboardException(Exception): + pass + + def __init__(self, redash_client, 
dash_name, events_table_name, + start_date, end_date=None): + self._dash_name = dash_name + self._events_table = events_table_name + self._start_date = start_date + self._end_date = end_date if end_date else time.strftime("%Y-%m-%d") + self._params = { + "start_date": self._start_date, + "end_date": self._end_date + } + + self.redash = redash_client + self._dash_id = self.redash.create_new_dashboard(self._dash_name) + self.redash.publish_dashboard(self._dash_id) + self.public_url = self.redash.get_public_url(self._dash_id) + + def update_refresh_schedule(self, seconds_to_refresh): + widgets = self.redash.get_widget_from_dash(self._dash_name) + + for widget in widgets: + widget_id = widget.get( + "visualization", {}).get("query", {}).get("id", None) + + if not widget_id: + continue + + self.redash.update_query_schedule(widget_id, seconds_to_refresh) + + def get_query_ids_and_names(self): + widgets = self.redash.get_widget_from_dash(self._dash_name) + + data = {} + for widget in widgets: + widget_id = widget.get("id", None) + + query_id = widget.get( + "visualization", {}).get("query", {}).get("id", None) + + widget_name = widget.get( + "visualization", {}).get("query", {}).get("name", None) + + widget_query = widget.get( + "visualization", {}).get("query", {}).get("query", None) + + if not widget_name: + continue + + data[widget_name] = { + "query_id": query_id, + "widget_id": widget_id, + "query": widget_query, + } + + return data + + def remove_graph_from_dashboard(self, widget_id, query_id): + if widget_id is not None: + self.redash.remove_visualization(widget_id) + + if query_id is not None: + self.redash.delete_query(query_id) + + def remove_all_graphs(self): + widgets = self.get_query_ids_and_names() + + for widget_name in widgets: + widget = widgets[widget_name] + widget_id = widget.get("widget_id", None) + query_id = widget.get("query_id", None) + + self.remove_graph_from_dashboard(widget_id, query_id) + + def _get_mau_dau_column_mappings(self, query_fields): + mau_dau_column_mapping = { + # Date + query_fields[0]: "x", + # DAU + query_fields[1]: "y", + # WAU + query_fields[2]: "y", + # MAU + query_fields[3]: "y", + } + engagement_ratio_column_mapping = { + # Date + query_fields[0]: "x", + # Weekly Engagement + query_fields[4]: "y", + # Montly Engagement + query_fields[5]: "y", + } + return mau_dau_column_mapping, engagement_ratio_column_mapping + + def _add_forked_query_to_dashboard( + self, query_title, parent_query_id, query_params, visualization_width, + options, visualization_type=VizType.CHART, visualization_name="Chart" + ): + + fork = self.redash.fork_query(parent_query_id) + adjusted_string = fork["query"].replace("{{{", "{").replace("}}}", "}") + sql_query = adjusted_string.format(**query_params) + + self.redash.update_query( + fork["id"], + query_title, + sql_query, + fork["data_source_id"], + "", + ) + + viz_id = self.redash.make_new_visualization_request( + fork["id"], + visualization_type, + options, + visualization_name, + ) + self.redash.add_visualization_to_dashboard( + self._dash_id, viz_id, visualization_width) + + def _add_query_to_dashboard(self, query_title, query_string, + data_source, visualization_width, + visualization_type=VizType.CHART, + visualization_name="", chart_type=None, + column_mapping=None, series_options=None, + time_interval=None, stacking=True): + + query_id, table_id = self.redash.create_new_query( + query_title, query_string, data_source) + viz_id = self.redash.create_new_visualization( + query_id, + visualization_type, + 
visualization_name, + chart_type, + column_mapping, + series_options, + time_interval, + stacking, + ) + self.redash.add_visualization_to_dashboard( + self._dash_id, viz_id, visualization_width) + + def add_mau_dau(self, where_clause=""): + if self.MAU_DAU_TITLE in self.get_query_ids_and_names(): + return + + query_string, fields = active_users( + self._events_table, self._start_date, where_clause) + + mau_dau_mapping, er_mapping = self._get_mau_dau_column_mappings(fields) + + # Make the MAU/WAU/DAU graph + self._add_query_to_dashboard( + self.MAU_DAU_TITLE, + query_string, + self.TILES_DATA_SOURCE_ID, + VizWidth.WIDE, + VizType.CHART, + "", + ChartType.AREA, + mau_dau_mapping, + series_options=self.MAU_DAU_SERIES_OPTIONS, + ) + + # Make the engagement ratio graph + self._add_query_to_dashboard( + self.MAU_DAU_TITLE, + query_string, + self.TILES_DATA_SOURCE_ID, + VizWidth.WIDE, + VizType.CHART, + "", + ChartType.LINE, + er_mapping, + ) + + def add_retention_graph(self, retention_type, where_clause=""): + time_interval = TimeInterval.WEEKLY + graph_title = self.WEEKLY_RETENTION_TITLE + + if retention_type == RetentionType.DAILY: + time_interval = TimeInterval.DAILY + graph_title = self.DAILY_RETENTION_TITLE + + current_charts = self.get_query_ids_and_names() + if graph_title in current_charts: + return + + query_string, fields = retention( + self._events_table, retention_type, self._start_date, where_clause) + + self._add_query_to_dashboard( + graph_title, + query_string, + self.TILES_DATA_SOURCE_ID, + VizWidth.WIDE, + VizType.COHORT, + time_interval=time_interval, + ) + + def add_events_weekly(self, where_clause="", event_column="event_type"): + if self.EVENTS_WEEKLY_TITLE in self.get_query_ids_and_names(): + return + + query_string, fields = all_events_weekly( + self._events_table, self._start_date, where_clause, event_column) + + column_mapping = { + fields[0]: "x", + fields[1]: "y", + fields[2]: "series", + } + + self._add_query_to_dashboard( + self.EVENTS_WEEKLY_TITLE, + query_string, + self.TILES_DATA_SOURCE_ID, + VizWidth.WIDE, + VizType.CHART, + "", + ChartType.BAR, + column_mapping, + stacking=True + ) diff --git a/stmoab/__init__.py b/stmoab/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/stmoab/__init__.py @@ -0,0 +1 @@ + diff --git a/stmoab/constants.py b/stmoab/constants.py new file mode 100644 index 0000000..dab9c64 --- /dev/null +++ b/stmoab/constants.py @@ -0,0 +1,38 @@ +class RetentionType: + DAILY = "day" + WEEKLY = "week" + +TTableSchema = [ + { + "name": "Metric", + "type": "string", + "friendly_name": "Metric" + }, { + "name": "Alpha Error", + "type": "float", + "friendly_name": "Alpha Error" + }, { + "name": "Power", + "type": "float", + "friendly_name": "Power" + }, { + "name": "Two-Tailed P-value (ttest)", + "type": "float", + "friendly_name": "Two-Tailed P-value (ttest)" + }, { + "name": "Control Mean", + "type": "float", + "friendly_name": "Control Mean" + }, { + "name": "Experiment Mean - Control Mean", + "type": "float", + "friendly_name": "Experiment Mean - Control Mean" + }, { + "name": "Percent Difference in Means", + "type": "float", + "friendly_name": "Percent Difference in Means" + }, { + "name": "Significance", + "type": "string", + "friendly_name": "Significance" + }] diff --git a/stmoab/dashboard_instances/__init__.py b/stmoab/dashboard_instances/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/stmoab/dashboard_instances/ios.py b/stmoab/dashboard_instances/ios.py new file mode 100644 index 0000000..c5d87d8 
--- /dev/null +++ b/stmoab/dashboard_instances/ios.py @@ -0,0 +1,26 @@ +import os + +from redash_client.client import RedashClient + +from stmoab.constants import RetentionType +from stmoab.SummaryDashboard import SummaryDashboard + + +if __name__ == '__main__': + api_key = os.environ["REDASH_API_KEY"] + redash_client = RedashClient(api_key) + + dash = SummaryDashboard( + redash_client, + "Firefox iOS: Metrics Summary", + "activity_stream_mobile_events_daily", + "02/17/2017" + ) + + dash._events_table = "activity_stream_mobile_stats_daily" + dash.add_mau_dau() + dash._events_table = "activity_stream_mobile_events_daily" + dash.add_retention_graph(RetentionType.DAILY) + dash.add_events_weekly(event_column="event") + dash.update_refresh_schedule(3600) + #dash.remove_all_graphs() diff --git a/stmoab/dashboard_instances/lambda.py b/stmoab/dashboard_instances/lambda.py new file mode 100644 index 0000000..f330f5d --- /dev/null +++ b/stmoab/dashboard_instances/lambda.py @@ -0,0 +1,39 @@ +import os + +from redash_client.client import RedashClient + +from stmoab.utils import read_experiment_definition, format_date, is_old_date +from stmoab.ActivityStreamExperimentDashboard import ( + ActivityStreamExperimentDashboard) + +DIRECTORY_NAME = "experiments/json_definitions" +URL = ( + "https://experimenter.dev.mozaws.net/api/v1/" + "activity-stream/experiments.json?format=json") + + +def handler(json_input, context): + api_key = os.environ["REDASH_API_KEY"] + redash_client = RedashClient(api_key) + + experiments = read_experiment_definition(URL) + for experiment in experiments: + end_date = None + if "end_date" in experiment and experiment["end_date"] is not None: + if is_old_date(experiment["end_date"]): + continue + + end_date = format_date(experiment["end_date"]) + + dash = ActivityStreamExperimentDashboard( + redash_client, + experiment["name"], + experiment["slug"], + experiment["addon_versions"], + format_date(experiment["start_date"]), + end_date, + ) + + dash.add_graph_templates("AS Template:") + dash.add_ttable("TTests Template:") + dash.update_refresh_schedule(43200) diff --git a/stmoab/dashboard_instances/sample.py b/stmoab/dashboard_instances/sample.py new file mode 100644 index 0000000..ec0712a --- /dev/null +++ b/stmoab/dashboard_instances/sample.py @@ -0,0 +1,222 @@ +import os + +from redash_client.client import RedashClient + +from stmoab.StatisticalDashboard import StatisticalDashboard + +''' + "Deduped Combined Frecency", + "exp-006-deduped-combined-frecency", + ['1.2.0', '1.3.0', '1.4.0', '1.4.1'], + "01/18/17" +''' +''' + "Original Newtab Sites", + "exp-008-original-newtab-sites", + ['1.3.0', '1.4.0', '1.4.1'], + "02/02/17" +''' +''' + "Locally Fetch Metadata", + "exp-007-locally-fetch-metadata", + ['1.3.0', '1.4.0'], + "02/02/17" +''' +''' + "Locally Fetch Metadata", + "exp-010-locally-fetch-metadata", + ['1.4.1', '1.5.0', '1.6.0'], + "02/15/17" +''' +''' + "Screenshots", + "exp-009-screenshots", + ['1.5.0', '1.6.0'], + "02/23/17" +''' +''' + "Async Screenshots", + "exp-012-screenshotsasync", + ['1.7.0'], + "03/20/17" +''' +''' + "Bookmark Screenshots", + "exp-013-bookmark-screenshots", + ['1.8.0'], + "04/06/17" +''' +''' + "Metadata Long Cache", + "exp-015-metadatalongcache", + ['1.8.0', '1.9.0'], + "04/06/17" +''' +''' + "Screenshots Long Cache", + "exp-014-screenshotsasync", + ['1.8.0'], + "04/06/17" +''' +''' + "Pocket", + "exp-021-pocketstories", + ['1.10.1'], + "05/02/17" +''' +''' + "Metadata No Service", + "exp-018-metadata-no-service", + ['1.10.0', '1.10.1'], + "05/01/17" 
+''' +''' + "Metadata Local Refresh", + "exp-019-metadata-local-refresh", + ['1.10.0', '1.10.1'], + "05/01/17" +''' +''' + "Activity Stream System Addon Experiment", + "v1 Nightly Pocket User Personalization", + "as-nightly-personalization-1400890", + start_date="2017-09-27", +''' +''' + "Activity Stream System Addon Experiment", + "v2 Beta", + "pref-flip-activity-stream-beta-1389722-v2", + start_date="2017-08-30", + end_date="2017-09-09" +''' +''' + "Activity Stream System Addon Experiment", + "v1 Release", + "pref-flip-activity-stream-56-release-bug-1405332", + start_date="2017-10-05", + end_date="2017-10-20" +''' +''' + "Activity Stream System Addon Experiment", + "v1 About Home", + "pref-flip-activity-stream-56-beta-about-home-bug-1405334", + start_date="2017-10-05", +''' +''' + "Activity Stream System Addon Experiment", + "v2 Pocket Personalization", + "pref-flip-activity-stream-58-nightly-pocket-personalization-bug-1400890", + start_date="2017-10-06", +''' +''' + "Activity Stream System Addon Experiment", + "Beta Revisited", + "pref-flip-activity-stream-beta-1389722-v2", + start_date="2017-08-30", + end_date="2017-09-08" +''' +''' + "Activity Stream System Addon Experiment", + "Release enUS", + "pref-flip-activity-stream-56-release-bug-1405332", + start_date="2017-10-05", + end_date="2017-10-20" +''' +''' + "Activity Stream System Addon Experiment", + "Beta Post Bug Fix", + "pref-flip-activity-stream-beta-1389722-v2-round2", + start_date="2017-09-19", + end_date="2017-09-24" +''' +''' + "Activity Stream System Addon Experiment", + "Beta All Pocket Geos Post Bug Fix", + "pref-flip-activity-stream-beta-1389722-v2-2-round2", + start_date="2017-09-20", + end_date="2017-09-28" +''' +''' + "Activity Stream System Addon Experiment", + "Beta 57 Study", + "pref-flip-activity-stream-57-beta-enabled-bug-1410535", + start_date="2017-10-25", +''' +''' + "Activity Stream System Addon Experiment", + "Beta 57 Two Rows of Topsites", + "pref-flip-activity-stream-57-beta-two-rows-bug-1411695", + start_date=" ", +''' +''' + "Activity Stream System Addon Experiment", + "Beta 57 Two Rows v2", + "pref-flip-activity-stream-57-beta-two-rows-user-pref-bug-1411695", + start_date="2017-10-31", +''' +''' + "Activity Stream System Addon Experiment", + "v3 Pocket Personalization", + "pref-flip-activity-stream-58-nightly-optimized-pocket-personalization-bug-1410483", + start_date="2017-10-31", +''' +''' + "Activity Stream System Addon Experiment", + "57 Release", + "pref-flip-activity-stream-57-release-enabled-existing-users-bug-1415966", + start_date="2017-11-14" +''' +''' + "Activity Stream System Addon Experiment", + "57 Release New Users", + "pref-flip-activity-stream-57-release-enabled-new-users-bug-1415967", + start_date="2017-11-14" +''' + +if __name__ == '__main__': + api_key = os.environ["REDASH_API_KEY"] + aws_access_key = os.environ['AWS_ACCESS_KEY'] + aws_secret_key = os.environ['AWS_SECRET_KEY'] + s3_bucket_id_stats = os.environ['S3_BUCKET_ID_STATS'] + + redash_client = RedashClient(api_key) + + PING_CENTRE_TTABLE = "Statistical Analysis - Ping Centre" + UT_TTABLE = "Statistical Analysis - UT" + UT_HOURLY_TTABLE = "Statistical Analysis (Per Active Hour) - UT" + + dash = StatisticalDashboard( + redash_client, + "Activity Stream System Addon Experiment", + "57 Release", + "pref-flip-activity-stream-57-release-enabled-existing-users-bug-1415966", + start_date="2017-11-14" + ) + + # Average Events per Day UT + #dash.add_graph_templates("AS Template UT One:", dash.UT_EVENTS) + #dash.add_graph_templates("AS 
Template UT Mapped Two:", dash.MAPPED_UT_EVENTS) + + # Average Events per Active Hour UT + dash.add_graph_templates("AS Template UT Three:", dash.UT_HOURLY_EVENTS) + dash.add_graph_templates("AS Template UT Mapped Four:", dash.MAPPED_UT_EVENTS) + + # Average Events per Day Ping Centre + #dash.add_graph_templates("ASSA Template:", dash.DEFAULT_EVENTS) + + #dash.add_ttable_data("TTests Template UT Four:", UT_TTABLE, dash.UT_EVENTS) + #dash.add_ttable_data("TTests Template Mapped UT Six:", UT_TTABLE, dash.MAPPED_UT_EVENTS) + + #dash.add_ttable(UT_TTABLE) + + # Events per Hour TTests + #dash.add_ttable_data("TTests Template Per Hour UT Five:", UT_HOURLY_TTABLE, dash.UT_HOURLY_EVENTS) + #dash.add_ttable_data("TTests Template Per Hour Mapped UT:", UT_HOURLY_TTABLE, dash.MAPPED_UT_EVENTS) + + #dash.add_ttable(UT_HOURLY_TTABLE) + + #dash.add_ttable_data("TTests Template:", PING_CENTRE_TTABLE, dash.DEFAULT_EVENTS) + #dash.add_ttable(PING_CENTRE_TTABLE) + + #dash.update_refresh_schedule(86400) + #dash.remove_all_graphs() diff --git a/stmoab/dashboard_instances/testpilot.py b/stmoab/dashboard_instances/testpilot.py new file mode 100644 index 0000000..c033fee --- /dev/null +++ b/stmoab/dashboard_instances/testpilot.py @@ -0,0 +1,35 @@ +import os + +from redash_client.client import RedashClient + +from stmoab.constants import RetentionType +from stmoab.SummaryDashboard import SummaryDashboard + + +if __name__ == '__main__': + api_key = os.environ["REDASH_API_KEY"] + redash_client = RedashClient(api_key) + + test_pilot_experiments = { + "Summary": "@testpilot-addon", + "Min Vid": "@min-vid", + "Cliqz": "testpilot@cliqz.com", + "Pulse": "pulse@mozilla.com", + "Snooze Tabs": "snoozetabs@mozilla.com" + } + + for exp_name in test_pilot_experiments: + where_clause = "AND addon_id = '{0}'".format(test_pilot_experiments[exp_name]) + + dash = SummaryDashboard( + redash_client, + "Test Pilot: {0}".format(exp_name), + "ping_centre_test_pilot" + "02/13/2017" + ) + + dash.add_mau_dau(where_clause) + dash.add_retention_graph(RetentionType.WEEKLY, where_clause) + dash.add_events_weekly(where_clause) + dash.update_refresh_schedule(3600) + #dash.remove_all_graphs() diff --git a/stmoab/templates.py b/stmoab/templates.py new file mode 100644 index 0000000..35fc17c --- /dev/null +++ b/stmoab/templates.py @@ -0,0 +1,123 @@ +def retention(events_table, retention_type, start_date, where_clause): + return """ + WITH population AS + (SELECT client_id AS unique_id, DATE_TRUNC('{1}', date) AS cohort_date, COUNT(*) + FROM {0} + WHERE 1 = 1 + {3} + GROUP BY 1, 2), + + activity AS + (SELECT DATE_TRUNC('{1}', date) AS activity_date, client_id AS unique_id, cohort_date + FROM {0} + JOIN population + ON population.unique_id = client_id + WHERE DATE_TRUNC('{1}', date) >= (CURRENT_DATE - INTERVAL '91 days') + AND DATE_TRUNC('{1}', cohort_date) >= (CURRENT_DATE - INTERVAL '91 days') + {3}), + + population_agg AS + (SELECT DATE_TRUNC('{1}', date) AS cohort_date, COUNT(DISTINCT client_id) AS total + FROM {0} + WHERE 1 = 1 + {3} + GROUP BY 1) + + SELECT * FROM + (SELECT date, day as week_number, value, total, MAX(day) over (PARTITION BY date) AS max_week_num + FROM + (SELECT activity.cohort_date AS date, + DATE_DIFF('{1}', activity.cohort_date, activity_date) AS day, + total, + COUNT(DISTINCT unique_id) AS value + FROM activity + JOIN population_agg + ON activity.cohort_date = population_agg.cohort_date + WHERE activity_date >= activity.cohort_date + AND activity.cohort_date > '{2}' + GROUP BY 1, 2, 3)) + WHERE week_number < max_week_num + 
ORDER BY date, week_number""".format(events_table, retention_type, start_date, where_clause), [] + +def all_events_weekly(events_table, start_date, where_clause, event_column): + return """ + WITH weekly_events AS + (SELECT DATE_TRUNC('week', date) AS week, COUNT(*) + FROM {0} + WHERE DATE_TRUNC('week', date) >= '{1}' + {2} + GROUP BY 1), + + event_counts AS + (SELECT week, {3}, count FROM + (SELECT *, RANK() over (PARTITION BY week ORDER BY count) AS rank FROM + (SELECT DATE_TRUNC('week', date) AS week, {3}, COUNT(*) + FROM {0} + WHERE DATE_TRUNC('week', date) >= '{1}' + {2} + GROUP BY 1, 2 + ORDER BY 1, 2)) + WHERE rank <= 20) + + SELECT weekly_events.week, event_counts.{3}, event_counts.count / weekly_events.count::FLOAT * 100 AS rate + FROM weekly_events + LEFT JOIN event_counts + ON weekly_events.week = event_counts.week""".format(events_table, start_date, where_clause, event_column), ["week", "rate", event_column] + +def active_users(events_table, start_date, where_clause=""): + return """ + WITH weekly AS + (SELECT day, COUNT(DISTINCT client_id) AS dist_clients + FROM + (SELECT DISTINCT date + FROM {0} + WHERE date >= '{1}' + {2} + ORDER BY date) AS g(day) + LEFT JOIN {0} + ON {0}.date BETWEEN g.day - 7 AND g.day + AND {0}.date >= '{1}' + {2} + GROUP BY day + ORDER BY day), + + monthly AS + (SELECT day, count(DISTINCT client_id) AS dist_clients + FROM + (SELECT DISTINCT date + FROM {0} + WHERE date >= '{1}' + {2} + ORDER BY date) AS g(day) + LEFT JOIN {0} + ON {0}.date BETWEEN g.day - 28 AND g.day + AND {0}.date >= '{1}' + {2} + GROUP BY day + ORDER BY day), + + daily AS + (SELECT date, COUNT(DISTINCT a.client_id) AS dau + FROM {0} AS a WHERE date >= '{1}' {2} GROUP BY date), + + smoothed_daily AS + (SELECT date as day, + dau, + AVG(dau) OVER(order by date ROWS BETWEEN 7 PRECEDING AND 0 FOLLOWING) as dist_clients + FROM daily + ORDER BY day desc) + + SELECT + date, + d.dist_clients as dau, + w.dist_clients as wau, + m.dist_clients as mau, + (d.dist_clients::FLOAT / w.dist_clients) * 100.0 as weekly_engagement, + (d.dist_clients::FLOAT / m.dist_clients) * 100.0 as monthly_engagement + FROM {0} a + JOIN smoothed_daily d on d.day = date + JOIN weekly w on w.day = date + JOIN monthly m on m.day = date + WHERE date < current_date and date >= '2016-05-10' + GROUP BY date, d.dist_clients, wau, mau + ORDER BY date, dau, wau, mau""".format(events_table, start_date, where_clause), ["date", "dau", "wau", "mau", "weekly_engagement", "monthly_engagement"] diff --git a/stmoab/tests/__init__.py b/stmoab/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/stmoab/tests/base.py b/stmoab/tests/base.py new file mode 100644 index 0000000..8c067be --- /dev/null +++ b/stmoab/tests/base.py @@ -0,0 +1,76 @@ +import mock +import json +import unittest + +from redash_client.client import RedashClient +from stmoab.SummaryDashboard import SummaryDashboard + + +class AppTest(unittest.TestCase): + + def post_server(self, url, data): + EXPECTED_QUERY_ID = "query_id123" + EXPECTED_QUERY_STRING = "select some_stuff from table" + QUERY_ID_RESPONSE = { + "id": EXPECTED_QUERY_ID + } + FORK_RESPONSE = { + "id": EXPECTED_QUERY_ID, + "query": EXPECTED_QUERY_STRING, + "data_source_id": 3 + } + + response = self.get_mock_response( + content=json.dumps(QUERY_ID_RESPONSE)) + + if "fork" in url: + response = self.get_mock_response( + content=json.dumps(FORK_RESPONSE)) + + self.server_calls += 1 + return response + + def get_dashboard(self, api_key): + EVENTS_TABLE_NAME = 
"activity_stream_mobile_events_daily" + START_DATE = "02/17/2017" + DASH_NAME = "Firefox iOS: Metrics Summary" + + self.mock_requests_get.return_value = self.get_mock_response() + self.mock_requests_post.return_value = self.get_mock_response() + + dashboard = SummaryDashboard( + self.redash, + DASH_NAME, + EVENTS_TABLE_NAME, + START_DATE, + ) + return dashboard + + def setUp(self): + API_KEY = "test_key" + + self.redash = RedashClient(API_KEY) + + mock_requests_post_patcher = mock.patch( + "redash_client.client.requests.post") + self.mock_requests_post = mock_requests_post_patcher.start() + self.addCleanup(mock_requests_post_patcher.stop) + + mock_requests_get_patcher = mock.patch( + "redash_client.client.requests.get") + self.mock_requests_get = mock_requests_get_patcher.start() + self.addCleanup(mock_requests_get_patcher.stop) + + mock_requests_delete_patcher = mock.patch( + "redash_client.client.requests.delete") + self.mock_requests_delete = mock_requests_delete_patcher.start() + self.addCleanup(mock_requests_delete_patcher.stop) + + self.dash = self.get_dashboard(API_KEY) + + def get_mock_response(self, status=200, content='{}'): + mock_response = mock.Mock() + mock_response.status_code = status + mock_response.content = content + + return mock_response diff --git a/stmoab/tests/test_activity_stream_experiment_dashboard.py b/stmoab/tests/test_activity_stream_experiment_dashboard.py new file mode 100644 index 0000000..982ab59 --- /dev/null +++ b/stmoab/tests/test_activity_stream_experiment_dashboard.py @@ -0,0 +1,116 @@ +import json +import time + +from stmoab.tests.base import AppTest +from stmoab.ActivityStreamExperimentDashboard import ( + ActivityStreamExperimentDashboard) + + +class TestActivityStreamExperimentDashboard(AppTest): + + START_DATE = "2017-17-02" + END_DATE = time.strftime("%Y-%m-%d") + DASH_PROJECT = "Activity Stream Experiment" + DASH_NAME = "Screenshots Long Cache" + EXPERIMENT_ID = "exp-014-screenshotsasync" + + def get_dashboard(self, api_key): + self.mock_requests_get.return_value = self.get_mock_response() + self.mock_requests_post.return_value = self.get_mock_response() + + dashboard = ActivityStreamExperimentDashboard( + self.redash, + self.DASH_PROJECT, + self.DASH_NAME, + self.EXPERIMENT_ID, + self.START_DATE, + ) + return dashboard + + def test_correct_values_at_initialization(self): + self.assertEqual(self.dash._experiment_id, self.EXPERIMENT_ID) + self.assertEqual( + self.dash._dash_name, + "{project}: {dash}".format( + project=self.DASH_PROJECT, dash=self.DASH_NAME)) + self.assertEqual(self.dash._start_date, self.START_DATE) + self.assertEqual(self.dash._end_date, self.END_DATE) + + # 2 posts to create the dashboard and make it public + self.assertEqual(self.mock_requests_post.call_count, 2) + self.assertEqual(self.mock_requests_get.call_count, 1) + self.assertEqual(self.mock_requests_delete.call_count, 0) + + def test_add_templates_makes_correct_calls(self): + self.get_calls = 0 + QUERIES_IN_SEARCH = [{ + "id": 5, + "description": "SomeQuery", + "name": "AS Template: Query Title Event", + "data_source_id": 5 + }, { + "id": 6, + "description": "SomeQuery2", + "name": "AS Template: Query Title", + "data_source_id": 5 + }] + VISUALIZATIONS_FOR_QUERY = { + "visualizations": [ + {"options": {}}, + {"options": {}} + ] + } + WIDGETS_RESPONSE = { + "widgets": [[{ + "id": "the_widget_id", + "visualization": { + "query": { + "id": "some_id", + "name": "Query Title Click" + }, + }, + }]] + } + + def get_server(url): + response = self.get_mock_response() + if 
self.get_calls == 0: + response = self.get_mock_response( + content=json.dumps(QUERIES_IN_SEARCH)) + elif self.get_calls <= 2: + response = self.get_mock_response( + content=json.dumps(VISUALIZATIONS_FOR_QUERY)) + else: + response = self.get_mock_response( + content=json.dumps(WIDGETS_RESPONSE)) + + self.get_calls += 1 + return response + + self.server_calls = 0 + self.mock_requests_delete.return_value = self.get_mock_response() + self.mock_requests_post.side_effect = self.post_server + self.mock_requests_get.side_effect = get_server + + self.dash.add_graph_templates("Template:") + + # GET calls: + # 1) Create dashboard + # 2) Get dashboard widgets + # 3) Search queries + # 4) Get two existing visualizations + # POST calls: + # 1) Create dashboard + # 2) Search queries + # 3) Fork query + # 4) Update query + # 5) Create visualization + # 6) Append visualization to dashboard + # 7) Repeat 2-6 six times + # 8) Make dashboard public + # DELETE calls: + # One existing graph is removed from dashboard + # and deleted (2 calls) + self.assertEqual(self.mock_requests_post.call_count, 32) + self.assertEqual(self.mock_requests_get.call_count, 5) + self.assertEqual(self.mock_requests_delete.call_count, 2) diff --git a/stmoab/tests/test_statistical_dashboard.py b/stmoab/tests/test_statistical_dashboard.py new file mode 100644 index 0000000..c604119 --- /dev/null +++ b/stmoab/tests/test_statistical_dashboard.py @@ -0,0 +1,401 @@ +import math +import mock +import json +import time +import statistics + +from stmoab.tests.base import AppTest +from stmoab.StatisticalDashboard import ( + StatisticalDashboard) + + +class TestStatisticalDashboard(AppTest): + + START_DATE = "02/17/2017" + END_DATE = time.strftime("%m/%d/%y") + DASH_PROJECT = "Activity Stream Experiment" + DASH_NAME = "Screenshots Long Cache" + EXPERIMENT_ID = "exp-014-screenshotsasync" + AWS_ACCESS_KEY = "access" + AWS_SECRET_KEY = "secret" + AWS_BUCKET_ID = "bucket" + + def get_dashboard(self, api_key): + self.mock_requests_get.return_value = self.get_mock_response() + self.mock_requests_post.return_value = self.get_mock_response() + mock_boto_transfer_patcher = mock.patch( + "stmoab.utils.S3Transfer") + mock_boto_transfer_patcher.start() + self.addCleanup(mock_boto_transfer_patcher.stop) + + dashboard = StatisticalDashboard( + self.redash, + self.AWS_ACCESS_KEY, + self.AWS_SECRET_KEY, + self.AWS_BUCKET_ID, + self.DASH_PROJECT, + self.DASH_NAME, + self.EXPERIMENT_ID, + self.START_DATE, + ) + return dashboard + + def test_pooled_stddev(self): + exp_vals = [1, 2, 3] + control_vals = [4, 6, 8] + EXPECTED_POOLED_STDDEV = math.sqrt(10 / float(4)) + + exp_std = statistics.stdev(exp_vals) + control_std = statistics.stdev(control_vals) + + pooled_stddev = self.dash._compute_pooled_stddev( + control_std, exp_std, control_vals, exp_vals) + + self.assertEqual(pooled_stddev, EXPECTED_POOLED_STDDEV) + + def test_power_and_ttest_negative_results(self): + exp_vals = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3] + control_vals = [4, 6, 8, 4, 6, 8, 4, 6, 8, 4, 6, 8] + MEAN_DIFFERENCE = -4 + + results = self.dash._power_and_ttest( + control_vals, exp_vals) + + self.assertEqual(results["mean_diff"], MEAN_DIFFERENCE) + self.assertEqual(results["significance"], "Negative") + self.assertTrue(0 <= results["p_val"] <= 0.05) + self.assertTrue(0.5 <= results["power"] <= 1) + + def test_power_and_ttest_positive_results(self): + exp_vals = [4, 6, 8, 4, 6, 8, 4, 6, 8, 4, 6, 8] + control_vals = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3] + MEAN_DIFFERENCE = 4 + + results = 
self.dash._power_and_ttest( + control_vals, exp_vals) + + self.assertEqual(results["mean_diff"], MEAN_DIFFERENCE) + self.assertEqual(results["significance"], "Positive") + self.assertTrue(0 <= results["p_val"] <= 0.05) + self.assertTrue(0.5 <= results["power"] <= 1) + + def test_power_and_ttest_neutral_results(self): + exp_vals = [4, 6, 8, 4, 6, 8, 4, 6, 8, 4, 6, 8] + control_vals = [4, 6, 8, 4, 6, 8, 4, 6, 8, 4, 6, 8] + MEAN_DIFFERENCE = 0 + + results = self.dash._power_and_ttest( + control_vals, exp_vals) + + self.assertEqual(results["mean_diff"], MEAN_DIFFERENCE) + self.assertEqual(results["significance"], "Neutral") + self.assertEqual(results["p_val"], 1) + self.assertTrue(0 <= results["power"] <= 0.1) + + def test_get_ttable_data_for_non_existent_query(self): + QUERY_RESULTS_RESPONSE = {} + + self.mock_requests_post.return_value = self.get_mock_response( + content=json.dumps(QUERY_RESULTS_RESPONSE)) + + ttable_row = self.dash._get_ttable_data_for_query( + "beep", "meep", "boop", 5) + + self.assertEqual(ttable_row, {}) + + def test_ttable_not_made_for_non_matching_graph(self): + BAD_ROW = [] + for i in range(5): + BAD_ROW.append({ + "some_weird_row": "beep", + "count": 5 + }) + + QUERY_RESULTS_RESPONSE = { + "query_result": { + "data": { + "rows": BAD_ROW + } + } + } + + self.mock_requests_post.return_value = self.get_mock_response( + content=json.dumps(QUERY_RESULTS_RESPONSE)) + + ttable_row = self.dash._get_ttable_data_for_query( + "beep", "meep", "count", 5) + + self.assertEqual(len(ttable_row), 0) + + def test_ttable_row_data_is_correct(self): + EXPECTED_LABEL = "beep" + EXPECTED_ROWS = [] + EXPECTED_MEAN_DIFFERENCE = -4 + + for i in range(12): + EXPECTED_ROWS.append({ + "date": 123, + "count": (i % 3) + 1, + "type": "experiment" + }) + EXPECTED_ROWS.append({ + "date": 123, + "count": ((i * 2) % 6) + 4, # 4, 6, 8 + "type": "control" + }) + + QUERY_RESULTS_RESPONSE = { + "query_result": { + "data": { + "rows": EXPECTED_ROWS + } + } + } + + self.mock_requests_post.return_value = self.get_mock_response( + content=json.dumps(QUERY_RESULTS_RESPONSE)) + + ttable_row = self.dash._get_ttable_data_for_query( + EXPECTED_LABEL, "meep", "count", 5) + + self.assertEqual(len(ttable_row), 8) + self.assertEqual(ttable_row["Metric"], EXPECTED_LABEL) + self.assertEqual(ttable_row["Alpha Error"], self.dash.ALPHA_ERROR) + self.assertTrue(0.5 <= ttable_row["Power"] <= 1) + self.assertTrue(0 <= ttable_row["Two-Tailed P-value (ttest)"] <= 0.05) + self.assertEqual( + ttable_row["Experiment Mean - Control Mean"], EXPECTED_MEAN_DIFFERENCE) + + def test_add_ttable_makes_correct_calls(self): + self.get_calls = 0 + self.server_calls = 0 + QUERIES_IN_SEARCH = [{ + "id": 5, + "description": "SomeQuery", + "name": "AS Template: Query Title Event", + "data_source_id": 5, + "query": "SELECT stuff FROM things" + }] + VISUALIZATIONS_FOR_QUERY = { + "visualizations": [ + {"options": {}}, + ] + } + WIDGETS_RESPONSE = { + "widgets": [[{ + "visualization": { + "query": { + "name": "Some table", + }, + }, + }]] + } + EXPECTED_ROWS = [{ + "count": 123, + "type": "experiment", + }, { + "count": 789, + "type": "control", + }, { + "count": 1233, + "type": "experiment", + }, { + "count": 7819, + "type": "control", + }] + + QUERY_RESULTS_RESPONSE = { + "query_result": { + "data": { + "rows": EXPECTED_ROWS + } + } + } + + def get_server(url): + if self.get_calls == 0: + response = self.get_mock_response( + content=json.dumps(QUERIES_IN_SEARCH)) + elif self.get_calls <= 2 and self.get_calls > 0: + response = 
self.get_mock_response( + content=json.dumps(VISUALIZATIONS_FOR_QUERY)) + else: + response = self.get_mock_response( + content=json.dumps(WIDGETS_RESPONSE)) + + self.get_calls += 1 + return response + + self.mock_requests_get.side_effect = get_server + self.mock_requests_post.return_value = self.get_mock_response( + content=json.dumps(QUERY_RESULTS_RESPONSE)) + + TABLE_NAME = "Table Name" + self.dash.add_ttable_data( + "Template:", TABLE_NAME, self.dash.DEFAULT_EVENTS) + self.dash.add_ttable(TABLE_NAME) + + # GET calls: + # 1) Create dashboard + # 2) Get dashboard widgets (2 times) + # 3) Search for templates + # 4) Get template + # POST calls: + # 1) Create dashboard + # 2) Update queries (5 events * 2 requests each: update + refresh) + # 3) Get Ttable query results for 5 rows + # 4) Create query (doesn't return ID, so no refresh) + # 5) Add query to dashboard + # 6) Make dashboard public + self.assertEqual(self.mock_requests_post.call_count, 19) + self.assertEqual(self.mock_requests_get.call_count, 5) + self.assertEqual(self.mock_requests_delete.call_count, 0) + + def test_ttable_with_no_rows(self): + self.get_calls = 0 + self.server_calls = 0 + QUERIES_IN_SEARCH = [{ + "id": 5, + "description": "SomeQuery", + "name": "AS Template: Query Title Event", + "data_source_id": 5, + "query": "SELECT stuff FROM things" + }] + VISUALIZATIONS_FOR_QUERY = { + "visualizations": [ + {"options": {}}, + {"options": {}} + ] + } + WIDGETS_RESPONSE = { + "widgets": [[{ + "visualization": { + "query": { + "name": "Some Graph", + }, + }, + }]] + } + + def get_server(url): + response = self.get_mock_response() + if self.get_calls == 0: + response = self.get_mock_response( + content=json.dumps(QUERIES_IN_SEARCH)) + elif self.get_calls <= 2 and self.get_calls > 0: + response = self.get_mock_response( + content=json.dumps(VISUALIZATIONS_FOR_QUERY)) + else: + response = self.get_mock_response( + content=json.dumps(WIDGETS_RESPONSE)) + + self.get_calls += 1 + return response + + mock_json_uploader = mock.patch( + ("stmoab.StatisticalDashboard.upload_as_json")) + upload_file_patch = mock_json_uploader.start() + upload_file_patch.return_value = "" + + self.mock_requests_get.side_effect = get_server + self.mock_requests_post.side_effect = self.post_server + + TABLE_NAME = "Table Name" + self.dash.add_ttable_data( + "Template:", TABLE_NAME, self.dash.DEFAULT_EVENTS) + self.dash.add_ttable(TABLE_NAME) + + # GET calls: + # 1) Create dashboard + # 2) Get dashboard widgets (2 times) + # 3) Search for templates + # 4) Get templates (2 calls) + # POST calls: + # 1) Create dashboard + # 2) Update queries (5 events * 2 requests each: update + refresh) + # 3) Get Ttable query results for 5 rows + # 4) Create query (create + refresh) + # 5) Add query to dashboard + # 6) Make dashboard public + self.assertEqual(self.mock_requests_post.call_count, 20) + self.assertEqual(self.mock_requests_get.call_count, 6) + self.assertEqual(self.mock_requests_delete.call_count, 0) + + # The ttable has no rows + args, kwargs = upload_file_patch.call_args + self.assertEqual(len(args[2]["rows"]), 0) + + mock_json_uploader.stop() + + def test_statistical_analysis_graph_exist_deletes_and_creates_new(self): + self.get_calls = 0 + TABLE_NAME = "Table Name" + QUERIES_IN_SEARCH = [{ + "id": 5, + "description": "SomeQuery", + "name": "AS Template: Query Title Event", + "data_source_id": 5, + "query": "SELECT stuff FROM things" + }] + VISUALIZATIONS_FOR_QUERY = { + "visualizations": [ + {"options": {}}, + {"options": {}} + ] + } + WIDGETS_RESPONSE = { + 
"widgets": [[{ + "id": "123", + "visualization": { + "query": { + "name": TABLE_NAME, + "id": "abc" + }, + }, + }]] + } + + def get_server(url): + response = self.get_mock_response() + if self.get_calls == 0: + response = self.get_mock_response( + content=json.dumps(QUERIES_IN_SEARCH)) + elif self.get_calls <= 2 and self.get_calls > 0: + response = self.get_mock_response( + content=json.dumps(VISUALIZATIONS_FOR_QUERY)) + else: + response = self.get_mock_response( + content=json.dumps(WIDGETS_RESPONSE)) + + self.get_calls += 1 + return response + + mock_json_uploader = mock.patch( + ("stmoab.StatisticalDashboard.upload_as_json")) + upload_file_patch = mock_json_uploader.start() + upload_file_patch.return_value = "" + + self.mock_requests_delete.return_value = self.get_mock_response() + self.mock_requests_get.side_effect = get_server + + self.dash.add_ttable_data( + "Template:", TABLE_NAME) + self.dash.add_ttable(TABLE_NAME) + + # GET calls: + # 1) Create dashboard + # 2) Get dashboard widgets (2 times) + # 3) Search for templates + # 4) Get template + # POST calls: + # 1) Create dashboard + # 2) Update queries (5 events * 2 requests each: update + refresh) + # 3) Get Ttable query results for 5 rows + # 4) Create query (doesn't return ID, so no refresh) + # 5) Add query to dashboard + # 6) Make dashboard public + self.assertEqual(self.mock_requests_post.call_count, 19) + self.assertEqual(self.mock_requests_get.call_count, 5) + self.assertEqual(self.mock_requests_delete.call_count, 2) + + mock_json_uploader.stop() diff --git a/stmoab/tests/test_summary_dashboard.py b/stmoab/tests/test_summary_dashboard.py new file mode 100644 index 0000000..3c7adad --- /dev/null +++ b/stmoab/tests/test_summary_dashboard.py @@ -0,0 +1,295 @@ +import json + +from stmoab.constants import RetentionType +from stmoab.tests.base import AppTest +from stmoab.templates import active_users + + +class TestSummaryDashboard(AppTest): + + def test_update_refresh_schedule_success(self): + EXPECTED_QUERY_ID = "query_id123" + WIDGETS_RESPONSE = { + "widgets": [[{ + "visualization": { + "query": { + "nope": "fail" + } + }}], + [{"visualization": { + "query": { + "id": EXPECTED_QUERY_ID + } + }}, + {"visualization": { + "query": { + "muhahaha": "you can't catch me!" 
+ } + }} + ]] + } + + self.mock_requests_get.return_value = self.get_mock_response( + content=json.dumps(WIDGETS_RESPONSE)) + self.mock_requests_post.return_value = self.get_mock_response() + + self.dash.update_refresh_schedule(86400) + + # 2 posts to create the dashboard and make it public + # 1 post for refreshing the one valid visualization ID + # 2 gets for creating the dashboard and looking up chart names + self.assertEqual(self.mock_requests_post.call_count, 3) + self.assertEqual(self.mock_requests_get.call_count, 2) + self.assertEqual(self.mock_requests_delete.call_count, 0) + + def test_get_chart_data_success(self): + EXPECTED_QUERY_NAME = "query_name123" + EXPECTED_QUERY_NAME2 = "query_name456" + EXPECTED_QUERY_NAME3 = "query_name789" + WIDGETS_RESPONSE = { + "widgets": [[{ + "visualization": { + "query": { + "name": EXPECTED_QUERY_NAME, + "id": 1 + } + }, + "id": 4}], + [{ + "visualization": { + "query": { + "not_a_name": EXPECTED_QUERY_NAME2, + "id": 2 + } + }, + "id": 5 + }, { + "visualization": { + "query": { + "name": EXPECTED_QUERY_NAME3, + "id": 3 + } + }, + "id": 6 + } + ]] + } + EXPECTED_NAMES = [EXPECTED_QUERY_NAME, EXPECTED_QUERY_NAME3] + EXPECTED_QUERY_IDS = [1, 3] + EXPECTED_WIDGET_IDS = [4, 6] + + self.mock_requests_get.return_value = self.get_mock_response( + content=json.dumps(WIDGETS_RESPONSE)) + + data_dict = self.dash.get_query_ids_and_names() + + self.assertEqual(len(data_dict), 2) + for name in data_dict: + self.assertEqual(len(data_dict[name]), 3) + self.assertTrue(name in EXPECTED_NAMES) + self.assertTrue(data_dict[name]["query_id"] in EXPECTED_QUERY_IDS) + self.assertTrue(data_dict[name]["widget_id"] in EXPECTED_WIDGET_IDS) + + def test_remove_all_graphs_success(self): + EXPECTED_QUERY_ID = "query_id123" + EXPECTED_QUERY_ID2 = "query_id456" + EXPECTED_QUERY_ID3 = "query_id789" + WIDGETS_RESPONSE = { + "widgets": [[{ + "id": EXPECTED_QUERY_ID, + "visualization": { + "query": { + "id": EXPECTED_QUERY_ID, + "name": "A" + } + }}], [{ + "id": EXPECTED_QUERY_ID2, + "visualization": { + "query": { + "id": EXPECTED_QUERY_ID2, + "name": "B" + } + } + }, { + "id": EXPECTED_QUERY_ID3, + "visualization": { + "query": { + "id": EXPECTED_QUERY_ID3, + "name": "C" + } + } + } + ]] + } + + self.mock_requests_delete.return_value = self.get_mock_response() + self.mock_requests_get.return_value = self.get_mock_response( + content=json.dumps(WIDGETS_RESPONSE)) + + self.dash.remove_all_graphs() + + # 2 posts to create the dashboard and make it public + # 2 gets for creating the dashboard and looking up chart names + self.assertEqual(self.mock_requests_post.call_count, 2) + self.assertEqual(self.mock_requests_get.call_count, 2) + self.assertEqual(self.mock_requests_delete.call_count, 6) + + def test_mau_dau_column_mapping_returns_correct_mappings(self): + EXPECTED_MAU_DAU_MAPPING = { + "date": "x", + "dau": "y", + "wau": "y", + "mau": "y", + } + EXPECTED_ENGAGEMENT_RATIO_MAPPING = { + "date": "x", + "weekly_engagement": "y", + "monthly_engagement": "y", + } + + query_string, fields = active_users( + self.dash._events_table, self.dash._start_date) + mau_mapping, er_mapping = self.dash._get_mau_dau_column_mappings(fields) + + self.assertEqual(mau_mapping, EXPECTED_MAU_DAU_MAPPING) + self.assertEqual(er_mapping, EXPECTED_ENGAGEMENT_RATIO_MAPPING) + + def test_mau_dau_graphs_exist_makes_no_request(self): + WIDGETS_RESPONSE = { + "widgets": [[{ + "visualization": { + "query": { + "name": self.dash.MAU_DAU_TITLE, + }, + }, + }]] + } + + self.mock_requests_get.return_value = 
self.get_mock_response( + content=json.dumps(WIDGETS_RESPONSE)) + + self.dash.add_mau_dau() + + # 2 posts to create the dashboard and make it public + # 2 gets for creating the dashboard and looking up chart names + self.assertEqual(self.mock_requests_post.call_count, 2) + self.assertEqual(self.mock_requests_get.call_count, 2) + self.assertEqual(self.mock_requests_delete.call_count, 0) + + def test_mau_dau_graphs_make_expected_calls(self): + self.server_calls = 0 + + self.mock_requests_get.return_value = self.get_mock_response() + self.mock_requests_post.side_effect = self.post_server + + self.dash.add_mau_dau() + + # GET calls: + # 1) Create dashboard + # 2) Get dashboard widgets + # 3) Get first table ID + # 4) Get second table ID + # POST calls: + # 1) Create dashboard + # 2) Create first query + # 3) Refresh first query + # 4) Create second query + # 5) Refresh second query + # 6) Create first visualization + # 7) Append first visualization to dashboard + # 8) Create second visualization + # 9) Append second visualization to dashboard + # 10) Make dashboard public + self.assertEqual(self.mock_requests_post.call_count, 10) + self.assertEqual(self.mock_requests_get.call_count, 4) + self.assertEqual(self.mock_requests_delete.call_count, 0) + + def test_retention_graphs_exist_makes_no_request(self): + WIDGETS_RESPONSE = { + "widgets": [[{ + "visualization": { + "query": { + "name": self.dash.WEEKLY_RETENTION_TITLE, + }, + }, + }]] + } + + self.mock_requests_get.return_value = self.get_mock_response( + content=json.dumps(WIDGETS_RESPONSE)) + + self.dash.add_retention_graph(RetentionType.WEEKLY) + + # 2 posts to create the dashboard and make it public + # 2 gets for creating the dashboard and looking up chart names + self.assertEqual(self.mock_requests_post.call_count, 2) + self.assertEqual(self.mock_requests_get.call_count, 2) + self.assertEqual(self.mock_requests_delete.call_count, 0) + + def test_retention_graph_makes_expected_calls(self): + self.server_calls = 0 + + self.mock_requests_get.return_value = self.get_mock_response() + self.mock_requests_post.side_effect = self.post_server + + self.dash.add_retention_graph(RetentionType.DAILY) + + # GET calls: + # 1) Create dashboard + # 2) Get dashboard widgets + # 3) Get table ID + # POST calls: + # 1) Create dashboard + # 2) Create query + # 3) Refresh query + # 4) Create visualization + # 5) Append visualization to dashboard + # 6) Make dashboard public + self.assertEqual(self.mock_requests_post.call_count, 6) + self.assertEqual(self.mock_requests_get.call_count, 3) + self.assertEqual(self.mock_requests_delete.call_count, 0) + + def test_weekly_events_graph_exist_makes_no_request(self): + WIDGETS_RESPONSE = { + "widgets": [[{ + "visualization": { + "query": { + "name": self.dash.EVENTS_WEEKLY_TITLE, + }, + }, + }]] + } + + self.mock_requests_get.return_value = self.get_mock_response( + content=json.dumps(WIDGETS_RESPONSE)) + + self.dash.add_events_weekly() + + # 2 posts to create the dashboard and make it public + # 2 gets for creating the dashboard and looking up chart names + self.assertEqual(self.mock_requests_post.call_count, 2) + self.assertEqual(self.mock_requests_get.call_count, 2) + self.assertEqual(self.mock_requests_delete.call_count, 0) + + def test_weekly_events_graph_makes_expected_calls(self): + self.server_calls = 0 + + self.mock_requests_get.return_value = self.get_mock_response() + self.mock_requests_post.side_effect = self.post_server + + self.dash.add_events_weekly() + + # GET calls: + # 1) Create dashboard + # 2) Get 
dashboard widgets + # 3) Get table ID + # POST calls: + # 1) Create dashboard + # 2) Create query + # 3) Refresh query + # 4) Create visualization + # 5) Append visualization to dashboard + # 6) Make dashboard public + self.assertEqual(self.mock_requests_post.call_count, 6) + self.assertEqual(self.mock_requests_get.call_count, 3) + self.assertEqual(self.mock_requests_delete.call_count, 0) diff --git a/stmoab/tests/test_utils.py b/stmoab/tests/test_utils.py new file mode 100644 index 0000000..957a8b5 --- /dev/null +++ b/stmoab/tests/test_utils.py @@ -0,0 +1,118 @@ +import mock +import json +import tempfile +import calendar +from datetime import datetime, timedelta + +from stmoab.tests.base import AppTest +from stmoab.constants import TTableSchema +from stmoab.utils import ( + upload_as_json, read_experiment_definition, create_boto_transfer, + read_experiment_definition_s3, format_date, is_old_date) + + +class TestUtils(AppTest): + + def test_upload_as_json_return_val(self): + DIRECTORY_NAME = "experiments" + FILENAME = "test_file_name" + ACCESS_KEY = "key" + SECRET_KEY = "secret" + BUCKET_ID = "bucket" + DATA = {"columns": TTableSchema, "rows": []} + + EXPECTED_S3_KEY = "activity-stream/" + DIRECTORY_NAME + "/" + FILENAME + EXPECTED_BASE_URL = "https://analysis-output.telemetry.mozilla.org/" + + mock_boto_transfer_patcher = mock.patch("stmoab.utils.S3Transfer") + mock_boto_transfer_patcher.start() + + transfer = create_boto_transfer(ACCESS_KEY, SECRET_KEY) + query_string = upload_as_json(DIRECTORY_NAME, FILENAME, transfer, BUCKET_ID, DATA) + + self.assertEqual(query_string, EXPECTED_BASE_URL + EXPECTED_S3_KEY) + + mock_boto_transfer_patcher.stop() + + def test_download_experiment_definition_json_non_json_return_val(self): + mock_boto_transfer_patcher = mock.patch("stmoab.utils.s3.get_object") + mock_transfer = mock_boto_transfer_patcher.start() + mock_transfer.return_value = "fail" + + json_result = read_experiment_definition_s3("beep") + + self.assertEqual(json_result, {}) + + mock_boto_transfer_patcher.stop() + + def test_download_experiment_definition_non_json_return_val(self): + mock_boto_transfer_patcher = mock.patch("stmoab.utils.urllib.urlopen") + mock_transfer = mock_boto_transfer_patcher.start() + mock_transfer.return_value = "fail" + + json_result = read_experiment_definition("beep") + + self.assertEqual(json_result, {}) + + mock_boto_transfer_patcher.stop() + + def test_download_experiment_definition_json_return_val(self): + EXPECTED_JSON = json.dumps({"experiment1": "some_value"}) + + download_patcher = mock.patch("stmoab.utils.urllib.urlopen") + mock_download = download_patcher.start() + + # Make a temp file for returning + temp_file = tempfile.mkstemp() + file_handle = open(temp_file[1], "w+") + file_handle.write(EXPECTED_JSON) + file_handle.seek(0) + + mock_download.return_value = file_handle + + json_result = read_experiment_definition("boop") + + self.assertEqual(json_result, json.loads(EXPECTED_JSON)) + + download_patcher.stop() + file_handle.close() + + def test_download_experiment_definition_s3_json_return_val(self): + EXPECTED_JSON = json.dumps({"experiment1": "some_value"}) + + mock_boto_download_patcher = mock.patch("stmoab.utils.s3.get_object") + mock_download = mock_boto_download_patcher.start() + + # Make a temp file for returning + temp_file = tempfile.mkstemp() + file_handle = open(temp_file[1], "w+") + file_handle.write(EXPECTED_JSON) + file_handle.seek(0) + + mock_download.return_value = {"Body": file_handle} + + json_result = 
read_experiment_definition_s3("boop") + + self.assertEqual(json_result, json.loads(EXPECTED_JSON)) + + mock_boto_download_patcher.stop() + file_handle.close() + + def test_date_format(self): + MS_DATE = 1493671545000.0 + EXPECTED_FORMAT = '05/01/17' + formatted_date = format_date(MS_DATE) + + self.assertEqual(formatted_date, EXPECTED_FORMAT) + + def test_is_old_date(self): + new_datetime = datetime.today() - timedelta(days=1) + + MS_DATE_OLD = 1493671545000.0 + MS_DATE_NEW = calendar.timegm(new_datetime.utctimetuple()) * 1000.0 + + is_old = is_old_date(MS_DATE_OLD) + self.assertEqual(is_old, True) + + is_old = is_old_date(MS_DATE_NEW) + self.assertEqual(is_old, False) diff --git a/stmoab/utils.py b/stmoab/utils.py new file mode 100644 index 0000000..743b9de --- /dev/null +++ b/stmoab/utils.py @@ -0,0 +1,72 @@ +import json +import boto3 +import urllib +from boto3.s3.transfer import S3Transfer + +from datetime import datetime, timedelta + +s3 = boto3.client("s3") + + +def create_boto_transfer(access_key, secret_key): + client = boto3.client( + "s3", + region_name="us-west-2", + aws_access_key_id=access_key, + aws_secret_access_key=secret_key) + transfer = S3Transfer(client) + return transfer + + +def upload_as_json(directory_name, filename, transfer, bucket_id, data): + path = "activity-stream/" + directory_name + "/" + s3_key = path + filename + + json_data = json.dumps(data) + file_path = "/tmp/{filename}".format(filename=filename) + with open(file_path, 'w') as f: + f.write(json_data) + + transfer.upload_file( + file_path, + bucket_id, + s3_key, + extra_args={"ContentType": "application/json"}) + + return "https://analysis-output.telemetry.mozilla.org/" + s3_key + + +def read_experiment_definition(url): + try: + response = urllib.urlopen(url) + return json.loads(response.read()) + except: + return {} + + +def read_experiment_definition_s3(filename): + DIRECTORY_NAME = "experiments/json_definitions" + + path = "activity-stream/" + DIRECTORY_NAME + "/" + s3_key = path + filename + + obj = s3.get_object(Bucket="telemetry-public-analysis-2", Key=s3_key) + + try: + experiments_string = obj["Body"].read() + return json.loads(experiments_string) + except: + return {} + + +def format_date(date): + date_epoch = datetime.fromtimestamp(date / 1000.0) + date = date_epoch.strftime("%m/%d/%y") + return date + + +# A date is considered "old" if it's > 3 days earlier than today. +def is_old_date(date): + three_old = datetime.today() - timedelta(days=3) + input_date_epoch = datetime.fromtimestamp(date / 1000.0) + return input_date_epoch < three_old