Add preliminary analysis

2018-04-21 14:57:20 +02:00 · 2018-04-21 14:57:20 +02:00 · f4ec75f5c2
--- a/analysis/TAARExperimentV2Analysis.ipynb
+++ b/analysis/TAARExperimentV2Analysis.ipynb
--- a/analysis/TAARExperimentV2ETL.ipynb
+++ b/analysis/TAARExperimentV2ETL.ipynb
@ -0,0 +1,325 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from moztelemetry import Dataset\n",
+    "from pyspark.sql import Row\n",
+    "from pyspark.sql.types import BooleanType, LongType\n",
+    "import pandas as pd\n",
+    "import pyspark.sql.functions as F\n",
+    "import datetime as dt\n",
+    "\n",
+    "sc.setLogLevel(\"INFO\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Define util funcs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def utc2date(seconds):\n",
+    "    \"\"\"\n",
+    "    Convert unix time in seconds to a 'YYYYMMDD' date string.\n",
+    "\n",
+    "    Returns None when `seconds` is not a number or the resulting\n",
+    "    date cannot be represented or formatted.\n",
+    "    \"\"\"\n",
+    "    epoch = dt.datetime(1970, 1, 1)\n",
+    "    try:\n",
+    "        return (epoch + dt.timedelta(seconds=seconds)).strftime('%Y%m%d')\n",
+    "    except (TypeError, ValueError, OverflowError):\n",
+    "        # only conversion failures are swallowed; a bare `except` would\n",
+    "        # also hide KeyboardInterrupt/SystemExit\n",
+    "        return None\n",
+    "\n",
+    "    \n",
+    "def shield_data(x):\n",
+    "    \"\"\"\n",
+    "    Return the `payload.data.attributes` dict of a ping,\n",
+    "    or an empty dict when one of those keys is absent.\n",
+    "    \"\"\"\n",
+    "    payload = x.get(\"payload\", {})\n",
+    "    data = payload.get(\"data\", {})\n",
+    "    return data.get(\"attributes\", {})\n",
+    "\n",
+    "\n",
+    "def _cast(col, f):\n",
+    "    \"\"\"\n",
+    "    Apply the converter `f` to `col`.\n",
+    "\n",
+    "    Returns None for the literal string 'null' and for any\n",
+    "    value that `f` fails to convert.\n",
+    "    \"\"\"\n",
+    "    if col == 'null':\n",
+    "        return None\n",
+    "    try:\n",
+    "        return f(col)\n",
+    "    except Exception:\n",
+    "        # best-effort cast: unconvertible values become null, but\n",
+    "        # KeyboardInterrupt/SystemExit are no longer swallowed\n",
+    "        return None\n",
+    "\n",
+    "\n",
+    "def _bool(x):\n",
+    "    \"\"\"True only for the exact string 'true'.\"\"\"\n",
+    "    return x == 'true'\n",
+    "\n",
+    "\n",
+    "# Spark UDFs wrapping _cast for LongType and BooleanType columns\n",
+    "castLong = F.udf(lambda x: _cast(x, long), LongType())\n",
+    "castBool = F.udf(lambda x: _cast(x, _bool), BooleanType())\n",
+    "    \n",
+    "\n",
+    "def collapse_fields(x):\n",
+    "    \"\"\"\n",
+    "    Collapse nested field names and return a flattened\n",
+    "    PySpark Row to prepare for DataFrame conversion.\n",
+    "\n",
+    "    A nested section that is missing or None (e.g. `payload.data`\n",
+    "    or `meta`) is treated as an empty dict, so the corresponding\n",
+    "    Row fields are None instead of the call raising AttributeError.\n",
+    "    \"\"\"\n",
+    "    def section(parent, key):\n",
+    "        # nested dict stored under `key`, or {} when absent or None\n",
+    "        return parent.get(key) or {}\n",
+    "\n",
+    "    x = x or {}\n",
+    "    payload = section(x, \"payload\")\n",
+    "    data = section(section(payload, \"data\"), \"attributes\")\n",
+    "    environment = section(x, \"environment\")\n",
+    "    addons = section(section(environment, \"addons\"), \"activeAddons\")\n",
+    "    result = Row(\n",
+    "        client_id=x.get(\"clientId\"),\n",
+    "        locale=section(environment, \"settings\").get(\"locale\"),\n",
+    "        branch=payload.get(\"branch\"),\n",
+    "        addon_id=data.get(\"addon_id\"),\n",
+    "        clicked_button=data.get(\"clickedButton\"),\n",
+    "        creation_date=x.get(\"creationDate\"),\n",
+    "        ping_type=data.get(\"pingType\"),\n",
+    "        saw_popup=data.get(\"sawPopup\"),\n",
+    "        src=data.get(\"srcURI\"),\n",
+    "        start_time_utc=data.get(\"startTime\"),\n",
+    "        dwell_time=data.get(\"aboutAddonsActiveTabSeconds\"),\n",
+    "        discopane_loaded=data.get(\"discoPaneLoaded\"),\n",
+    "        submission_date_s3=section(x, \"meta\").get(\"submissionDate\"),\n",
+    "        # keep add-ons flagged as neither system nor foreign-installed;\n",
+    "        # an entry lacking either flag is excluded (both default to True)\n",
+    "        current_addons=[guid for guid, addon in addons.items()\n",
+    "                        if not addon.get('isSystem', True)\n",
+    "                        and not addon.get('foreignInstall', True)]\n",
+    "        )\n",
+    "    return result"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Define study dates in string and unix format"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "20180312\n"
+     ]
+    }
+   ],
+   "source": [
+    "START_DATE_UNIX = 17602 * 24 * 60 * 60\n",
+    "START_DATE_STR  = utc2date(START_DATE_UNIX)\n",
+    "print START_DATE_STR\n",
+    "END_DATE_STR = \"20180418\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Load raw pings from experiment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "code_folding": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "fetching 15639.14247MB in 284387 files...\n"
+     ]
+    }
+   ],
+   "source": [
+    "# load all TAARExperimentV2 pings submitted between START_DATE_STR and END_DATE_STR (inclusive)\n",
+    "taarv2_pings = (\n",
+    "    Dataset.from_source(\"telemetry\")\n",
+    "           .where(docType=\"shield-study-addon\")\n",
+    "           .where(submissionDate=lambda x: x >= START_DATE_STR and x <= END_DATE_STR)\n",
+    "           .records(sc)\n",
+    "           .filter(lambda x: x.get(\"payload\", {}).get(\"study_name\") == \"TAARExperimentV2\")\n",
+    "           .filter(lambda x: x.get(\"payload\", {}).get(\"addon_version\") == \"1.0.13\")\n",
+    "           .filter(lambda x: x.get(\"payload\", {}).get(\"testing\") == False)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Convert pings to a structured spark DataFrame"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "# sampleRatio infers schema from first 0.1% of rows\n",
+    "taarv2_DF = taarv2_pings.map(collapse_fields).toDF(sampleRatio=0.001)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Cast non-string columns to the appropriate type"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# columns holding 'true'/'false' flags\n",
+    "bool_cols = ['discopane_loaded', 'clicked_button', 'saw_popup']\n",
+    "\n",
+    "# columns holding integer values\n",
+    "long_cols = ['start_time_utc', 'dwell_time']\n",
+    "\n",
+    "# apply the matching cast UDF to every listed column, booleans first\n",
+    "for cols, cast_udf in ((bool_cols, castBool), (long_cols, castLong)):\n",
+    "    for name in cols:\n",
+    "        taarv2_DF = taarv2_DF.withColumn(name, cast_udf(name))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "root\n",
+      " |-- addon_id: string (nullable = true)\n",
+      " |-- branch: string (nullable = true)\n",
+      " |-- clicked_button: boolean (nullable = true)\n",
+      " |-- client_id: string (nullable = true)\n",
+      " |-- creation_date: string (nullable = true)\n",
+      " |-- current_addons: array (nullable = true)\n",
+      " |    |-- element: string (containsNull = true)\n",
+      " |-- discopane_loaded: boolean (nullable = true)\n",
+      " |-- dwell_time: long (nullable = true)\n",
+      " |-- locale: string (nullable = true)\n",
+      " |-- ping_type: string (nullable = true)\n",
+      " |-- saw_popup: boolean (nullable = true)\n",
+      " |-- src: string (nullable = true)\n",
+      " |-- start_time_utc: long (nullable = true)\n",
+      " |-- submission_date_s3: string (nullable = true)\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "taarv2_DF.printSchema()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Write to S3, partitioning by `branch`, since most subsequent queries will involve aggregating by this field"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "code_folding": [],
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "S3_PATH = 's3://telemetry-test-bucket/bmiroglio/taar-v2/'\n",
+    "\n",
+    "(\n",
+    "taarv2_DF\n",
+    " .repartition(1)\n",
+    " .write\n",
+    " .partitionBy('branch')\n",
+    " .mode(\"overwrite\")\n",
+    " .parquet(S3_PATH)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "n records: 8762664\n",
+      "n clients: 3491762\n",
+      "[Row(min(submission_date_s3)=u'20180312', max(submission_date_s3)=u'20180417')]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# verify\n",
+    "t = sqlContext.read.parquet(S3_PATH)\n",
+    "\n",
+    "print \"n records:\", t.count()\n",
+    "print \"n clients:\", t.select('client_id').distinct().count()\n",
+    "sd = t.select(F.min(\"submission_date_s3\"), \n",
+    "              F.max('submission_date_s3'))\n",
+    "print sd.collect()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "anaconda-cloud": {},
+  "kernelspec": {
+   "display_name": "Python [conda root]",
+   "language": "python",
+   "name": "conda-root-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
--- a/analysis/TAARV2.Rmd
+++ b/analysis/TAARV2.Rmd
@ -0,0 +1,645 @@
+---
+title: "TAAR V2 Experiment Analysis"
+author: "Ben Miroglio, Martin Lopatka"
+date: "4/18/2018"
+output: 
+  html_document:
+    theme: cosmo
+    toc_float: true
+    code_folding: hide
+---
+<style>
+ul.nav.nav-pills {
+    list-style:none;
+    position:relative;
+    left: 20%;
+}
+
+ul.nav.nav-pills li {
+  width: 20%;
+  text-align: center;
+}
+</style>
+
+# {.tabset}
+
+## Study Details
+
+### About
+
+This report outlines the results of the **2nd** shield experiment that exposes the Telemetry Aware Addon Recommendations (TAAR) service, which serves personalized recommendations to Firefox users via the discovery pane (about:addons). The experiment launched on March 12th, 2018 and ended on April 17th, 2018. 
+
+### Links
+* [Dashboard of TAAR Experiment V1](https://sql.telemetry.mozilla.org/dashboard/taar-experiment_1)
+*  [TAAR Experiment V1 Results](https://docs.google.com/presentation/d/1B6CcyGfLvIiweBgGchWEZU_7QsF0M8w07_2UG1oZW1g/edit#slide=id.g2830b63ec5_0_45)
+*  [TAAR Medium Post](https://medium.com/firefox-context-graph/add-on-recommendations-for-firefox-users-7774cc5a5117)
+
+
+### Experiment Design
+
+The Experiment has three cohorts:
+
+* `control` (no change to about:addons)
+* `linear-taar` (previous iteration of TAAR)
+* `ensemble-taar` (current iteration of TAAR)
+
+
+Users in all cohorts are exposed to a pop-up that "nudges" them to about:addons. When a user lands on the page, we can track the add-ons they install/uninstall/enable/disable, and how long they stayed on the page.
+
+### Questions?
+Questions or requests for these data can be directed to Ben Miroglio (:bmiroglio).
+
+
+
+
+## Results {.tabset .tabset-pills .tabset-fade}
+
+```{r, setup, include=T, warning=F}
+library(data.table)
+library(DT)
+library(gridExtra)
+library(ggplot2)
+
+options(DT.options = list(
+  searching=F,
+  paging=F,
+  info=F)
+)
+
+
+# util functions
+
+
+# Runs two two-sample proportion tests (stats::prop.test, two-sided by
+# default) on a data.table d that has a `branch` column, the number of
+# successes in column <c1> and the number of trials in column <c2>:
+#   * el: the rows of d that are not in the control branch
+#   * ec: the rows of d that are not in the linear-taar branch
+# Within each comparison the ensemble-taar row is placed first, so the
+# effect and confidence interval are always "ensemble minus other",
+# independent of the row order of d.
+# Returns a list holding, per comparison, the confidence-interval bounds
+# (*.low, *.high), the p-value (*.p) and the observed difference in
+# proportions (*.true).
+# NOTE(review): assumes d has exactly one row per branch -- confirm
+# against the callers.
+prop.test.by.branch <- function(d, c1, c2) {
+  # all rows except branch `excluded`, with ensemble-taar moved to the
+  # front (order() leaves ties in their original order; FALSE sorts
+  # before TRUE)
+  rows_without <- function(excluded) {
+    kept <- d[branch != excluded]
+    idx <- order(kept$branch != 'ensemble-taar')
+    kept[idx]
+  }
+  ensem_linear <- rows_without('control')
+  ensem_control <- rows_without('linear-taar')
+  
+  # successes (v1, v3) and trials (v2, v4) of each comparison
+  v1 <- ensem_linear[[c1]]
+  v2 <- ensem_linear[[c2]]
+  v3 <- ensem_control[[c1]]
+  v4 <- ensem_control[[c2]]
+  
+  tel <- prop.test(v1, v2)
+  tec <- prop.test(v3, v4)
+  
+  # the element names and their order are relied upon by spec_results
+  return(list(el.low=tel$conf.int[1],
+              el.high=tel$conf.int[2],
+              ec.low=tec$conf.int[1],
+              ec.high=tec$conf.int[2],
+              el.p=tel$p.value,
+              ec.p=tec$p.value,
+              el.true=(v1/v2)[1] - (v1/v2)[2],
+              ec.true=(v3/v4)[1] - (v3/v4)[2]))
+}
+
+# Picks the entries of a prop.test.by.branch result `p` whose names
+# match `spec` and lays them out as one formatted row:
+# Test label (`group`), Effect, lower95, upper95 and Pvalue,
+# with the numbers rounded to 6 decimals.
+spec_results <- function(spec, group, p) {
+  # long form: one row per list element (V1 = name, V2 = value)
+  flat <- data.table(names(p), unlist(p))
+  # wide form: the matching names become the header, the values the row
+  wide <- data.table(t(flat[grepl(spec, V1)]))
+  colnames(wide) <- as.character(wide[1,])
+  wide <- wide[-1,]
+  wide$Test <- group
+  colnames(wide) <- c('lower95', 'upper95', 'Pvalue', 'Effect', 'Test')
+  # the transposed values are character, so convert before rounding
+  round6 <- function(x, n=6) round(as.numeric(x), n)
+  wide[,.(Test, Effect=round6(Effect), 
+          lower95=round6(lower95),
+          upper95=round6(upper95),
+          Pvalue=round6(Pvalue))]
+}
+
+# Formats the output `p` of prop.test.by.branch
+# into one table with a row for
+# ensemble vs. control and a row for
+# ensemble vs. linear (via spec_results).
+# The rows are combined with funion(), a set union.
+get_results_table <- function(p) {
+  funion(spec_results('ec', "Ensemble vs. Control", p),
+      spec_results('el', "Ensemble vs. Linear", p))
+}
+
+# Prepares an install-statistics table for display: rounds the
+# prob_at_least_one_install and n_installs_per_client columns to
+# 4 decimals, sorts by (src, branch) and replaces the column names
+# with human-readable labels.
+#   d:     data.table of install statistics
+#   cap:   not used inside this function
+#   extra: optional extra column label(s) inserted after 'Branch';
+#          'is_en_US' is displayed as "en-US"
+# Returns the relabelled data.table.
+format_results <- function(d, cap, extra=c()) {
+  # Round floating point numbers
+  d$prob_at_least_one_install <- round(d$prob_at_least_one_install, 4)
+  d$n_installs_per_client <- round(d$n_installs_per_client, 4)
+  
+  # prettify column names
+  dis <- d[order(src, branch)]
+  if (length(extra) > 0) {
+    if (extra[1] == 'is_en_US') {
+      extra <- c("en-US")
+    }
+  }
+  # NOTE(review): the labels are assigned by position, so this presumably
+  # relies on the column order of the input table -- confirm
+  cols <- c('Branch', extra, 'Clients that Installed', 'Total Installs', 
+            'Total Clients', 'Probability of Install', 
+            'Installs per Client', "Install Source")
+  colnames(dis) <- cols
+  dis
+}
+
+
+
+
+# Read every result CSV from the working directory and
+# bind it to a variable named after the file
+# ('-' replaced by '_', e.g. taar-installs -> taar_installs).
+# PULL is only referenced by the disabled download line below.
+PULL <- T
+files <- c('ctr', 'ctr-en-us',  "taar-installs", 'taar-baseline-installs', 'taar-en-us-installs',
+          'taar-locale-installs', 'taar-visits', 'taar-en-us-visits', 'taar-locale-visits',
+          'daily_installs', 'enrollment', 'addon-retention', 'addon-en-us-retention',
+          'addon-locale-retention')
+head_cmd <- 'aws s3 cp s3://telemetry-test-bucket/bmiroglio/'
+
+for (x in files) {
+  csv <- paste(x, '.csv', sep='')
+  # command that would copy the file from S3 into the working directory
+  cmd <- paste(paste(head_cmd, csv, sep=''), '.')
+  cat(cmd)
+  # the download itself is disabled: the CSVs must already exist locally
+  #if(PULL) system(cmd)
+  assign(gsub('-', '_', x),  fread(csv))
+}
+
+
+```
+
+### Overall
+
+
+---
+
+#### Installs
+
+```{r, warning=F}
+# `caption` is spelled out rather than relying on partial matching of `cap`
+datatable(format_results(taar_installs),
+          caption="Installation Statistics per Branch, Install Source")
+```
+
+
+
+
+
+```{R}
+# significance tests for the probability of install, one set per install source
+disco <- taar_installs[src == 'about:addons']
+disco_results <- get_results_table(p = prop.test.by.branch(disco, 
+                                    'distinct_clients_that_installed',
+                                    'total_distinct_clients'))[,c("Install Source"):=("about:addons")]
+
+amo <- taar_installs[src == 'AMO']
+amo_results <- get_results_table(p = prop.test.by.branch(amo, 
+                                  'distinct_clients_that_installed',
+                                  'total_distinct_clients'))[,c("Install Source"):=("AMO")]
+
+comb_results <- funion(disco_results, amo_results)
+
+setcolorder(comb_results, c("Test", "Install Source",
+                            "Effect", "lower95", "upper95", 
+                            "Pvalue"))
+
+
+
+datatable(comb_results, 
+          caption='Significance Test Results for Probability of Install per Branch, Install Source')
+
+```
+
+-----
+
+
+
+<center>
+```{R, fig.width=6, fig.height=4}
+
+
+# dodged bars: one bar per install source within each branch
+ggplot(taar_installs) + 
+    geom_col(aes(x=branch,y=prob_at_least_one_install, fill=src),
+             position = 'dodge') +
+    theme_bw() + 
+    labs(title="Probability of Install per Branch, Install Source",
+         y="Probability of Install")
+```
+
+</center>
+
+
+
+
+
+
+```{r, fig.width=10, fig.height=6}
+# density of the daily install counts per branch, one panel per
+# install source; rows with src == 'other' are left out
+ggplot(daily_installs[src != 'other']) + 
+  geom_density(aes(count, fill=branch), alpha=.6) + 
+  facet_grid(src ~ .) + 
+  theme_bw() + 
+  labs(title='Installs per Day Distribution',
+       x='Installs per Day')
+
+
+```
+
+
+
+```{R, fig.width=10, fig.height=6}
+# parse the YYYYMMDD submission date into a Date (overwrites the column)
+daily_installs$submission_date_s3 <- as.Date(as.character(daily_installs$submission_date_s3), 
+                                             '%Y%m%d')
+
+# daily install counts per branch, one panel per install source
+ggplot(daily_installs[src != 'other']) +
+  geom_line(aes(x=submission_date_s3, y=count, color=branch)) +
+  facet_grid(src ~ .) + theme_bw() + 
+  labs(title="Installs over Time", x='Date', y='Count')
+
+# running total of installs within each (branch, src) group;
+# rows are sorted by date first so cumsum accumulates chronologically
+daily_cumsum <- (daily_installs[src != 'other']
+                              [order(submission_date_s3)]
+                              [,cumcount:=cumsum(count), 
+                                by=.(branch, src)])
+ggplot(daily_cumsum) +
+  geom_line(aes(x=submission_date_s3, y=cumcount, color=branch)) +
+  facet_grid(src ~ .) + theme_bw() + 
+  labs(title="Cumulative Installs over Time", 
+       x="Date", 
+       y='Cumulative Count')
+
+
+```
+
+
+
+
+
+
+
+
+
+
+
+
+
+------
+
+#### Discopane Visits (exposure to treatment)
+
+```{r}
+# the rate columns are converted with as.numeric before rounding
+taar_visits$prob_visit <- round(as.numeric(taar_visits$prob_visit), 6)
+taar_visits$visits_per_client <- round(as.numeric(taar_visits$visits_per_client), 6)
+dis <- taar_visits[,.(branch, total_visits, total_distinct_clients, visits_per_client)]
+colnames(dis) <- c("Branch", "Total Visits", "Total Clients", "Visits per Client")
+datatable(dis[order(Branch)], caption="Discopane Visit Statistics per Branch")
+
+
+```
+
+
+------
+
+
+#### Pop-up Click Through Rate
+
+
+```{r}
+
+# round the click-through rate for display
+ctr$prob_click <- round(ctr$prob_click, 4)
+cols <- c("UI", "Clients that clicked", "Total Clients", "Click Through Rate")
+dis <- ctr[,.(branch, n_clicked, n_total, prob_click)]
+colnames(dis) <- cols
+# NOTE(review): the labels are assigned by row position, so this assumes
+# ctr has exactly two rows with the old-UI row first -- confirm
+dis$UI <- c("Old UI", "New UI")
+datatable(dis, caption='Popup Click Through Rates by Branch')
+
+```
+
+
+---
+
+#### Add-on Retention
+
+
+
+```{R}
+# reorder the columns of addon_retention in place
+setcolorder(addon_retention, c('branch', 'n_installed', 'n_total', 
+                               'n_retained', 'addon_retention_rate'))
+dis <- addon_retention[,.(branch, n_retained, n_total, addon_retention_rate)]
+cols <- c("Branch", "Retained", "Total", "Retention Rate")
+colnames(dis) <- cols
+
+datatable(dis, caption = "Add-on Retention Rate by Branch")
+
+# proportion tests with n_retained as successes and n_total as trials
+datatable(get_results_table(p = prop.test.by.branch(addon_retention, 
+                                  'n_retained',
+                                  'n_total')),
+           caption='Significance Test Results For Add-on Retention Rate')
+
+
+```
+
+<br><br><br>
+<br><br><br>
+<br><br><br>
+
+
+
+### en-US vs. non en-US
+
+----
+
+#### Installs
+
+```{r, warning=F}
+
+format_results(taar_en_us_installs, extra=c("is_en_US"))
+
+disco <- taar_en_us_installs[src == 'about:addons']
+amo <- taar_en_us_installs[src == 'AMO']
+
+# run the significance tests separately for en-US and non en-US clients
+result <- NULL
+for (i in c(TRUE, FALSE)) {
+  disco_results <- get_results_table(p = prop.test.by.branch(disco[is_en_US == i], 
+                      'distinct_clients_that_installed',
+                      'total_distinct_clients'))[,c("Install Source", "en-US"):=list("about:addons", i)]
+  amo_results <- get_results_table(p = prop.test.by.branch(amo[is_en_US == i], 
+                      'distinct_clients_that_installed',
+                      'total_distinct_clients'))[,c("Install Source", "en-US"):=list("AMO", i)]
+  comb_results <- funion(disco_results, amo_results)
+
+  setcolorder(comb_results, c("Test", "Install Source","en-US",
+                             "Effect", "lower95", "upper95", 
+                             "Pvalue"))
+  
+  # accumulate the rows of both groups
+  if (is.null(result)) {
+    result <- comb_results
+  } else {
+    result <- funion(result, comb_results)
+  } 
+}
+
+datatable(result,
+          caption='Significance Test Results for Probability of Install per Branch, Install Source, en-US')
+```
+
+
+
+<center>
+
+```{R, fig.width=10, fig.height=6}
+
+# readable x-axis label derived from the is_en_US flag
+taar_en_us_installs$locale <- ifelse(taar_en_us_installs$is_en_US, 'en-US', 'not en-US')
+ggplot(taar_en_us_installs) + 
+    geom_col(aes(x=locale,y=prob_at_least_one_install, fill=branch),
+             position = 'dodge', alpha=.8) +
+    facet_grid(src ~ .) +
+    theme_bw() + 
+    labs(title="Probability of Install per Branch, Locale",
+         y="Probability of Install")
+```
+
+</center>
+
+
+
+
+
+------
+
+#### Discopane Visits (exposure to treatment)
+
+```{r}
+
+# the rate columns are converted with as.numeric before rounding
+taar_en_us_visits$prob_visit <- round(
+  as.numeric(taar_en_us_visits$prob_visit), 6)
+taar_en_us_visits$visits_per_client <- round(
+  as.numeric(taar_en_us_visits$visits_per_client), 6)
+dis <- taar_en_us_visits[,.(branch, is_en_US, total_visits, total_distinct_clients, visits_per_client)]
+colnames(dis) <- c("Branch", "is en-US", "Total Visits", "Total Clients", "Visits per Client")
+datatable(dis[order(Branch)], caption="Discopane Visit Statistics per Branch")
+
+
+```
+
+
+------
+
+
+#### Pop-up Click Through Rate
+
+
+```{r}
+
+# round this table's own click-through rates
+# (not those of the overall `ctr` table)
+ctr_en_us$prob_click <- round(as.numeric(ctr_en_us$prob_click), 4)
+cols <- c("UI", 'is en-US', "Clients that clicked", "Total Clients", "Click Through Rate")
+dis <- ctr_en_us[,.(branch, is_en_US, n_clicked, n_total, prob_click)]
+colnames(dis) <- cols
+# branch value 'old' is shown as "Old UI", everything else as "New UI"
+dis$UI <- ifelse(dis$UI == 'old', 'Old UI', 'New UI')
+datatable(dis, caption='Popup Click Through Rates by Branch')
+
+```
+
+
+---
+
+#### Add-on Retention
+
+
+
+```{R}
+# reorder the columns of addon_en_us_retention in place
+setcolorder(addon_en_us_retention, c('branch', 'is_en_US', 'n_installed', 'n_total', 
+                               'n_retained', 'addon_retention_rate'))
+dis <- addon_en_us_retention[,.(branch, is_en_US, n_retained, n_total, addon_retention_rate)]
+cols <- c("Branch", "is en-US", "Retained", "Total", "Retention Rate")
+colnames(dis) <- cols
+
+datatable(dis, caption = "Add-on Retention Rate by Branch, en-US locale")
+
+
+# proportion tests (n_retained out of n_total), run separately for
+# en-US (i = TRUE) and non en-US (i = FALSE) rows
+result <- NULL
+for (i in c(T, F)) {
+  
+  r <- get_results_table(p = prop.test.by.branch(addon_en_us_retention[is_en_US == i], 
+                                    'n_retained',
+                                    'n_total'))[,c('is en-US'):=(i)]
+  # accumulate the rows of both groups
+  if (is.null(result)) {
+    result <- r
+  } else {
+    result <- funion(result, r)
+  }
+}
+
+setcolorder(result, c("Test", "is en-US", "Effect", "lower95", "upper95", "Pvalue"))
+datatable(result, caption='Significance Test Results For Add-on Retention Rate')
+
+
+```
+
+
+<br><br><br>
+<br><br><br>
+<br><br><br>
+
+
+
+### By Locale
+
+
+----
+
+#### Installs
+
+```{r, warning=F}
+# the per-locale tables are long, so enable searching and paging
+options(DT.options = list(
+  searching=T,
+  paging=T,
+  info=F)
+)
+
+
+format_results(taar_locale_installs[order(total_installs)], 
+               cap="Installation Statistics per Branch, Install Source, Locale",
+               extra=c("locale"))
+
+# NOTE(review): disco and amo are taken before the > 500 clients filter
+# below, so they still contain the low-volume rows -- confirm this is intended
+disco = taar_locale_installs[src == 'about:addons']
+amo = taar_locale_installs[src == 'AMO']
+
+taar_locale_installs <- taar_locale_installs[total_distinct_clients > 500]
+
+# run the significance tests once per remaining locale
+result <- NULL
+for (i in unique(taar_locale_installs$locale)) {
+  disco_results <- get_results_table(p = prop.test.by.branch(disco[locale == i], 
+                      'distinct_clients_that_installed',
+                      'total_distinct_clients'))[,c("Install Source", "Locale"):=list("about:addons", i)]
+  amo_results <- get_results_table(p = prop.test.by.branch(amo[locale == i], 
+                      'distinct_clients_that_installed',
+                      'total_distinct_clients'))[,c("Install Source", "Locale"):=list("AMO", i)]
+  comb_results <- funion(disco_results, amo_results)
+
+  setcolorder(comb_results, c("Test", "Install Source","Locale",
+                             "Effect", "lower95", "upper95", 
+                             "Pvalue"))
+  
+  # accumulate the rows of all locales
+  if (is.null(result)) {
+    result <- comb_results
+  } else {
+    result <- funion(result, comb_results)
+  } 
+}
+
+datatable(result[order(Locale)],
+          caption='Significance Test Results for Probability of Install per Branch, Install Source, Locale')
+```
+
+
+
+<center>
+
+```{R, fig.width=10, fig.height=6}
+
+# only rows with more than 500 distinct clients are plotted
+ggplot(taar_locale_installs[total_distinct_clients > 500]) + 
+    geom_col(aes(x=locale,y=prob_at_least_one_install, fill=branch),
+             position = 'dodge', alpha=.8) +
+    facet_grid(src ~ .) +
+    theme_bw() + 
+    labs(title="Probability of Install per Branch, Locale (n distinct clients > 500)",
+         y="Probability of Install")
+```
+
+</center>
+
+
+
+
+
+------
+
+#### Discopane Visits (exposure to treatment)
+
+```{r}
+
+# the rate columns are converted with as.numeric before rounding
+taar_locale_visits$prob_visit <- round(
+  as.numeric(taar_locale_visits$prob_visit), 6)
+taar_locale_visits$visits_per_client <- round(
+  as.numeric(taar_locale_visits$visits_per_client), 6)
+dis <- taar_locale_visits[,.(branch, locale, total_visits, total_distinct_clients, visits_per_client)]
+colnames(dis) <- c("Branch", "Locale", "Total Visits", "Total Clients", "Visits per Client")
+datatable(dis[order(Locale)], caption="Discopane Visit Statistics per Branch, Locale")
+
+
+```
+
+
+------
+
+
+#### Pop-up Click Through Rate
+
+
+```{r}
+
+# round this table's own click-through rates
+# (not those of the overall `ctr` table)
+ctr_en_us$prob_click <- round(as.numeric(ctr_en_us$prob_click), 4)
+cols <- c("UI", 'is en-US', "Clients that clicked", "Total Clients", "Click Through Rate")
+dis <- ctr_en_us[,.(branch, is_en_US, n_clicked, n_total, prob_click)]
+colnames(dis) <- cols
+# branch value 'old' is shown as "Old UI", everything else as "New UI"
+dis$UI <- ifelse(dis$UI == 'old', 'Old UI', 'New UI')
+datatable(dis, caption='Popup Click Through Rates by Branch')
+
+```
+
+
+---
+
+#### Add-on Retention
+
+
+
+```{R}
+# reorder the columns of addon_locale_retention in place
+setcolorder(addon_locale_retention, c('branch', 'locale', 'n_installed', 'n_total', 
+                               'n_retained', 'addon_retention_rate'))
+dis <- addon_locale_retention[,.(branch, locale, n_retained, n_total, addon_retention_rate)]
+cols <- c("Branch", "Locale", "Retained", "Total", "Retention Rate")
+colnames(dis) <- cols
+
+datatable(dis, caption = "Add-on Retention Rate by Branch, Locale")
+
+
+# proportion tests (n_retained out of n_total), one set per locale
+result <- NULL
+for (i in unique(addon_locale_retention$locale)) {
+  # a locale for which the test raises an error is skipped
+  r <- tryCatch({
+    get_results_table(p = prop.test.by.branch(addon_locale_retention[locale == i], 
+                                    'n_retained',
+                                    'n_total'))[,c('Locale'):=(i)]
+
+    }, error = function(e) {
+        NULL
+    })
+
+  if(is.null(r)) next
+  
+  # accumulate the rows of all locales
+  if (is.null(result)) {
+    result <- r
+  } else {
+    result <- funion(result, r)
+  }
+}
+
+setcolorder(result, c("Test", "Locale", "Effect", "lower95", "upper95", "Pvalue"))
+datatable(result, caption='Significance Test Results For Add-on Retention Rate')
+
+
+```
+
+
+<br><br><br>
+<br><br><br>
+<br><br><br>
+