зеркало из https://github.com/mozilla/taar.git
Add preliminary analysis
This commit is contained in:
Родитель
94d0de5175
Коммит
f4ec75f5c2
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -0,0 +1,325 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from moztelemetry import Dataset\n",
|
||||
"from pyspark.sql import Row\n",
|
||||
"from pyspark.sql.types import BooleanType, LongType\n",
|
||||
"import pandas as pd\n",
|
||||
"import pyspark.sql.functions as F\n",
|
||||
"import datetime as dt\n",
|
||||
"\n",
|
||||
"sc.setLogLevel(\"INFO\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Define util funcs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def utc2date(seconds):\n",
|
||||
" \"\"\"\n",
|
||||
" Takes unix time in seconds and returns a string representation\n",
|
||||
" \"\"\"\n",
|
||||
" utc = dt.datetime(1970, 1, 1)\n",
|
||||
" try:\n",
|
||||
" return dt.datetime.strftime(utc + dt.timedelta(seconds=seconds), format='%Y%m%d')\n",
|
||||
" except:\n",
|
||||
" return None\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"def shield_data(x):\n",
|
||||
" \"\"\"\n",
|
||||
" Grabs the data reported by the shield add-on \n",
|
||||
" \"\"\"\n",
|
||||
" return x.get(\"payload\", {}).get(\"data\", {}).get(\"attributes\", {})\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def _cast(col, f):\n",
|
||||
" if col != 'null':\n",
|
||||
" try:\n",
|
||||
" return f(col)\n",
|
||||
" except:\n",
|
||||
" pass\n",
|
||||
" return\n",
|
||||
"\n",
|
||||
"_bool = lambda x: True if x == 'true' else False\n",
|
||||
"\n",
|
||||
"castLong = F.udf(lambda x: _cast(x, long), LongType())\n",
|
||||
"castBool = F.udf(lambda x: _cast(x, _bool), BooleanType())\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"def collapse_fields(x):\n",
|
||||
" \"\"\"\n",
|
||||
" Collapsed nested field names \n",
|
||||
" and returns a flattened object as a \n",
|
||||
" PySpark Row to prepare for DataFrame \n",
|
||||
" conversion\n",
|
||||
" \"\"\"\n",
|
||||
" if x is None:\n",
|
||||
" x = {}\n",
|
||||
" data = x.get(\"payload\", {}).get(\"data\").get(\"attributes\", {})\n",
|
||||
" addons= x.get(\"environment\", {}).get(\"addons\", {}).get(\"activeAddons\", {})\n",
|
||||
" result = Row(\n",
|
||||
" client_id=x.get(\"clientId\"),\n",
|
||||
" locale=x.get(\"environment\", {}).get(\"settings\", {}).get(\"locale\"),\n",
|
||||
" branch=x.get(\"payload\", {}).get(\"branch\"),\n",
|
||||
" addon_id=data.get(\"addon_id\"),\n",
|
||||
" clicked_button=data.get(\"clickedButton\"),\n",
|
||||
" creation_date=x.get(\"creationDate\"),\n",
|
||||
" ping_type=data.get(\"pingType\"),\n",
|
||||
" saw_popup=data.get(\"sawPopup\"),\n",
|
||||
" src=data.get(\"srcURI\"),\n",
|
||||
" start_time_utc=data.get(\"startTime\"),\n",
|
||||
" dwell_time=data.get(\"aboutAddonsActiveTabSeconds\"),\n",
|
||||
" discopane_loaded=data.get(\"discoPaneLoaded\"),\n",
|
||||
" submission_date_s3=x.get(\"meta\").get(\"submissionDate\"),\n",
|
||||
" current_addons=[i for i in addons if \\\n",
|
||||
" not addons[i].get('isSystem', True) and \\\n",
|
||||
" not addons[i].get('foreignInstall', True)]\n",
|
||||
" )\n",
|
||||
" return result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Define study dates in string and unix format"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"20180312\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"START_DATE_UNIX = 17602 * 24 * 60 * 60\n",
|
||||
"START_DATE_STR = utc2date(START_DATE_UNIX)\n",
|
||||
"print START_DATE_STR\n",
|
||||
"END_DATE_STR = \"20180418\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Load raw pings from experiment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"code_folding": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"fetching 15639.14247MB in 284387 files...\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# load all taar pings from our adjusted start date of 20171008\n",
|
||||
"taarv2_pings = (\n",
|
||||
" Dataset.from_source(\"telemetry\")\n",
|
||||
" .where(docType=\"shield-study-addon\")\n",
|
||||
" .where(submissionDate=lambda x: x >= START_DATE_STR and x <= END_DATE_STR)\n",
|
||||
" .records(sc)\n",
|
||||
" .filter(lambda x: x.get(\"payload\", {}).get(\"study_name\") == \"TAARExperimentV2\")\n",
|
||||
" .filter(lambda x: x.get(\"payload\", {}).get(\"addon_version\") == \"1.0.13\")\n",
|
||||
" .filter(lambda x: x.get(\"payload\", {}).get(\"testing\") == False)\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Convert pings to a structured spark DataFrame"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# sampleRatio infers schema from first 0.1% of rows\n",
|
||||
"taarv2_DF = taarv2_pings.map(collapse_fields).toDF(sampleRatio=0.001)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Cast non-string columns to the appropriate type"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"bool_cols = [\n",
|
||||
" 'discopane_loaded',\n",
|
||||
" 'clicked_button',\n",
|
||||
" 'saw_popup', \n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"long_cols = [\n",
|
||||
" 'start_time_utc',\n",
|
||||
" 'dwell_time',\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"for b in bool_cols:\n",
|
||||
" taarv2_DF = taarv2_DF.withColumn(b, castBool(b))\n",
|
||||
" \n",
|
||||
"for l in long_cols:\n",
|
||||
" taarv2_DF = taarv2_DF.withColumn(l, castLong(l))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"root\n",
|
||||
" |-- addon_id: string (nullable = true)\n",
|
||||
" |-- branch: string (nullable = true)\n",
|
||||
" |-- clicked_button: boolean (nullable = true)\n",
|
||||
" |-- client_id: string (nullable = true)\n",
|
||||
" |-- creation_date: string (nullable = true)\n",
|
||||
" |-- current_addons: array (nullable = true)\n",
|
||||
" | |-- element: string (containsNull = true)\n",
|
||||
" |-- discopane_loaded: boolean (nullable = true)\n",
|
||||
" |-- dwell_time: long (nullable = true)\n",
|
||||
" |-- locale: string (nullable = true)\n",
|
||||
" |-- ping_type: string (nullable = true)\n",
|
||||
" |-- saw_popup: boolean (nullable = true)\n",
|
||||
" |-- src: string (nullable = true)\n",
|
||||
" |-- start_time_utc: long (nullable = true)\n",
|
||||
" |-- submission_date_s3: string (nullable = true)\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"taarv2_DF.printSchema()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Write to S3, partitioning by `branch`, since most subsequent queries will involve aggregating by this field"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"S3_PATH = 's3://telemetry-test-bucket/bmiroglio/taar-v2/'\n",
|
||||
"\n",
|
||||
"(\n",
|
||||
"taarv2_DF\n",
|
||||
" .repartition(1)\n",
|
||||
" .write\n",
|
||||
" .partitionBy('branch')\n",
|
||||
" .mode(\"overwrite\")\n",
|
||||
" .parquet(S3_PATH)\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"n records: 8762664\n",
|
||||
"n clients: 3491762\n",
|
||||
"[Row(min(submission_date_s3)=u'20180312', max(submission_date_s3)=u'20180417')]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# verify\n",
|
||||
"t = sqlContext.read.parquet(S3_PATH)\n",
|
||||
"\n",
|
||||
"print \"n records:\", t.count()\n",
|
||||
"print \"n clients:\", t.select('client_id').distinct().count()\n",
|
||||
"sd = t.select(F.min(\"submission_date_s3\"), \n",
|
||||
" F.max('submission_date_s3'))\n",
|
||||
"print sd.collect()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"anaconda-cloud": {},
|
||||
"kernelspec": {
|
||||
"display_name": "Python [conda root]",
|
||||
"language": "python",
|
||||
"name": "conda-root-py"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
|
@ -0,0 +1,645 @@
|
|||
---
|
||||
title: "TAAR V2 Experiment Analysis"
|
||||
author: "Ben Miroglio, Martin Lopatka"
|
||||
date: "4/18/2018"
|
||||
output:
|
||||
html_document:
|
||||
theme: cosmo
|
||||
toc_float: true
|
||||
code_folding: hide
|
||||
---
|
||||
<style>
|
||||
ul.nav.nav-pills {
|
||||
list-style:none;
|
||||
position:relative;
|
||||
left: 20%;
|
||||
}
|
||||
|
||||
ul.nav.nav-pills li {
|
||||
width: 20%;
|
||||
text-align: center;
|
||||
}
|
||||
</style>
|
||||
|
||||
# {.tabset}
|
||||
|
||||
## Study Details
|
||||
|
||||
### About
|
||||
|
||||
This report outlines the results of the **2nd** shield experiment that exposes the Telemetry Aware Addon Recommendations (TAAR) service, which serves personalized recommendations to Firefox users via the discovery pane (about:addons). The experiment launched on March 12th, 2018 and ended on April 17th, 2018.
|
||||
|
||||
### Links
|
||||
* [Dashboard of TAAR Experiment V1](https://sql.telemetry.mozilla.org/dashboard/taar-experiment_1)
|
||||
* [TAAR Experiment V1 Results](https://docs.google.com/presentation/d/1B6CcyGfLvIiweBgGchWEZU_7QsF0M8w07_2UG1oZW1g/edit#slide=id.g2830b63ec5_0_45)
|
||||
* [TAAR Medium Post](https://medium.com/firefox-context-graph/add-on-recommendations-for-firefox-users-7774cc5a5117)
|
||||
|
||||
|
||||
### Experiment Design
|
||||
|
||||
The Experiment has three cohorts:
|
||||
|
||||
* `control` (no change to about:addons)
|
||||
* `linear-taar` (previous iteration of TAAR)
|
||||
* `ensemble-taar` (current iteration of TAAR)
|
||||
|
||||
|
||||
Users in all cohorts are exposed to a pop-up that "nudges" them to about:addons. When a user lands on the page, we can track the add-ons they install/uninstall/enable/disable, and how long they stayed on the page.
|
||||
|
||||
### Questions?
|
||||
Questions or requests for these data can be directed to Ben Miroglio (:bmiroglio).
|
||||
|
||||
|
||||
|
||||
|
||||
## Results {.tabset .tabset-pills .tabset-fade}
|
||||
|
||||
```{r, setup, include=T, warning=F}
|
||||
library(data.table)
|
||||
library(DT)
|
||||
library(gridExtra)
|
||||
library(ggplot2)
|
||||
|
||||
options(DT.options = list(
|
||||
searching=F,
|
||||
paging=F,
|
||||
info=F)
|
||||
)
|
||||
|
||||
|
||||
# util functions
|
||||
|
||||
|
||||
# Performs a two-sided proportion test
|
||||
# on a dataframe d, containing the
|
||||
# number of successes as a column <c1>
|
||||
# and the number of trials as a column <c2>
|
||||
prop.test.by.branch <- function(d, c1, c2) {
|
||||
ensem_linear <- d[branch!='control']
|
||||
ensem_control <- d[branch!='linear-taar']
|
||||
|
||||
tovec <- function(dt) as.matrix(dt)[,1]
|
||||
|
||||
|
||||
v1 <- tovec(ensem_linear[,c1, with=F])
|
||||
v2 <- tovec(ensem_linear[,c2, with=F])
|
||||
v3 <- tovec(ensem_control[,c1, with=F])
|
||||
v4 <- tovec(ensem_control[,c2, with=F])
|
||||
|
||||
tel <- prop.test(v1, v2)
|
||||
tec <- prop.test(v3, v4)
|
||||
|
||||
tel.low <- tel$conf.int[1]
|
||||
tel.upper <- tel$conf.int[2]
|
||||
|
||||
tec.low <- tec$conf.int[1]
|
||||
tec.upper <- tec$conf.int[2]
|
||||
|
||||
return(list(el.low=tel.low,
|
||||
el.high=tel.upper,
|
||||
ec.low=tec.low,
|
||||
ec.high=tec.upper,
|
||||
el.p=tel$p.val,
|
||||
ec.p=tec$p.val,
|
||||
el.true=(v1/v2)[1] - (v1/v2)[2],
|
||||
ec.true=(v3/v4)[1] - (v3/v4)[2]))
|
||||
|
||||
|
||||
}
|
||||
|
||||
# unpacks the results of a proportion test
|
||||
# and arranges the effect, CIs, and p-value
|
||||
# into a formatted table.
|
||||
spec_results <- function(spec, group, p) {
|
||||
r <- data.table(names(p), unlist(p))
|
||||
s <- data.table(t(r[grepl(spec, V1)]))
|
||||
colnames(s) <- as.character(s[1,])
|
||||
s <- s[-1,]
|
||||
r <- function(x, n=6) round(as.numeric(x), n)
|
||||
s$Test <- group
|
||||
colnames(s) <- c('lower95', 'upper95', 'Pvalue', 'Effect', 'Test')
|
||||
return(s[,.(Test, Effect=r(Effect),
|
||||
lower95=r(lower95),
|
||||
upper95=r(upper95),
|
||||
Pvalue=r(Pvalue))])
|
||||
}
|
||||
|
||||
# Performs a proportion test
|
||||
# for ensemble vs. linear
|
||||
# and ensemble vs. control
|
||||
# and organizes results into a table
|
||||
get_results_table <- function(p) {
|
||||
funion(spec_results('ec', "Ensemble vs. Control", p),
|
||||
spec_results('el', "Ensemble vs. Linear", p))
|
||||
}
|
||||
|
||||
format_results <- function(d, cap, extra=c()) {
|
||||
# Round floating point numbers
|
||||
d$prob_at_least_one_install <- round(d$prob_at_least_one_install, 4)
|
||||
d$n_installs_per_client <- round(d$n_installs_per_client, 4)
|
||||
|
||||
# prettify column names
|
||||
dis <- d[order(src, branch)]
|
||||
if (length(extra) > 0) {
|
||||
if (extra[1] == 'is_en_US') {
|
||||
extra <- c("en-US")
|
||||
}
|
||||
}
|
||||
cols <- c('Branch', extra, 'Clients that Installed', 'Total Installs',
|
||||
'Total Clients', 'Probability of Install',
|
||||
'Installs per Client', "Install Source")
|
||||
colnames(dis) <- cols
|
||||
dis
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
# pull files living in s3
|
||||
# and load into R
|
||||
PULL <- T
|
||||
files <- c('ctr', 'ctr-en-us', "taar-installs", 'taar-baseline-installs', 'taar-en-us-installs',
|
||||
'taar-locale-installs', 'taar-visits', 'taar-en-us-visits', 'taar-locale-visits',
|
||||
'daily_installs', 'enrollment', 'addon-retention', 'addon-en-us-retention',
|
||||
'addon-locale-retention')
|
||||
head_cmd <- 'aws s3 cp s3://telemetry-test-bucket/bmiroglio/'
|
||||
|
||||
for (x in files) {
|
||||
csv <- paste(x, '.csv', sep='')
|
||||
cmd <- paste(paste(head_cmd, csv, sep=''), '.')
|
||||
cat(cmd)
|
||||
#if(PULL) system(cmd)
|
||||
assign(gsub('-', '_', x), fread(csv))
|
||||
}
|
||||
|
||||
|
||||
```
|
||||
|
||||
### Overall
|
||||
|
||||
|
||||
---
|
||||
|
||||
#### Installs
|
||||
|
||||
```{r, warning=F}
|
||||
datatable(format_results(taar_installs),
|
||||
cap="Installation Statistics per Branch, Install Source")
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
```{R}
|
||||
disco = taar_installs[src == 'about:addons']
|
||||
disco_results <- get_results_table(p = prop.test.by.branch(disco,
|
||||
'distinct_clients_that_installed',
|
||||
'total_distinct_clients'))[,c("Install Source"):=("about:addons")]
|
||||
|
||||
amo = taar_installs[src == 'AMO']
|
||||
amo_results <- get_results_table(p = prop.test.by.branch(amo,
|
||||
'distinct_clients_that_installed',
|
||||
'total_distinct_clients'))[,c("Install Source"):=("AMO")]
|
||||
|
||||
comb_results <- funion(disco_results, amo_results)
|
||||
|
||||
setcolorder(comb_results, c("Test", "Install Source",
|
||||
"Effect", "lower95", "upper95",
|
||||
"Pvalue"))
|
||||
|
||||
|
||||
|
||||
datatable(comb_results,
|
||||
caption='Signifiance Test Results for Probability of Install per Branch, Install Source')
|
||||
|
||||
```
|
||||
|
||||
-----
|
||||
|
||||
|
||||
|
||||
<center>
|
||||
```{R, fig.width=6, fig.height=4}
|
||||
|
||||
|
||||
ggplot(taar_installs) +
|
||||
geom_col(aes(x=branch,y=prob_at_least_one_install, fill=src),
|
||||
position = 'dodge') +
|
||||
theme_bw() +
|
||||
labs(title="Probability of Install per Branch, Install Source",
|
||||
y="Probility of Install")
|
||||
```
|
||||
|
||||
</center>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
```{r, fig.width=10, fig.height=6}
|
||||
ggplot(daily_installs[src != 'other']) +
|
||||
geom_density(aes(count, fill=branch), alpha=.6) +
|
||||
facet_grid(src ~ .) +
|
||||
theme_bw() +
|
||||
labs(title='Installs per Day Distribution',
|
||||
x='Installs per Day')
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
```{R, fig.width=10, fig.height=6}
|
||||
daily_installs$submission_date_s3 <- as.Date(as.character(daily_installs$submission_date_s3),
|
||||
'%Y%m%d')
|
||||
|
||||
ggplot(daily_installs[src != 'other']) +
|
||||
geom_line(aes(x=submission_date_s3, y=count, color=branch)) +
|
||||
facet_grid(src ~ .) + theme_bw() +
|
||||
labs(title="Installs over Time", x='Date', y='Count')
|
||||
|
||||
daily_cumsum <- (daily_installs[src != 'other']
|
||||
[order(submission_date_s3)]
|
||||
[,cumcount:=cumsum(count),
|
||||
by=.(branch, src)])
|
||||
ggplot(daily_cumsum) +
|
||||
geom_line(aes(x=submission_date_s3, y=cumcount, color=branch)) +
|
||||
facet_grid(src ~ .) + theme_bw() +
|
||||
labs(title="Cumulative Installs over Time",
|
||||
x="Date",
|
||||
y='Cumulative Count')
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
------
|
||||
|
||||
#### Disopane Visits (exposure to treatment)
|
||||
|
||||
```{r}
|
||||
taar_visits$prob_visit <- round(as.numeric(taar_visits$prob_visit), 6)
|
||||
taar_visits$visits_per_client <- round(as.numeric(taar_visits$visits_per_client), 6)
|
||||
dis <- taar_visits[,.(branch, total_visits, total_distinct_clients, visits_per_client)]
|
||||
colnames(dis) <- c("Branch", "Total Visits", "Total Clients", "Vistits per Client")
|
||||
datatable(dis[order(Branch)], caption="Discopane Visit Statistics per Branch")
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
||||
------
|
||||
|
||||
|
||||
#### Pop-up Click Through Rate
|
||||
|
||||
|
||||
```{r}
|
||||
|
||||
ctr$prob_click <- round(ctr$prob_click, 4)
|
||||
cols <- c("UI", "Clients that clicked", "Total Clients", "Click Through Rate")
|
||||
dis <- ctr[,.(branch, n_clicked, n_total, prob_click)]
|
||||
colnames(dis) <- cols
|
||||
dis$UI <- c("Old UI", "New UI")
|
||||
datatable(dis, caption='Popup Click Through Rates by Branch')
|
||||
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
#### Add-on Retention
|
||||
|
||||
|
||||
|
||||
```{R}
|
||||
setcolorder(addon_retention, c('branch', 'n_installed', 'n_total',
|
||||
'n_retained', 'addon_retention_rate'))
|
||||
dis <- addon_retention[,.(branch, n_retained, n_total, addon_retention_rate)]
|
||||
cols <- c("Branch", "Retained", "Total", "Retention Rate")
|
||||
colnames(dis) <- cols
|
||||
|
||||
datatable(dis, caption = "Add-on Retention Rate by Branch")
|
||||
|
||||
datatable(get_results_table(p = prop.test.by.branch(addon_retention,
|
||||
'n_retained',
|
||||
'n_total')),
|
||||
caption='Significance Test Results For Add-on Retention Rate')
|
||||
|
||||
|
||||
```
|
||||
|
||||
<br><br><br>
|
||||
<br><br><br>
|
||||
<br><br><br>
|
||||
|
||||
|
||||
|
||||
### en-US vs. non en-US
|
||||
|
||||
----
|
||||
|
||||
#### Installs
|
||||
|
||||
```{r, warning=F}
|
||||
|
||||
format_results(taar_en_us_installs, extra=c("is_en_US"))
|
||||
|
||||
disco = taar_en_us_installs[src == 'about:addons']
|
||||
amo = taar_en_us_installs[src == 'AMO']
|
||||
|
||||
result <- NULL
|
||||
for (i in c(T, F)) {
|
||||
disco_results <- get_results_table(p = prop.test.by.branch(disco[is_en_US == i],
|
||||
'distinct_clients_that_installed',
|
||||
'total_distinct_clients'))[,c("Install Source", "en-US"):=list("about:addons", i)]
|
||||
amo_results <- get_results_table(p = prop.test.by.branch(amo[is_en_US == i],
|
||||
'distinct_clients_that_installed',
|
||||
'total_distinct_clients'))[,c("Install Source", "en-US"):=list("AMO", i)]
|
||||
comb_results <- funion(disco_results, amo_results)
|
||||
|
||||
setcolorder(comb_results, c("Test", "Install Source","en-US",
|
||||
"Effect", "lower95", "upper95",
|
||||
"Pvalue"))
|
||||
|
||||
if (is.null(result)) {
|
||||
result <- comb_results
|
||||
} else {
|
||||
result <- funion(result, comb_results)
|
||||
}
|
||||
}
|
||||
|
||||
datatable(result,
|
||||
caption='Signifiance Test Results for Probability of Install per Branch, Install Source, en-US')
|
||||
```
|
||||
|
||||
|
||||
|
||||
<center>
|
||||
|
||||
```{R, fig.width=10, fig.height=6}
|
||||
|
||||
taar_en_us_installs$locale <- ifelse(taar_en_us_installs$is_en_US, 'en-US', 'not en-US')
|
||||
ggplot(taar_en_us_installs) +
|
||||
geom_col(aes(x=locale,y=prob_at_least_one_install, fill=branch),
|
||||
position = 'dodge', alpha=.8) +
|
||||
facet_grid(src ~ .) +
|
||||
theme_bw() +
|
||||
labs(title="Probability of Install per Branch, Locale",
|
||||
y="Probility of Install")
|
||||
```
|
||||
|
||||
</center>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
------
|
||||
|
||||
#### Disopane Visits (exposure to treatment)
|
||||
|
||||
```{r}
|
||||
|
||||
taar_en_us_visits$prob_visit <- round(
|
||||
as.numeric(taar_en_us_visits$prob_visit), 6)
|
||||
taar_en_us_visits$visits_per_client <- round(
|
||||
as.numeric(taar_en_us_visits$visits_per_client), 6)
|
||||
dis <- taar_en_us_visits[,.(branch, is_en_US, total_visits, total_distinct_clients, visits_per_client)]
|
||||
colnames(dis) <- c("Branch", "is en-US", "Total Visits", "Total Clients", "Vistits per Client")
|
||||
datatable(dis[order(Branch)], caption="Discopane Visit Statistics per Branch")
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
||||
------
|
||||
|
||||
|
||||
#### Pop-up Click Through Rate
|
||||
|
||||
|
||||
```{r}
|
||||
|
||||
ctr_en_us$prob_click <- round(ctr$prob_click, 4)
|
||||
cols <- c("UI", 'is en-US', "Clients that clicked", "Total Clients", "Click Through Rate")
|
||||
dis <- ctr_en_us[,.(branch, is_en_US, n_clicked, n_total, prob_click)]
|
||||
colnames(dis) <- cols
|
||||
dis$UI <- ifelse(dis$UI == 'old', 'Old UI', 'New UI')
|
||||
datatable(dis, caption='Popup Click Through Rates by Branch')
|
||||
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
#### Add-on Retention
|
||||
|
||||
|
||||
|
||||
```{R}
|
||||
setcolorder(addon_en_us_retention, c('branch', 'is_en_US', 'n_installed', 'n_total',
|
||||
'n_retained', 'addon_retention_rate'))
|
||||
dis <- addon_en_us_retention[,.(branch, is_en_US, n_retained, n_total, addon_retention_rate)]
|
||||
cols <- c("Branch", "is en-US", "Retained", "Total", "Retention Rate")
|
||||
colnames(dis) <- cols
|
||||
|
||||
datatable(dis, caption = "Add-on Retention Rate by Branch, en-US locale")
|
||||
|
||||
|
||||
result <- NULL
|
||||
for (i in c(T, F)) {
|
||||
|
||||
r <- get_results_table(p = prop.test.by.branch(addon_en_us_retention[is_en_US == i],
|
||||
'n_retained',
|
||||
'n_total'))[,c('is en-US'):=(i)]
|
||||
if (is.null(result)) {
|
||||
result <- r
|
||||
} else {
|
||||
result <- funion(result, r)
|
||||
}
|
||||
}
|
||||
|
||||
setcolorder(result, c("Test", "is en-US", "Effect", "lower95", "upper95", "Pvalue"))
|
||||
datatable(result, caption='Significance Test Results For Add-on Retention Rate')
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
||||
<br><br><br>
|
||||
<br><br><br>
|
||||
<br><br><br>
|
||||
|
||||
|
||||
|
||||
### By Locale
|
||||
|
||||
|
||||
----
|
||||
|
||||
#### Installs
|
||||
|
||||
```{r, warning=F}
|
||||
options(DT.options = list(
|
||||
searching=T,
|
||||
paging=T,
|
||||
info=F)
|
||||
)
|
||||
|
||||
|
||||
format_results(taar_locale_installs[order(total_installs)],
|
||||
cap="Installation Statistics per Branch, Install Source, Locale",
|
||||
extra=c("locale"))
|
||||
|
||||
disco = taar_locale_installs[src == 'about:addons']
|
||||
amo = taar_locale_installs[src == 'AMO']
|
||||
|
||||
taar_locale_installs <- taar_locale_installs[total_distinct_clients > 500]
|
||||
result <- NULL
|
||||
for (i in unique(taar_locale_installs$locale)) {
|
||||
cat(i)
|
||||
disco_results <- get_results_table(p = prop.test.by.branch(disco[locale == i],
|
||||
'distinct_clients_that_installed',
|
||||
'total_distinct_clients'))[,c("Install Source", "Locale"):=list("about:addons", i)]
|
||||
amo_results <- get_results_table(p = prop.test.by.branch(amo[locale == i],
|
||||
'distinct_clients_that_installed',
|
||||
'total_distinct_clients'))[,c("Install Source", "Locale"):=list("AMO", i)]
|
||||
comb_results <- funion(disco_results, amo_results)
|
||||
|
||||
setcolorder(comb_results, c("Test", "Install Source","Locale",
|
||||
"Effect", "lower95", "upper95",
|
||||
"Pvalue"))
|
||||
|
||||
if (is.null(result)) {
|
||||
result <- comb_results
|
||||
} else {
|
||||
result <- funion(result, comb_results)
|
||||
}
|
||||
}
|
||||
|
||||
datatable(result[order(Locale)],
|
||||
caption='Signifiance Test Results for Probability of Install per Branch, Install Source, en-US')
|
||||
```
|
||||
|
||||
|
||||
|
||||
<center>
|
||||
|
||||
```{R, fig.width=10, fig.height=6}
|
||||
|
||||
ggplot(taar_locale_installs[total_distinct_clients > 500]) +
|
||||
geom_col(aes(x=locale,y=prob_at_least_one_install, fill=branch),
|
||||
position = 'dodge', alpha=.8) +
|
||||
facet_grid(src ~ .) +
|
||||
theme_bw() +
|
||||
labs(title="Probability of Install per Branch, Locale (n distinct clients > 500)",
|
||||
y="Probility of Install")
|
||||
```
|
||||
|
||||
</center>
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
------
|
||||
|
||||
#### Disopane Visits (exposure to treatment)
|
||||
|
||||
```{r}
|
||||
|
||||
taar_locale_visits$prob_visit <- round(
|
||||
as.numeric(taar_locale_visits$prob_visit), 6)
|
||||
taar_locale_visits$visits_per_client <- round(
|
||||
as.numeric(taar_locale_visits$visits_per_client), 6)
|
||||
dis <- taar_locale_visits[,.(branch, locale, total_visits, total_distinct_clients, visits_per_client)]
|
||||
colnames(dis) <- c("Branch", "Locale", "Total Visits", "Total Clients", "Vistits per Client")
|
||||
datatable(dis[order(Locale)], caption="Discopane Visit Statistics per Branch, Locale")
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
||||
------
|
||||
|
||||
|
||||
#### Pop-up Click Through Rate
|
||||
|
||||
|
||||
```{r}
|
||||
|
||||
ctr_en_us$prob_click <- round(ctr$prob_click, 4)
|
||||
cols <- c("UI", 'is en-US', "Clients that clicked", "Total Clients", "Click Through Rate")
|
||||
dis <- ctr_en_us[,.(branch, is_en_US, n_clicked, n_total, prob_click)]
|
||||
colnames(dis) <- cols
|
||||
dis$UI <- ifelse(dis$UI == 'old', 'Old UI', 'New UI')
|
||||
datatable(dis, caption='Popup Click Through Rates by Branch')
|
||||
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
#### Add-on Retention
|
||||
|
||||
|
||||
|
||||
```{R}
|
||||
setcolorder(addon_locale_retention, c('branch', 'locale', 'n_installed', 'n_total',
|
||||
'n_retained', 'addon_retention_rate'))
|
||||
dis <- addon_locale_retention[,.(branch, locale, n_retained, n_total, addon_retention_rate)]
|
||||
cols <- c("Branch", "Locale", "Retained", "Total", "Retention Rate")
|
||||
colnames(dis) <- cols
|
||||
|
||||
datatable(dis, caption = "Add-on Retention Rate by Branch, en-US locale")
|
||||
|
||||
|
||||
result <- NULL
|
||||
for (i in unique(addon_locale_retention$locale)) {
|
||||
cat(i, '\n')
|
||||
|
||||
r <- tryCatch({
|
||||
get_results_table(p = prop.test.by.branch(addon_locale_retention[locale == i],
|
||||
'n_retained',
|
||||
'n_total'))[,c('Locale'):=(i)]
|
||||
|
||||
}, error = function(e) {
|
||||
NULL
|
||||
})
|
||||
|
||||
|
||||
|
||||
if(is.null(r)) next
|
||||
|
||||
if (is.null(result)) {
|
||||
result <- r
|
||||
} else {
|
||||
result <- funion(result, r)
|
||||
}
|
||||
}
|
||||
|
||||
setcolorder(result, c("Test", "Locale", "Effect", "lower95", "upper95", "Pvalue"))
|
||||
datatable(result, caption='Significance Test Results For Add-on Retention Rate')
|
||||
|
||||
|
||||
```
|
||||
|
||||
|
||||
<br><br><br>
|
||||
<br><br><br>
|
||||
<br><br><br>
|
||||
|
Загрузка…
Ссылка в новой задаче