Mirror of https://github.com/mozilla/rappor.git
More accurate and stable estimation of standard error. Unit tests for new functionality. Added reporting of errors
to make_summary.py.
Parent: 4e45641bbc
Commit: f5632ba57f
@@ -69,13 +69,6 @@ EstimateBloomCounts <- function(params, obs_counts) {
       p_hats <- pmax(0, pmin(1, p_hats))  # clamp to [0,1]
       r <- p_hats * p11 + (1 - p_hats) * p01  # expectation of a reported 1
       N * r * (1 - r) / p2^2  # variance of the binomial
-
-      # using the formula for the random sum of random variables:
-      # var11 <- p_hats * N * p11 * (1 - p11) + p11^2 * p_hats * (1 - p_hats) * N
-      # var01 <- (1 - p_hats) * N * p01 * (1 - p01) +
-      #          p01^2 * (1 - p_hats) * p_hats * N
-
-      # (var11 + var01) / p2^2
     })

 # Transform counts from absolute values to fractional, removing bias due to
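Note on the retained estimate: after clamping, each bit count in a cohort is treated as a draw from Binomial(N, r), where r is the expected probability of reporting a 1. A minimal standalone sketch of the quantity computed above (illustrative only; the definition `p2 = p11 - p01` is an assumption about how decode.R derives it elsewhere):

```r
# Sketch of the per-bit variance kept in EstimateBloomCounts above.
# Assumptions: p01/p11 are P(report 1 | true bit is 0/1), N is the number of
# reports in the cohort, and p2 = p11 - p01 (assumed; defined elsewhere).
bit_variance <- function(p_hat, p01, p11, N) {
  r <- p_hat * p11 + (1 - p_hat) * p01  # probability that a reported bit is 1
  p2 <- p11 - p01
  N * r * (1 - r) / p2^2                # variance of Binomial(N, r), rescaled
}

bit_variance(p_hat = 0.3, p01 = 0.25, p11 = 0.75, N = 10000)  # 9600
```

Treating the cohort's bit count directly as one binomial draw is simpler than the deleted random-sum formula, and is presumably the "more accurate and stable" estimate the commit message refers to.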
@@ -108,11 +101,10 @@ FitLasso <- function(X, Y, intercept = TRUE) {

   # If fitting fails, return an empty data.frame.
   if (class(mod)[1] == "try-error") {
-    coefs <- rep(0, ncol(X))
-    names(coefs) <- colnames(X)
+    coefs <- setNames(rep(0, ncol(X)), colnames(X))
   } else {
     coefs <- coef(mod)
-    coefs <- coefs[-1, ncol(coefs)]
+    coefs <- coefs[-1, ncol(coefs), drop = FALSE]
   }
   coefs
 }
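The `drop = FALSE` is the substantive fix in this hunk: plain `[` indexing silently simplifies a single-column selection from a matrix to a bare vector, which changes what downstream code receives. The `setNames` change is pure style cleanup. A quick base-R illustration:

```r
# drop = FALSE keeps single-column selections as matrices instead of vectors.
m <- matrix(1:6, nrow = 3, dimnames = list(c("r1", "r2", "r3"), c("a", "b")))
str(m[-1, 2])                 # a named vector -- matrix structure dropped
str(m[-1, 2, drop = FALSE])   # still a 2x1 matrix, column "b" preserved
```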
@@ -135,34 +127,29 @@ PerformInference <- function(X, Y, N, mod, params, alpha, correction) {

   betas <- matrix(mod$coefs, ncol = 1)

-  # This is what we want
-  # mod_var <- summary(mod$fit)$sigma^2
-  # betas_sd <- rep(sqrt(max(resid_var, mod_var) / (m * h)), length(betas))
-
-  # This is what we have
-  mod_var <- 0
-  betas_sd <- 1
-
-  z_values <- betas / betas_sd
-
-  # 1-sided t-test.
-  p_values <- pnorm(z_values, lower = FALSE)
+  # mod_var <- summary(mod$fit)$sigma^2
+  # betas_sd <- rep(sqrt(max(resid_var, mod_var) / (m * h)), length(betas))
+  #
+  # z_values <- betas / betas_sd
+  #
+  # # 1-sided t-test.
+  # p_values <- pnorm(z_values, lower = FALSE)

   fit <- data.frame(String = colnames(X), Estimate = betas,
-                    SD = betas_sd, z_stat = z_values, pvalue = p_values,
+                    SD = mod$stds,  # z_stat = z_values, pvalue = p_values,
                     stringsAsFactors = FALSE)

-  if (correction == "FDR") {
-    fit <- fit[order(fit$pvalue, decreasing = FALSE), ]
-    ind <- which(fit$pvalue < (1:nrow(fit)) * alpha / nrow(fit))
-    if (length(ind) > 0) {
-      fit <- fit[1:max(ind), ]
-    } else {
-      fit <- fit[numeric(0), ]
-    }
-  } else {
-    fit <- fit[fit$p < alpha, ]
-  }
+  # if (correction == "FDR") {
+  #   fit <- fit[order(fit$pvalue, decreasing = FALSE), ]
+  #   ind <- which(fit$pvalue < (1:nrow(fit)) * alpha / nrow(fit))
+  #   if (length(ind) > 0) {
+  #     fit <- fit[1:max(ind), ]
+  #   } else {
+  #     fit <- fit[numeric(0), ]
+  #   }
+  # } else {
+  #   fit <- fit[fit$p < alpha, ]
+  # }

   fit <- fit[order(fit$Estimate, decreasing = TRUE), ]
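For reference, the now-disabled FDR branch is the Benjamini-Hochberg step-up rule; with standard deviations now taken from resampling (`SD = mod$stds`), PerformInference no longer computes the p-values that rule would need. A standalone sketch on hypothetical p-values:

```r
# Benjamini-Hochberg step-up selection, as in the commented-out branch above.
bh_select <- function(pvalues, alpha) {
  ord <- order(pvalues)
  p_sorted <- pvalues[ord]
  ind <- which(p_sorted <= seq_along(p_sorted) * alpha / length(p_sorted))
  if (length(ind) == 0) return(integer(0))
  ord[1:max(ind)]  # indices of the rejected null hypotheses
}

bh_select(c(0.001, 0.20, 0.03, 0.02), alpha = 0.05)  # 1 4 3
```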
@@ -284,10 +271,11 @@ Decode <- function(counts, map, params, alpha = 0.05,

   es <- EstimateBloomCounts(params, counts)

-  estimates_stds_filtered <- list(estimates = es$estimates[filter_cohorts,],
-                                  stds = es$stds[filter_cohorts,])
+  estimates_stds_filtered <-
+    list(estimates = es$estimates[filter_cohorts, , drop = FALSE],
+         stds = es$stds[filter_cohorts, , drop = FALSE])

-  coefs <- vector()
+  coefs_all <- vector()

   for(r in 1:5)
   {
@@ -295,21 +283,24 @@ Decode <- function(counts, map, params, alpha = 0.05,
       e <- Resample(estimates_stds_filtered)
     else
       e <- estimates_stds_filtered
-    coefs <- rbind(coefs, FitDistribution(e, map[filter_bits,]))
+
+    coefs_all <- rbind(coefs_all, FitDistribution(e, map[filter_bits,]))
   }

-  coefs_ssd <- N * apply(coefs, 2, sd)  # compute sample standard deviations
-  coefs <- N * apply(coefs, 2, median)
+  coefs_ssd <- N * apply(coefs_all, 2, sd)  # compute sample standard deviations
+  coefs_ave <- N * apply(coefs_all, 2, mean)

-  coefs[coefs < coefs_ssd] <- 0  # zero out coefficients within ssd from 0
+  # Only select coefficients more than two standard deviations from 0. May
+  # exaggerate empirical SD of the estimates.
+  reported <- which(coefs_ave > 1E-6 + 2 * coefs_ssd)

-  mod <- list(coefs = coefs, resid = NULL)  # a stub for now
+  mod <- list(coefs = coefs_ave[reported], stds = coefs_ssd[reported])

   if (correction == "Bonferroni") {
     alpha <- alpha / S
   }

-  inf <- PerformInference(map[filter_bits,],
+  inf <- PerformInference(map[filter_bits, reported, drop = FALSE],
                           as.vector(t(estimates_stds_filtered$estimates)),
                           N, mod, params, alpha,
                           correction)
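The replacement flow above is a small parametric bootstrap: FitDistribution is refit five times on resampled estimates, the fits are averaged, their sample standard deviation becomes the reported standard error, and only coefficients more than two of those deviations above zero survive. The same pattern in miniature, with toy stand-ins for Resample and FitDistribution (not the repo's functions):

```r
# Toy version of the resample-refit-threshold pattern used in Decode above.
set.seed(1)
est <- c(v1 = 0.50, v2 = 0.30, v3 = 0.01)  # stand-in point estimates
std <- c(v1 = 0.02, v2 = 0.02, v3 = 0.02)  # their standard deviations

# each replicate perturbs the estimates, standing in for Resample + refit
coefs_all <- t(replicate(5, est + rnorm(3, sd = std)))

coefs_ave <- apply(coefs_all, 2, mean)
coefs_ssd <- apply(coefs_all, 2, sd)
reported <- which(coefs_ave > 1e-6 + 2 * coefs_ssd)  # keeps v1 and v2;
                                                     # v3 is within noise of 0
```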
@@ -319,12 +310,11 @@ Decode <- function(counts, map, params, alpha = 0.05,
   if (sum(map) == sum(diag(map))) {
     fit$Estimate <- colSums(counts)[-1]
   }
-  resid <- mod$resid / inf$resid_sigma

   # Estimates from the model are per instance so must be multiplied by h.
   # Standard errors are also adjusted.
   fit$Total_Est <- floor(fit$Estimate)
-  fit$Total_SD <- floor(fit$SD * m)
+  fit$Total_SD <- floor(fit$SD)
   fit$Prop <- fit$Total_Est / N
   fit$LPB <- fit$Prop - 1.96 * fit$Total_SD / N
   fit$UPB <- fit$Prop + 1.96 * fit$Total_SD / N
@@ -351,7 +341,7 @@ Decode <- function(counts, map, params, alpha = 0.05,

   list(fit = fit, summary = res_summary, privacy = privacy, params = params,
        lasso = NULL, ests = as.vector(t(estimates_stds_filtered$estimates)),
-       counts = counts[, -1], resid = resid)
+       counts = counts[, -1], resid = NULL)
 }

 ComputeCounts <- function(reports, cohorts, params) {

@@ -12,12 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-#
-# This library implements the RAPPOR marginal decoding algorithms using LASSO.
-
 library(RUnit)
+library(abind)

 source('analysis/R/decode.R')
 source('tests/gen_counts.R')

 L1Distance <- function(X, Y) {
@@ -39,61 +37,101 @@ LInfDistance <- function(X, Y) {
              Y[!names(Y) %in% common])
 }

+MatrixVectorMerge <- function(mat, vec) {
+  # Attaches a vector to a matrix, matching corresponding column names
+
-RunMultipleTests <- function(title, fun, repetitions, ...)
-{
-  cat(title, ": ")
-  pb <- txtProgressBar(min = 0, max = repetitions,
-                       width = getOption("width") - 20 - nchar(title))
+  mat_only <- setdiff(colnames(mat), names(vec))
+  vec_only <- setdiff(names(vec), colnames(mat))

-  for(i in 1:repetitions)
-  {
-    setTxtProgressBar(pb, i)
+  # extend the vector with missing columns
+  vec_long <- c(vec, setNames(rep(NA, length(mat_only)), mat_only))

-    fun(...)
+  # extend the matrix with missing columns
+  newcols <- matrix(NA, nrow = nrow(mat), ncol = length(vec_only))
+  colnames(newcols) <- vec_only
+  mat_long <- cbind(mat, newcols)

+  # Now vec and mat have the same columns, but in the wrong order. Sort the
+  # columns lexicographically.
+  if(length(vec_long) > 0) {
+    mat_long <- mat_long[, order(colnames(mat_long)), drop = FALSE]
+    vec_long <- vec_long[order(names(vec_long))]
   }
-  cat(" Done.")

-  close(pb)
+  rbind(mat_long, vec_long)
 }

-TestEstimatesAndStdsHelper <- function(params, map, partition) {
+RunMultipleTests <- function(title, fun, repetitions, ...) {
+  # Run a function with an annotated progress indicator
+  cat(title, ": ")
+
+  if(repetitions == 1) {
+    # only run once
+    fun(...)
+
+    cat(" Done.")
+  }
+  else {  # run multiple times
+    pb <- txtProgressBar(min = 0, max = repetitions,
+                         width = getOption("width") - 20 - nchar(title))
+
+    for(i in 1:repetitions) {
+      setTxtProgressBar(pb, i)
+      fun(...)
+    }
+    cat(" Done.")
+    close(pb)
+  }
+}
+
+TestEstimatesAndStdsHelper <- function(params, map, pdf, total) {
+  # Helper function for TestEstimateBloomCounts.
+  partition <- RandomPartition(total, pdf)
   counts <- GenerateCounts(params, map, partition, 1)
   e <- EstimateBloomCounts(params, counts)

   results$estimates <<- abind(results$estimates, e$estimates, along = 3)
   results$stds <<- abind(results$stds, e$stds, along = 3)
-  results$counts <<- abind(results$counts, counts, along = 3)
 }

-TestEstimatesAndStds <- function(repetitions, title,
-                                 params, map, partition, true_distr) {
-  v <- 1  # only handle one report per client
-
-  total <- sum(partition)
-
-  results <<- c(estimates = list(), stds = list(), counts = list())
+TestEstimatesAndStds <- function(repetitions, title, params, map, pdf, total) {
+  # Checks that the expectations returned by EstimateBloomCounts on simulated
+  # inputs match the ground truth and the empirical standard deviation matches
+  # EstimateBloomCounts outputs.
+  #
+  # Input:
+  #   repetitions: the number of runs of EstimateBloomCounts
+  #   title: label
+  #   params: params vector
+  #   map: the map table
+  #   pdf: probability density function of the distribution from which simulated
+  #        clients are sampled
+  #   total: number of reports
+  results <<- c(estimates = list(), stds = list())

   RunMultipleTests(title, TestEstimatesAndStdsHelper, repetitions,
-                   params, map, partition)
+                   params, map, pdf, total)

   ave_e <- apply(results$estimates, 1:2, mean)
   observed_stds <- apply(results$estimates, 1:2, sd)
   ave_stds <- apply(results$stds, 1:2, mean)

-  if(!is.null(true_distr))
-    checkTrue(!any((ave_e - true_distr) > (ave_stds / repetitions^.5) * 5),
+  ground_truth <- matrix(map %*% pdf, nrow = params$m, byrow = TRUE)
+
+  checkTrue(!any(abs(ave_e - ground_truth) > 1E-9 +  # tolerance level
+                 (ave_stds / repetitions^.5) * 5),
             "Averages deviate too much from expectations.")

-  checkTrue(!any(observed_stds > ave_stds * 2),
-            "Expected standard deviations are too pessimistic.")
+  checkTrue(!any(observed_stds > ave_stds * (1 + 5 * repetitions^.5)),
+            "Expected standard deviations are too high")

-  checkTrue(!any(observed_stds < ave_stds / 2),
-            "Expected standard deviations are too optimistic")
+  checkTrue(!any(observed_stds < ave_stds * (1 - 5 * repetitions^.5)),
+            "Expected standard deviations are too low")
 }

 TestEstimateBloomCounts <- function() {
   # Unit tests for the EstimateBloomCounts function.

   report4x2 <- list(k = 4, m = 2)  # 2 cohorts, 4 bits each
   map0 <- Matrix(0, nrow = 8, ncol = 3, sparse = TRUE)  # 3 possible values
   map0[1,] <- c(1, 0, 0)
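The new helpers are exercised heavily below, so a usage sketch may help. MatrixVectorMerge accumulates per-run results whose string sets differ, aligning columns by name and padding with NA. Assuming the definition introduced above:

```r
# Hypothetical inputs: one prior run (the matrix) plus one new run (the vector).
mat <- matrix(c(10, 20), nrow = 1, dimnames = list(NULL, c("v1", "v2")))
vec <- c(v2 = 25, v3 = 7)  # this run reported v2 and v3, but never saw v1

MatrixVectorMerge(mat, vec)
# a 2x3 matrix with columns v1, v2, v3; the appended row is NA, 25, 7
```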
@@ -104,19 +142,17 @@ TestEstimateBloomCounts <- function() {

   colnames(map0) <- c('v1', 'v2', 'v3')

-  partition0 <- c(3, 2, 1) * 100
-  names(partition0) <- colnames(map0)
-
-  true_distr <- matrix(c(1/2, 1/3, 1/6, 1, 1/6, 0, 0, 0), 2, 4, byrow = TRUE)
+  pdf0 <- c(1/2, 1/3, 1/6)
+  names(pdf0) <- colnames(map0)

   noise0 <- list(p = 0, q = 1, f = 0)  # no noise at all

   TestEstimatesAndStds(repetitions = 1000, "Testing estimates and stds (1/3)",
-                       c(report4x2, noise0), map0, partition0, true_distr)
+                       c(report4x2, noise0), map0, pdf0, 100)

   noise1 <- list(p = 0.4, q = .6, f = 0.5)
   TestEstimatesAndStds(repetitions = 1000, "Testing estimates and stds (2/3)",
-                       c(report4x2, noise1), map0, partition0, true_distr)
+                       c(report4x2, noise1), map0, pdf0, 100)

   # MEDIUM TEST: 100 values, 32 cohorts, 8 bits each, 10^6 reports
   values <- 100
@@ -127,35 +163,66 @@ TestEstimateBloomCounts <- function() {

   colnames(map1) <- sprintf("v%d", 1:values)

-  pdf <- ComputePdf("zipf1", values)
-  partition1 <- RandomPartition(10^9, pdf)
+  pdf1 <- ComputePdf("zipf1", values)

   TestEstimatesAndStds(repetitions = 100, "Testing estimates and stds (3/3)",
-                       c(report8x32, noise1), map1, partition1, NULL)
+                       c(report8x32, noise1), map1, pdf1, 10^9)
 }

-TestDecodeHelper <- function(params, map, partition, tolerance_l1,
-                             tolerance_linf) {
-  # Helper function for TestDecode.
+TestDecodeHelper <- function(params, map, pdf, num_clients,
+                             tolerance_l1, tolerance_linf) {
+  # Helper function for TestDecode. Simulates a RAPPOR run and checks results of
+  # Decode's output against the ground truth. Results are appended to a global
+  # list.
+
+  partition <- RandomPartition(num_clients, pdf)
   counts <- GenerateCounts(params, map, partition, 1)
   total <- sum(partition)

   decoded <- Decode(counts, map, params)

-  l1 <- L1Distance(setNames(decoded$fit$estimate, decoded$fit$strings),
-                   partition)
+  decoded_partition <- setNames(decoded$fit$estimate, decoded$fit$strings)
+
+  results$estimates <<- MatrixVectorMerge(results$estimates, decoded_partition)
+  results$stds <<- MatrixVectorMerge(results$stds,
+                                     setNames(decoded$fit$std_dev,
+                                              decoded$fit$strings))

-  checkTrue(L1Distance(setNames(decoded$fit$estimate, decoded$fit$strings),
-                       partition) < total^.5 * tolerance_l1,
+  checkTrue(L1Distance(decoded_partition, partition) < total^.5 * tolerance_l1,
             "L1 distance is too large")

-  checkTrue(LInfDistance(setNames(decoded$fit$estimate, decoded$fit$strings),
-                         partition) < max(partition)^.5 * tolerance_linf,
-            "L_inf distance is too large")
+  checkTrue(LInfDistance(decoded_partition, partition) <
+            max(partition)^.5 * tolerance_linf, "L_inf distance is too large")
 }

+TestDecodeAveAndStds <- function(...) {
+  # Runs Decode multiple times (specified by the repetition argument), checks
+  # individual runs against the ground truth, and the estimates of the standard
+  # error against empirical observations.
+
+  results <<- list(estimates = matrix(nrow = 0, ncol = 0),
+                   stds = matrix(nrow = 0, ncol = 0))
+
+  RunMultipleTests(...)
+
+  empirical_stds <- apply(results$estimates, 2, sd, na.rm = TRUE)
+  estimated_stds <- apply(results$stds, 2, mean, na.rm = TRUE)
+
+  if(dim(results$estimates)[1] > 1)
+  {
+    checkTrue(any(estimated_stds > empirical_stds / 2),
+              "Our estimate for the standard deviation is too low")
+
+    checkTrue(any(estimated_stds < empirical_stds * 3),
+              "Our estimate for the standard deviation is too high")
+  }
+}
+
 TestDecode <- function() {
   # Unit tests for the Decode function.

+  # TOY TESTS: three values, 2 cohorts, 4 bits each
+
   report4x2 <- list(k = 4, m = 2, h = 2)  # 2 cohorts, 4 bits each
   map0 <- Matrix(0, nrow = 8, ncol = 3, sparse = TRUE)  # 3 possible values
   map0[1,] <- c(1, 0, 0)
@@ -165,30 +232,28 @@ TestDecode <- function() {
   map0[5,] <- c(0, 0, 1)  # 1st bit of the second cohort gets signal from v3

   colnames(map0) <- c('v1', 'v2', 'v3')
-  distribution0 <- setNames(c(1/2, 1/3, 1/6), colnames(map0))

+  # toy example
+  distribution0 <- setNames(c(.5, .3, 1/6), colnames(map0))

-  noise0 <- list(p = 0, q = 1, f = 0)  # no noise whatsoever
+  # Even in the absence of noise, the inferred counts won't necessarily
+  # match the ground truth. Must be close enough though.
+  noise0 <- list(p = 0, q = 1, f = 0)  # no noise whatsoever

-  # RunMultipleTests("Testing Decode (1/5)", TestDecodeHelper, 100,
-  #                  c(report4x2, noise0), map0, partition0,
-  #                  tolerance_l1 = 5,
-  #                  tolerance_linf = 3)
+  TestDecodeAveAndStds("Testing Decode (1/5)", TestDecodeHelper, 100,
+                       c(report4x2, noise0), map0, distribution0, 100,
+                       tolerance_l1 = 5,
+                       tolerance_linf = 3)

-  noise1 <- list(p = .4, q = .6, f = .5)  # substantial noise
-  RunMultipleTests("Testing Decode (2/5)", TestDecodeHelper, 100,
-                   c(report4x2, noise1), map0, partition0,
-                   tolerance_l1 = 20,
-                   tolerance_linf = 10)
+  noise1 <- list(p = .4, q = .6, f = .5)  # substantial noise, very few reports
+  TestDecodeAveAndStds("Testing Decode (2/5)", TestDecodeHelper, 100,
+                       c(report4x2, noise1), map0, distribution0, 100,
+                       tolerance_l1 = 20,
+                       tolerance_linf = 20)

-  partition1 <- setNames(c(3, 2, 1) * 100000, colnames(map0))  # many reports
-  RunMultipleTests("Testing Decode (3/5)", TestDecodeHelper, 100,
-                   c(report4x2, noise1), map0, partition1,
-                   tolerance_l1 = 50,
-                   tolerance_linf = 40)
+  # substantial noise, many reports
+  TestDecodeAveAndStds("Testing Decode (3/5)", TestDecodeHelper, 100,
+                       c(report4x2, noise1), map0, distribution0, 100000,
+                       tolerance_l1 = 50,
+                       tolerance_linf = 40)

   # MEDIUM TEST: 100 values, 32 cohorts, 8 bits each, 10^6 reports
   values <- 100
@@ -199,12 +264,12 @@ TestDecode <- function() {

   colnames(map1) <- sprintf("v%d", 1:values)

-  pdf <- ComputePdf("zipf1", values)
-  partition1 <- setNames(RandomPartition(10^6, pdf), colnames(map1))
-  RunMultipleTests("Testing Decode (4/5)", TestDecodeHelper, 100,
-                   c(report8x32, noise1), map1, partition1,
+  distribution1 <- ComputePdf("zipf1", values)
+  names(distribution1) <- colnames(map1)
+  TestDecodeAveAndStds("Testing Decode (4/5)", TestDecodeHelper, 100,
+                       c(report8x32, noise1), map1, distribution1, 10^6,
                        tolerance_l1 = values * 3,
-                       tolerance_linf = 50)
+                       tolerance_linf = 100)

   # Testing LASSO: 500 values, 32 cohorts, 8 bits each, 10^6 reports
   values <- 500
@@ -215,10 +280,11 @@ TestDecode <- function() {

   colnames(map2) <- sprintf("v%d", 1:values)

-  pdf <- ComputePdf("zipf1.5", values)
-  partition2 <- setNames(RandomPartition(10^6, pdf), colnames(map2))
-  RunMultipleTests("Testing Decode (5/5)", TestDecodeHelper, 1,
-                   c(report8x32, noise0), map2, partition2,
+  distribution2 <- ComputePdf("zipf1.5", values)
+  names(distribution2) <- colnames(map2)
+
+  TestDecodeAveAndStds("Testing Decode (5/5)", TestDecodeHelper, 1,
+                       c(report8x32, noise0), map2, distribution2, 10^6,
                        tolerance_l1 = values * 3,
                        tolerance_linf = 20)
@@ -229,5 +295,4 @@ TestAll <- function() {
   TestDecode()
 }

-
-TestAll()
+TestAll()

@@ -232,7 +232,7 @@ make-summary() {
   local dir=$1
   local filename=${2:-results.html}

-  tests/make_summary.py $dir > $dir/rows.html
+  tests/make_summary.py $dir $dir/rows.html

   pushd $dir >/dev/null

@@ -240,6 +240,8 @@ make-summary() {
     | sed -e '/TABLE_ROWS/ r rows.html' \
     > $filename

+  rm rows.html
+
   popd >/dev/null

   log "Wrote $dir/$filename"

@@ -19,18 +19,19 @@ source('analysis/R/read_input.R')
 RandomPartition <- function(total, weights) {
   # Outputs a random partition according to a specified distribution
   # Args:
-  #   total - number of balls
-  #   weights - vector encoding the probability that a ball lands into a bin
+  #   total - number of samples
+  #   weights - weights that are proportional to the probability density
+  #             function of the target distribution
   # Returns:
-  #   an integer vector summing up to total
+  #   a histogram sampled according to the pdf
   # Example:
   #   > RandomPartition(100, c(3, 2, 1, 0, 1))
   #   [1] 47 24 15 0 14
   if (any(weights < 0))
-    stop("Weights cannot be negative")
+    stop("Probabilities cannot be negative")

   if (sum(weights) == 0)
-    stop("Weights cannot sum up to 0")
+    stop("Probabilities cannot sum up to 0")

   bins <- length(weights)
   result <- rep(0, bins)
@@ -59,6 +60,8 @@ RandomPartition <- function(total, weights) {
     w <- w - weights[i]
   }

+  names(result) <- names(weights)
+
   return(result)
 }

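With the added `names(result) <- names(weights)`, RandomPartition now returns a named histogram, so callers can index the partition by string directly instead of wrapping the result in setNames, which is what the test refactoring above relies on. A usage sketch:

```r
# Hypothetical weights; the draw is random, but the vector always sums to total.
pdf0 <- c(v1 = 3, v2 = 2, v3 = 1)
partition <- RandomPartition(600, pdf0)
partition["v1"]  # e.g. 311 -- named lookup now works without setNames()
```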
@@ -1,5 +1,5 @@
 #!/usr/bin/python
-"""Given a regtest result tree, prints an HTML summary on stdout.
+"""Given a regtest result tree, prints an HTML summary to a file.

 See HTML skeleton in tests/regtest.html.
 """
@@ -170,56 +170,64 @@ def ParseMetrics(metrics_file, log_file, num_additional):
   """Processes the metrics file.

   Args:
-    report_dir: A directory name containing metrics.csv and log.txt.
+    metrics_file: name of the metrics file
+    log_file: name of the log.txt file
     num_additional: A number of bogus candidates added to the candidate list.

   Returns a pair:
     - A dictionary of metrics (some can be []).
     - An HTML-formatted portion of the report row.
   """
-  with open(metrics_file) as m:
-    m.readline()
-    metrics_row = m.readline().split(',')
-
-  (num_actual, num_rappor, num_false_pos, num_false_neg, total_variation,
-   allocated_mass) = metrics_row
-
-  num_actual = int(num_actual)
-  num_rappor = int(num_rappor)
-
-  num_false_pos = int(num_false_pos)
-  num_false_neg = int(num_false_neg)
-
-  total_variation = float(total_variation)
-  allocated_mass = float(allocated_mass)
+  if not os.path.isfile(metrics_file):
+    metrics_row_str = ['', '', '', '', '', '']
+    metrics_row_dict = {}
+  else:
+    with open(metrics_file) as m:
+      m.readline()
+      metrics_row = m.readline().split(',')
+
+    (num_actual, num_rappor, num_false_pos, num_false_neg, total_variation,
+     allocated_mass) = metrics_row
+
+    num_actual = int(num_actual)
+    num_rappor = int(num_rappor)
+
+    num_false_pos = int(num_false_pos)
+    num_false_neg = int(num_false_neg)
+
+    total_variation = float(total_variation)
+    allocated_mass = float(allocated_mass)
+
+    # e.g. if there are 20 additional candidates added, and 1 false positive,
+    # the false positive rate is 5%.
+    fp_rate = float(num_false_pos) / num_additional if num_additional else 0
+    # e.g. if there are 100 strings in the true input, and 80 strings
+    # detected by RAPPOR, then we have 20 false negatives, and a false
+    # negative rate of 20%.
+    fn_rate = float(num_false_neg) / num_actual
+
+    metrics_row_str = [
+        str(num_actual),
+        str(num_rappor),
+        '%.1f%% (%d)' % (fp_rate * 100, num_false_pos) if num_additional
+        else '',
+        '%.1f%% (%d)' % (fn_rate * 100, num_false_neg),
+        '%.3f' % total_variation,
+        '%.3f' % allocated_mass,
+    ]
+
+    metrics_row_dict = {
+        'tv': [total_variation],
+        'fpr': [fp_rate] if num_additional else [],
+        'fnr': [fn_rate],
+        'am': [allocated_mass],
+    }

   elapsed_time = ExtractTime(log_file)

-  # e.g. if there are 20 additional candidates added, and 1 false positive,
-  # the false positive rate is 5%.
-  fp_rate = float(num_false_pos) / num_additional if num_additional else 0
-  # e.g. if there are 100 strings in the true input, and 80 strings
-  # detected by RAPPOR, then we have 20 false negatives, and a false
-  # negative rate of 20%.
-  fn_rate = float(num_false_neg) / num_actual
-
-  metrics_row_str = [
-      str(num_actual),
-      str(num_rappor),
-      '%.1f%% (%d)' % (fp_rate * 100, num_false_pos) if num_additional else '',
-      '%.1f%% (%d)' % (fn_rate * 100, num_false_neg),
-      '%.3f' % total_variation,
-      '%.3f' % allocated_mass,
-      '%.2f' % elapsed_time if elapsed_time is not None else '',
-  ]
-
-  metrics_row_dict = {
-      'tv': [total_variation],
-      'fpr': [fp_rate] if num_additional else [],
-      'fnr': [fn_rate],
-      'am': [allocated_mass],
-      'time': [elapsed_time] if elapsed_time is not None else [],
-  }
+  if elapsed_time is not None:
+    metrics_row_str = metrics_row_str + ['%.2f' % elapsed_time]
+    metrics_row_dict['time'] = [elapsed_time]

   # return metrics formatted as HTML table entries
   return (metrics_row_dict,
@@ -292,11 +300,15 @@ def FormatPlots(base_dir, test_instances):

 def main(argv):
   base_dir = argv[1]
+  output_file = open(argv[2], 'w')

   # This file has the test case names, in the order that they should be
   # displayed.
-  path = os.path.join(base_dir, 'test-instances.txt')
-  with open(path) as f:
+  instances_file = os.path.join(base_dir, 'test-instances.txt')
+  if not os.path.isfile(instances_file):
+    raise RuntimeError('{} is missing'.format(instances_file))
+
+  with open(instances_file) as f:
     test_instances = [line.strip() for line in f]

   # Metrics are assembled into a dictionary of dictionaries. The top-level
@@ -314,6 +326,10 @@ def main(argv):
   # file. Instead, rows' names are links to the corresponding .png files.
   include_plots = len(test_instances) < 20

+  instances_succeeded = 0
+  instances_failed = 0
+  instances_running = 0
+
   for instance in test_instances:
     # A test instance is identified by the test name and the test run.
     test_case, test_instance, _ = instance.split(' ')
@@ -334,33 +350,48 @@ def main(argv):
     cell1_html = FormatCell1(test_case, test_instance, metrics_file, log_file,
                              plot_file, include_plots)

-    if os.path.isfile(metrics_file):
-      # ParseMetrics outputs an HTML table row and also updates lists
-      metrics_dict, metrics_html = ParseMetrics(metrics_file, log_file,
-                                                num_additional)
+    # ParseMetrics outputs an HTML table row and also updates lists
+    metrics_dict, metrics_html = ParseMetrics(metrics_file, log_file,
+                                              num_additional)

-      # Update the metrics structure. Initialize dictionaries if necessary.
-      for m in metrics:
+    # Update the metrics structure. Initialize dictionaries if necessary.
+    for m in metrics:
+      if m in metrics_dict:
         if not test_case in metrics[m]:
           metrics[m][test_case] = metrics_dict[m]
         else:
           metrics[m][test_case] += metrics_dict[m]

-    print '<tr>{}{}{}</tr>'.format(cell1_html, spec_html, metrics_html)
+    print >>output_file, '<tr>{}{}{}</tr>'.format(cell1_html,
+                                                  spec_html, metrics_html)

-  print FormatSummaryRow(metrics)
+    # Update counters
+    if 'tv' in metrics_dict:
+      instances_succeeded += 1
+    else:
+      if 'time' in metrics_dict:
+        instances_failed += 1
+      else:
+        if os.path.isfile(log_file):
+          instances_running += 1
+
+  print >>output_file, FormatSummaryRow(metrics)

-  print '</tbody>'
-  print '</table>'
-  print '<p style="padding-bottom: 3em"></p>'  # vertical space
+  print >>output_file, '</tbody>'
+  print >>output_file, '</table>'
+  print >>output_file, '<p style="padding-bottom: 3em"></p>'  # vertical space

   # Plot links.
   if include_plots:
-    print FormatPlots(base_dir, test_instances)
+    print >>output_file, FormatPlots(base_dir, test_instances)
   else:
-    print ('<p>Too many tests to include plots. '
-           'Click links within rows for details.</p>')
+    print >>output_file, ('<p>Too many tests to include plots. '
+                          'Click links within rows for details.</p>')
+
+  print ('Instances'
+         ' succeeded: {} failed: {} running: {} total: {}'.
+         format(instances_succeeded, instances_failed, instances_running,
+                len(test_instances)))

 if __name__ == '__main__':
   try: