More accurate and stable estimation of standard error. Unit tests for new functionality. Added reporting of errors to make_summary.py.
Ilya Mironov 2015-05-04 19:09:56 -07:00
Parent 4e45641bbc
Commit f5632ba57f
5 changed files with 276 additions and 185 deletions

View file

@@ -69,13 +69,6 @@ EstimateBloomCounts <- function(params, obs_counts) {
p_hats <- pmax(0, pmin(1, p_hats)) # clamp to [0,1]
r <- p_hats * p11 + (1 - p_hats) * p01 # expectation of a reported 1
N * r * (1 - r) / p2^2 # variance of the binomial
# using the formula for the random sum of random variables:
# var11 <- p_hats * N * p11 * (1 - p11) + p11^2 * p_hats * (1 - p_hats) * N
# var01 <- (1 - p_hats) * N * p01 * (1 - p01) +
# p01^2 * (1 - p_hats) * p_hats * N
# (var11 + var01) / p2^2
})
# Transform counts from absolute values to fractional, removing bias due to
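For intuition behind the simplified variance above: each reported bit is a Bernoulli draw with success probability r, so the raw count of 1s over N reports is Binomial(N, r) with variance N * r * (1 - r). A quick simulation sketch with made-up parameter values (illustrative only, not part of the commit):

    # Illustrative check that the count of reported 1s has variance N * r * (1 - r)
    set.seed(42)
    N <- 10000; p_hat <- 0.3; p11 <- 0.75; p01 <- 0.25   # hypothetical values
    r <- p_hat * p11 + (1 - p_hat) * p01
    sim_counts <- replicate(2000, sum(rbinom(N, 1, r)))
    var(sim_counts)   # empirical variance of the count
    N * r * (1 - r)   # close to the binomial variance used above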
@@ -108,11 +101,10 @@ FitLasso <- function(X, Y, intercept = TRUE) {
# If fitting fails, return an empty data.frame.
if (class(mod)[1] == "try-error") {
coefs <- rep(0, ncol(X))
names(coefs) <- colnames(X)
coefs <- setNames(rep(0, ncol(X)), colnames(X))
} else {
coefs <- coef(mod)
coefs <- coefs[-1, ncol(coefs)]
coefs <- coefs[-1, ncol(coefs), drop = FALSE]
}
coefs
}
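Several hunks in this commit add drop = FALSE to subscripting, as in coefs[-1, ncol(coefs), drop = FALSE] above. The reason, shown on a generic example unrelated to these particular matrices: R drops dimensions by default when a subscript selects a single row or column, silently turning a one-column matrix into a plain vector.

    m <- matrix(1:6, nrow = 3)
    m[-1, 2]                 # dimensions dropped: a plain numeric vector
    m[-1, 2, drop = FALSE]   # stays a 2 x 1 matrix, so matrix code downstream keeps working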
@@ -135,34 +127,29 @@ PerformInference <- function(X, Y, N, mod, params, alpha, correction) {
betas <- matrix(mod$coefs, ncol = 1)
# This is what we want
# mod_var <- summary(mod$fit)$sigma^2
# betas_sd <- rep(sqrt(max(resid_var, mod_var) / (m * h)), length(betas))
# This is what we have
mod_var <- 0
betas_sd <- 1
z_values <- betas / betas_sd
# 1-sided t-test.
p_values <- pnorm(z_values, lower = FALSE)
# mod_var <- summary(mod$fit)$sigma^2
# betas_sd <- rep(sqrt(max(resid_var, mod_var) / (m * h)), length(betas))
#
# z_values <- betas / betas_sd
#
# # 1-sided t-test.
# p_values <- pnorm(z_values, lower = FALSE)
fit <- data.frame(String = colnames(X), Estimate = betas,
SD = betas_sd, z_stat = z_values, pvalue = p_values,
SD = mod$stds, # z_stat = z_values, pvalue = p_values,
stringsAsFactors = FALSE)
if (correction == "FDR") {
fit <- fit[order(fit$pvalue, decreasing = FALSE), ]
ind <- which(fit$pvalue < (1:nrow(fit)) * alpha / nrow(fit))
if (length(ind) > 0) {
fit <- fit[1:max(ind), ]
} else {
fit <- fit[numeric(0), ]
}
} else {
fit <- fit[fit$p < alpha, ]
}
# if (correction == "FDR") {
# fit <- fit[order(fit$pvalue, decreasing = FALSE), ]
# ind <- which(fit$pvalue < (1:nrow(fit)) * alpha / nrow(fit))
# if (length(ind) > 0) {
# fit <- fit[1:max(ind), ]
# } else {
# fit <- fit[numeric(0), ]
# }
# } else {
# fit <- fit[fit$p < alpha, ]
# }
fit <- fit[order(fit$Estimate, decreasing = TRUE), ]
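For reference, the FDR branch commented out above implements the Benjamini-Hochberg step-up rule. A standalone sketch of the same selection on a toy p-value vector (values are made up, not tied to this codebase):

    pvals <- c(0.001, 0.008, 0.039, 0.041, 0.30)
    alpha <- 0.05
    ord <- order(pvals)
    # keep everything up to the largest index that passes the step-up threshold
    passed <- which(pvals[ord] < seq_along(pvals) * alpha / length(pvals))
    selected <- if (length(passed) > 0) ord[1:max(passed)] else integer(0)
    pvals[selected]   # the discoveries at FDR level alpha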
@@ -284,10 +271,11 @@ Decode <- function(counts, map, params, alpha = 0.05,
es <- EstimateBloomCounts(params, counts)
estimates_stds_filtered <- list(estimates = es$estimates[filter_cohorts,],
stds = es$stds[filter_cohorts,])
estimates_stds_filtered <-
list(estimates = es$estimates[filter_cohorts, , drop = FALSE],
stds = es$stds[filter_cohorts, , drop = FALSE])
coefs <- vector()
coefs_all <- vector()
for(r in 1:5)
{
@@ -295,21 +283,24 @@ Decode <- function(counts, map, params, alpha = 0.05,
e <- Resample(estimates_stds_filtered)
else
e <- estimates_stds_filtered
coefs <- rbind(coefs, FitDistribution(e, map[filter_bits,]))
coefs_all <- rbind(coefs_all, FitDistribution(e, map[filter_bits,]))
}
coefs_ssd <- N * apply(coefs, 2, sd) # compute sample standard deviations
coefs <- N * apply(coefs, 2, median)
coefs_ssd <- N * apply(coefs_all, 2, sd) # compute sample standard deviations
coefs_ave <- N * apply(coefs_all, 2, mean)
coefs[coefs < coefs_ssd] <- 0 # zero out coefficients within ssd from 0
# Only select coefficients more than two standard deviations from 0. May
# exaggerate empirical SD of the estimates.
reported <- which(coefs_ave > 1E-6 + 2 * coefs_ssd)
mod <- list(coefs = coefs, resid = NULL) # a stub for now
mod <- list(coefs = coefs_ave[reported], stds = coefs_ssd[reported])
if (correction == "Bonferroni") {
alpha <- alpha / S
}
inf <- PerformInference(map[filter_bits,],
inf <- PerformInference(map[filter_bits,reported, drop = FALSE],
as.vector(t(estimates_stds_filtered$estimates)),
N, mod, params, alpha,
correction)
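The new selection rule above keeps only coefficients whose averaged estimate exceeds two empirical standard deviations (plus a small tolerance). A toy sketch of that cut with made-up numbers:

    est <- c(v1 = 520, v2 = 35, v3 = 4)   # hypothetical averaged estimates
    ssd <- c(v1 = 40,  v2 = 30, v3 = 5)   # hypothetical sample standard deviations
    reported <- which(est > 1E-6 + 2 * ssd)
    est[reported]   # only v1 clears the two-sigma threshold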
@@ -319,12 +310,11 @@ Decode <- function(counts, map, params, alpha = 0.05,
if (sum(map) == sum(diag(map))) {
fit$Estimate <- colSums(counts)[-1]
}
resid <- mod$resid / inf$resid_sigma
# Estimates from the model are per instance so must be multiplied by h.
# Standard errors are also adjusted.
fit$Total_Est <- floor(fit$Estimate)
fit$Total_SD <- floor(fit$SD * m)
fit$Total_SD <- floor(fit$SD)
fit$Prop <- fit$Total_Est / N
fit$LPB <- fit$Prop - 1.96 * fit$Total_SD / N
fit$UPB <- fit$Prop + 1.96 * fit$Total_SD / N
@@ -351,7 +341,7 @@ Decode <- function(counts, map, params, alpha = 0.05,
list(fit = fit, summary = res_summary, privacy = privacy, params = params,
lasso = NULL, ests = as.vector(t(estimates_stds_filtered$estimates)),
counts = counts[, -1], resid = resid)
counts = counts[, -1], resid = NULL)
}
ComputeCounts <- function(reports, cohorts, params) {

View file

@@ -12,12 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This library implements the RAPPOR marginal decoding algorithms using LASSO.
library(RUnit)
library(abind)
source('analysis/R/decode.R')
source('tests/gen_counts.R')
L1Distance <- function(X, Y) {
@@ -39,61 +37,101 @@ LInfDistance <- function(X, Y) {
Y[!names(Y) %in% common])
}
MatrixVectorMerge <- function(mat, vec) {
# Attaches a vector to a matrix, matching corresponding column names
RunMultipleTests <- function(title, fun, repetitions, ...)
{
cat(title, ": ")
pb <- txtProgressBar(min = 0, max = repetitions,
width = getOption("width") - 20 - nchar(title))
mat_only <- setdiff(colnames(mat), names(vec))
vec_only <- setdiff(names(vec), colnames(mat))
for(i in 1:repetitions)
{
setTxtProgressBar(pb, i)
# extend the vector with missing columns
vec_long <- c(vec, setNames(rep(NA, length(mat_only)), mat_only))
fun(...)
# extend the matrix with missing columns
newcols <- matrix(NA, nrow = nrow(mat), ncol = length(vec_only))
colnames(newcols) <- vec_only
mat_long <- cbind(mat, newcols)
# Now vec and mat have the same columns, but in the wrong order. Sort the
# columns lexicographically.
if(length(vec_long) > 0) {
mat_long <- mat_long[, order(colnames(mat_long)), drop = FALSE]
vec_long <- vec_long[order(names(vec_long))]
}
cat(" Done.")
close(pb)
rbind(mat_long, vec_long)
}
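Assuming MatrixVectorMerge returns the matrix with the vector appended as a new row, aligning columns by name and filling columns that only one side has with NA (as the code above suggests), a hypothetical use looks like this:

    m <- matrix(1:4, nrow = 2, dimnames = list(NULL, c("a", "b")))
    v <- setNames(c(10, 20), c("b", "c"))
    MatrixVectorMerge(m, v)
    # columns aligned by name and sorted lexicographically; roughly:
    #      a  b  c
    #      1  3 NA
    #      2  4 NA
    #     NA 10 20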
TestEstimatesAndStdsHelper <- function(params, map, partition) {
RunMultipleTests <- function(title, fun, repetitions, ...) {
# Run a function with an annotated progress indicator
cat(title, ": ")
if(repetitions == 1) {
# only run once
fun(...)
cat(" Done.")
}
else { # run multiple times
pb <- txtProgressBar(min = 0, max = repetitions,
width = getOption("width") - 20 - nchar(title))
for(i in 1:repetitions) {
setTxtProgressBar(pb, i)
fun(...)
}
cat(" Done.")
close(pb)
}
}
TestEstimatesAndStdsHelper <- function(params, map, pdf, total) {
# Helper function for TestEstimateBloomCounts.
partition <- RandomPartition(total, pdf)
counts <- GenerateCounts(params, map, partition, 1)
e <- EstimateBloomCounts(params, counts)
results$estimates <<- abind(results$estimates, e$estimates, along = 3)
results$stds <<- abind(results$stds, e$stds, along = 3)
results$counts <<- abind(results$counts, counts, along = 3)
}
TestEstimatesAndStds <- function(repetitions, title,
params, map, partition, true_distr) {
v <- 1 # only handle one report per client
total <- sum(partition)
results <<- c(estimates = list(), stds = list(), counts = list())
TestEstimatesAndStds <- function(repetitions, title, params, map, pdf, total) {
# Checks that the estimates returned by EstimateBloomCounts on simulated
# inputs match the ground truth, and that the empirically observed standard
# deviations match the standard deviations reported by EstimateBloomCounts.
#
# Input:
# repetitions: the number of runs of EstimateBloomCounts
# title: label
# params: params vector
# map: the map table
# pdf: probability density function of the distribution from which simulated
# clients are sampled
# total: number of reports
results <<- c(estimates = list(), stds = list())
RunMultipleTests(title, TestEstimatesAndStdsHelper, repetitions,
params, map, partition)
params, map, pdf, total)
ave_e <- apply(results$estimates,1:2, mean)
observed_stds <- apply(results$estimates,1:2, sd)
ave_stds <- apply(results$stds,1:2, mean)
if(!is.null(true_distr))
checkTrue(!any((ave_e - true_distr) > (ave_stds / repetitions^.5) * 5),
ground_truth <- matrix(map %*% pdf, nrow = params$m, byrow = TRUE)
checkTrue(!any(abs(ave_e - ground_truth) > 1E-9 + # tolerance level
(ave_stds / repetitions^.5) * 5),
"Averages deviate too much from expectations.")
checkTrue(!any(observed_stds > ave_stds * 2),
"Expected standard deviations are too pessimistic.")
checkTrue(!any(observed_stds > ave_stds * (1 + 5 * repetitions^.5)),
"Expected standard deviations are too high")
checkTrue(!any(observed_stds < ave_stds / 2),
"Expected standard deviations are too optimistic")
checkTrue(!any(observed_stds < ave_stds * (1 - 5 * repetitions^.5)),
"Expected standard deviations are too low")
}
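The ground_truth computation above reshapes map %*% pdf, the expected fraction of 1s per Bloom filter bit under no noise, into one row per cohort. A small self-contained illustration with a hypothetical 2-cohort, 4-bit, 2-value map (not the map used in the tests below):

    library(Matrix)
    m <- 2; k <- 4                        # cohorts and bits per cohort
    map <- Matrix(0, nrow = m * k, ncol = 2, sparse = TRUE,
                  dimnames = list(NULL, c("v1", "v2")))
    map[1, "v1"] <- 1                     # v1 sets bit 1 of cohort 1
    map[2, "v2"] <- 1                     # v2 sets bit 2 of cohort 1
    map[5, "v1"] <- 1; map[5, "v2"] <- 1  # both values collide on bit 1 of cohort 2
    pdf <- c(v1 = 2/3, v2 = 1/3)
    ground_truth <- matrix(as.vector(map %*% pdf), nrow = m, byrow = TRUE)
    ground_truth  # row i holds the expected fraction of 1s in each bit of cohort i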
TestEstimateBloomCounts <- function() {
# Unit tests for the EstimateBloomCounts function.
report4x2 <- list(k = 4, m = 2) # 2 cohorts, 4 bits each
map0 <- Matrix(0, nrow = 8, ncol = 3, sparse = TRUE) # 3 possible values
map0[1,] <- c(1, 0, 0)
@@ -104,19 +142,17 @@ TestEstimateBloomCounts <- function() {
colnames(map0) <- c('v1', 'v2', 'v3')
partition0 <- c(3, 2, 1) * 100
names(partition0) <- colnames(map0)
true_distr <- matrix(c(1/2, 1/3, 1/6, 1, 1/6, 0, 0, 0), 2, 4, byrow = TRUE)
pdf0 <- c(1/2, 1/3, 1/6)
names(pdf0) <- colnames(map0)
noise0 <- list(p = 0, q = 1, f = 0) # no noise at all
TestEstimatesAndStds(repetitions = 1000, "Testing estimates and stds (1/3)",
c(report4x2, noise0), map0, partition0, true_distr)
c(report4x2, noise0), map0, pdf0, 100)
noise1 <- list(p = 0.4, q = .6, f = 0.5)
TestEstimatesAndStds(repetitions = 1000, "Testing estimates and stds (2/3)",
c(report4x2, noise1), map0, partition0, true_distr)
c(report4x2, noise1), map0, pdf0, 100)
# MEDIUM TEST: 100 values, 32 cohorts, 8 bits each, 10^6 reports
values <- 100
@@ -127,35 +163,66 @@ TestEstimateBloomCounts <- function() {
colnames(map1) <- sprintf("v%d", 1:values)
pdf <- ComputePdf("zipf1", values)
partition1 <- RandomPartition(10^9, pdf)
pdf1 <- ComputePdf("zipf1", values)
TestEstimatesAndStds(repetitions = 100, "Testing estimates and stds (3/3)",
c(report8x32, noise1), map1, partition1, NULL)
c(report8x32, noise1), map1, pdf1, 10^9)
}
TestDecodeHelper <- function(params, map, partition, tolerance_l1,
tolerance_linf) {
# Helper function for TestDecode.
TestDecodeHelper <- function(params, map, pdf, num_clients,
tolerance_l1, tolerance_linf) {
# Helper function for TestDecode. Simulates a RAPPOR run and checks Decode's
# output against the ground truth. Results are appended to a global
# list.
partition <- RandomPartition(num_clients, pdf)
counts <- GenerateCounts(params, map, partition, 1)
total <- sum(partition)
decoded <- Decode(counts, map, params)
l1 <- L1Distance(setNames(decoded$fit$estimate, decoded$fit$strings),
partition)
decoded_partition <- setNames(decoded$fit$estimate, decoded$fit$strings)
checkTrue(L1Distance(setNames(decoded$fit$estimate, decoded$fit$strings),
partition) < total^.5 * tolerance_l1,
results$estimates <<- MatrixVectorMerge(results$estimates, decoded_partition)
results$stds <<- MatrixVectorMerge(results$stds,
setNames(decoded$fit$std_dev,
decoded$fit$strings))
checkTrue(L1Distance(decoded_partition, partition) < total^.5 * tolerance_l1,
"L1 distance is too large")
checkTrue(LInfDistance(setNames(decoded$fit$estimate, decoded$fit$strings),
partition) < max(partition)^.5 * tolerance_linf,
"L_inf distance is too large")
checkTrue(LInfDistance(decoded_partition, partition) <
max(partition)^.5 * tolerance_linf, "L_inf distance is too large")
}
TestDecodeAveAndStds <- function(...) {
# Runs Decode multiple times (specified by the repetitions argument), checks
# individual runs against the ground truth, and the estimates of the standard
# error against empirical observations.
results <<- list(estimates = matrix(nrow = 0, ncol = 0),
stds = matrix(nrow = 0, ncol = 0))
RunMultipleTests(...)
empirical_stds <- apply(results$estimates, 2, sd, na.rm = TRUE)
estimated_stds <- apply(results$stds, 2, mean, na.rm = TRUE)
if(dim(results$estimates)[1] > 1)
{
checkTrue(any(estimated_stds > empirical_stds / 2),
"Our estimate for the standard deviation is too low")
checkTrue(any(estimated_stds < empirical_stds * 3),
"Our estimate for the standard deviation is too high")
}
}
TestDecode <- function() {
# Unit tests for the Decode function.
# TOY TESTS: three values, 2 cohorts, 4 bits each
report4x2 <- list(k = 4, m = 2, h = 2) # 2 cohorts, 4 bits each
map0 <- Matrix(0, nrow = 8, ncol = 3, sparse = TRUE) # 3 possible values
map0[1,] <- c(1, 0, 0)
@@ -165,30 +232,28 @@ TestDecode <- function() {
map0[5,] <- c(0, 0, 1) # 1st bit of the second cohort gets signal from v3
colnames(map0) <- c('v1', 'v2', 'v3')
distribution0 <- setNames(c(1/2, 1/3, 1/6), colnames(map0))
# toy example
distribution0 <- setNames(c(.5, .3, 1/6), colnames(map0))
noise0 <- list(p = 0, q = 1, f = 0) # no noise whatsoever
# Even in the absence of noise, the inferred counts won't necessarily
# match the ground truth. Must be close enough though.
noise0 <- list(p = 0, q = 1, f = 0) # no noise whatsoever
# RunMultipleTests("Testing Decode (1/5)", TestDecodeHelper, 100,
# c(report4x2, noise0), map0, partition0,
# tolerance_l1 = 5,
# tolerance_linf = 3)
TestDecodeAveAndStds("Testing Decode (1/5)", TestDecodeHelper, 100,
c(report4x2, noise0), map0, distribution0, 100,
tolerance_l1 = 5,
tolerance_linf = 3)
noise1 <- list(p = .4, q = .6, f = .5) # substantial noise
RunMultipleTests("Testing Decode (2/5)", TestDecodeHelper, 100,
c(report4x2, noise1), map0, partition0,
tolerance_l1 = 20,
tolerance_linf = 10)
noise1 <- list(p = .4, q = .6, f = .5) # substantial noise, very few reports
TestDecodeAveAndStds("Testing Decode (2/5)", TestDecodeHelper, 100,
c(report4x2, noise1), map0, distribution0, 100,
tolerance_l1 = 20,
tolerance_linf = 20)
partition1 <- setNames(c(3, 2, 1) * 100000, colnames(map0)) # many reports
RunMultipleTests("Testing Decode (3/5)", TestDecodeHelper, 100,
c(report4x2, noise1), map0, partition1,
tolerance_l1 = 50,
tolerance_linf = 40)
# substantial noise, many reports
TestDecodeAveAndStds("Testing Decode (3/5)", TestDecodeHelper, 100,
c(report4x2, noise1), map0, distribution0, 100000,
tolerance_l1 = 50,
tolerance_linf = 40)
# MEDIUM TEST: 100 values, 32 cohorts, 8 bits each, 10^6 reports
values <- 100
@@ -199,12 +264,12 @@ TestDecode <- function() {
colnames(map1) <- sprintf("v%d", 1:values)
pdf <- ComputePdf("zipf1", values)
partition1 <- setNames(RandomPartition(10^6, pdf), colnames(map1))
RunMultipleTests("Testing Decode (4/5)", TestDecodeHelper, 100,
c(report8x32, noise1), map1, partition1,
distribution1 <- ComputePdf("zipf1", values)
names(distribution1) <- colnames(map1)
TestDecodeAveAndStds("Testing Decode (4/5)", TestDecodeHelper, 100,
c(report8x32, noise1), map1, distribution1, 10^6,
tolerance_l1 = values * 3,
tolerance_linf = 50)
tolerance_linf = 100)
# Testing LASSO: 500 values, 32 cohorts, 8 bits each, 10^6 reports
values <- 500
@@ -215,10 +280,11 @@ TestDecode <- function() {
colnames(map2) <- sprintf("v%d", 1:values)
pdf <- ComputePdf("zipf1.5", values)
partition2 <- setNames(RandomPartition(10^6, pdf), colnames(map2))
RunMultipleTests("Testing Decode (5/5)", TestDecodeHelper, 1,
c(report8x32, noise0), map2, partition2,
distribution2 <- ComputePdf("zipf1.5", values)
names(distribution2) <- colnames(map2)
TestDecodeAveAndStds("Testing Decode (5/5)", TestDecodeHelper, 1,
c(report8x32, noise0), map2, distribution2, 10^6,
tolerance_l1 = values * 3,
tolerance_linf = 20)
@@ -229,5 +295,4 @@ TestAll <- function() {
TestDecode()
}
TestAll()
TestAll()

View file

@@ -232,7 +232,7 @@ make-summary() {
local dir=$1
local filename=${2:-results.html}
tests/make_summary.py $dir > $dir/rows.html
tests/make_summary.py $dir $dir/rows.html
pushd $dir >/dev/null
@@ -240,6 +240,8 @@ make-summary() {
| sed -e '/TABLE_ROWS/ r rows.html' \
> $filename
rm rows.html
popd >/dev/null
log "Wrote $dir/$filename"

View file

@@ -19,18 +19,19 @@ source('analysis/R/read_input.R')
RandomPartition <- function(total, weights) {
# Outputs a random partition according to a specified distribution
# Args:
# total - number of balls
# weights - vector encoding the probability that a ball lands into a bin
# total - number of samples
# weights - weights that are proportional to the probability density
# function of the target distribution
# Returns:
# an integer vector summing up to total
# a histogram sampled according to the pdf
# Example:
# > RandomPartition(100, c(3, 2, 1, 0, 1))
# [1] 47 24 15 0 14
if (any(weights < 0))
stop("Weights cannot be negative")
stop("Probabilities cannot be negative")
if (sum(weights) == 0)
stop("Weights cannot sum up to 0")
stop("Probabilities cannot sum up to 0")
bins <- length(weights)
result <- rep(0, bins)
@@ -59,6 +60,8 @@ RandomPartition <- function(total, weights) {
w <- w - weights[i]
}
names(result) <- names(weights)
return(result)
}
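Functionally, RandomPartition draws a multinomial sample over the bins defined by the weights. A rough base-R equivalent of the docstring example above (the exact draw will differ from run to run):

    set.seed(1)
    weights <- c(3, 2, 1, 0, 1)
    as.vector(rmultinom(1, size = 100, prob = weights / sum(weights)))
    # one possible draw, e.g. 47 24 15 0 14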

View file

@@ -1,5 +1,5 @@
#!/usr/bin/python
"""Given a regtest result tree, prints an HTML summary on stdout.
"""Given a regtest result tree, prints an HTML summary to a file.
See HTML skeleton in tests/regtest.html.
"""
@@ -170,56 +170,64 @@ def ParseMetrics(metrics_file, log_file, num_additional):
"""Processes the metrics file.
Args:
report_dir: A directory name containing metrics.csv and log.txt.
metrics_file: name of the metrics file
log_file: name of the log.txt file
num_additional: A number of bogus candidates added to the candidate list.
Returns a pair:
- A dictionary of metrics (some can be []).
- An HTML-formatted portion of the report row.
"""
with open(metrics_file) as m:
m.readline()
metrics_row = m.readline().split(',')
(num_actual, num_rappor, num_false_pos, num_false_neg, total_variation,
allocated_mass) = metrics_row
if not os.path.isfile(metrics_file):
metrics_row_str = ['', '', '', '', '', '']
metrics_row_dict = {}
else:
with open(metrics_file) as m:
m.readline()
metrics_row = m.readline().split(',')
num_actual = int(num_actual)
num_rappor = int(num_rappor)
(num_actual, num_rappor, num_false_pos, num_false_neg, total_variation,
allocated_mass) = metrics_row
num_false_pos = int(num_false_pos)
num_false_neg = int(num_false_neg)
num_actual = int(num_actual)
num_rappor = int(num_rappor)
total_variation = float(total_variation)
allocated_mass = float(allocated_mass)
num_false_pos = int(num_false_pos)
num_false_neg = int(num_false_neg)
total_variation = float(total_variation)
allocated_mass = float(allocated_mass)
# e.g. if there are 20 additional candidates added, and 1 false positive,
# the false positive rate is 5%.
fp_rate = float(num_false_pos) / num_additional if num_additional else 0
# e.g. if there are 100 strings in the true input, and 80 strings
# detected by RAPPOR, then we have 20 false negatives, and a false
# negative rate of 20%.
fn_rate = float(num_false_neg) / num_actual
metrics_row_str = [
str(num_actual),
str(num_rappor),
'%.1f%% (%d)' % (fp_rate * 100, num_false_pos) if num_additional
else '',
'%.1f%% (%d)' % (fn_rate * 100, num_false_neg),
'%.3f' % total_variation,
'%.3f' % allocated_mass,
]
metrics_row_dict = {
'tv': [total_variation],
'fpr': [fp_rate] if num_additional else [],
'fnr': [fn_rate],
'am': [allocated_mass],
}
elapsed_time = ExtractTime(log_file)
# e.g. if there are 20 additional candidates added, and 1 false positive,
# the false positive rate is 5%.
fp_rate = float(num_false_pos) / num_additional if num_additional else 0
# e.g. if there are 100 strings in the true input, and 80 strings
# detected by RAPPOR, then we have 20 false negatives, and a false
# negative rate of 20%.
fn_rate = float(num_false_neg) / num_actual
metrics_row_str = [
str(num_actual),
str(num_rappor),
'%.1f%% (%d)' % (fp_rate * 100, num_false_pos) if num_additional else '',
'%.1f%% (%d)' % (fn_rate * 100, num_false_neg),
'%.3f' % total_variation,
'%.3f' % allocated_mass,
'%.2f' % elapsed_time if elapsed_time is not None else '',
]
metrics_row_dict = {
'tv': [total_variation],
'fpr': [fp_rate] if num_additional else [],
'fnr': [fn_rate],
'am': [allocated_mass],
'time': [elapsed_time] if elapsed_time is not None else [],
}
if elapsed_time is not None:
metrics_row_str = metrics_row_str + ['%.2f' % elapsed_time]
metrics_row_dict['time'] = [elapsed_time]
# return metrics formatted as HTML table entries
return (metrics_row_dict,
@@ -292,11 +300,15 @@ def FormatPlots(base_dir, test_instances):
def main(argv):
base_dir = argv[1]
output_file = open(argv[2], 'w')
# This file has the test case names, in the order that they should be
# displayed.
path = os.path.join(base_dir, 'test-instances.txt')
with open(path) as f:
instances_file = os.path.join(base_dir, 'test-instances.txt')
if not os.path.isfile(instances_file):
raise RuntimeError('{} is missing'.format(instances_file))
with open(instances_file) as f:
test_instances = [line.strip() for line in f]
# Metrics are assembled into a dictionary of dictionaries. The top-level
@@ -314,6 +326,10 @@ def main(argv):
# file. Instead, rows' names are links to the corresponding .png files.
include_plots = len(test_instances) < 20
instances_succeeded = 0
instances_failed = 0
instances_running = 0
for instance in test_instances:
# A test instance is identified by the test name and the test run.
test_case, test_instance, _ = instance.split(' ')
@@ -334,33 +350,48 @@ def main(argv):
cell1_html = FormatCell1(test_case, test_instance, metrics_file, log_file,
plot_file, include_plots)
if os.path.isfile(metrics_file):
# ParseMetrics outputs an HTML table row and also updates lists
metrics_dict, metrics_html = ParseMetrics(metrics_file, log_file,
num_additional)
# ParseMetrics outputs an HTML table row and also updates lists
metrics_dict, metrics_html = ParseMetrics(metrics_file, log_file,
num_additional)
# Update the metrics structure. Initialize dictionaries if necessary.
for m in metrics:
# Update the metrics structure. Initialize dictionaries if necessary.
for m in metrics:
if m in metrics_dict:
if not test_case in metrics[m]:
metrics[m][test_case] = metrics_dict[m]
else:
metrics[m][test_case] += metrics_dict[m]
print '<tr>{}{}{}</tr>'.format(cell1_html, spec_html, metrics_html)
print >>output_file, '<tr>{}{}{}</tr>'.format(cell1_html,
spec_html, metrics_html)
print FormatSummaryRow(metrics)
# Update counters
if 'tv' in metrics_dict:
instances_succeeded += 1
else:
if 'time' in metrics_dict:
instances_failed += 1
else:
if os.path.isfile(log_file):
instances_running += 1
print '</tbody>'
print '</table>'
print '<p style="padding-bottom: 3em"></p>' # vertical space
print >>output_file, FormatSummaryRow(metrics)
print >>output_file, '</tbody>'
print >>output_file, '</table>'
print >>output_file, '<p style="padding-bottom: 3em"></p>' # vertical space
# Plot links.
if include_plots:
print FormatPlots(base_dir, test_instances)
print >>output_file, FormatPlots(base_dir, test_instances)
else:
print ('<p>Too many tests to include plots. '
'Click links within rows for details.</p>')
print >>output_file, ('<p>Too many tests to include plots. '
'Click links within rows for details.</p>')
print ('Instances'
' succeeded: {} failed: {} running: {} total: {}'.
format(instances_succeeded, instances_failed, instances_running,
len(test_instances)))
if __name__ == '__main__':
try: