Mirror of https://github.com/mozilla/rappor.git

- Addressing reviewer's comments
- Adding the ability to generate multiple reports per client in fast-counts mode

Parent: 9a9d690223
Commit: 3eafbfb018
demo.sh (2 lines changed)
@@ -52,7 +52,7 @@ build() {
run() {
  # Run all the test cases that start with "demo-", and write to "report.html".
  # (The original demo.sh used "report.html", so we're not changing the name.)
  ./regtest.sh run-seq '^demo-' report.html
  ./regtest.sh run-seq '^demo' 1 F
}

# TODO: Port these old bad cases to regtest_spec.py.
regtest.sh (327 lines changed)
@@ -5,24 +5,31 @@
# Usage:
#   ./regtest.sh <function name>
#
# Examples:
#
# $ export NUM_PROCS=20     # 12 by default
# $ ./regtest.sh run-all    # run all reg tests with 20 parallel processes
#
# At the end, it will print an HTML summary.
#
# To run a subset of tests or debug a specific test case, use the 'run-seq'
# function:
#
# Three main functions are:
#   run [<pattern> [<num> [<fast>]]]     - run tests matching <pattern> in
#                                          parallel, each <num> times. The fast
#                                          mode (T/F) shortcuts generation of
#                                          reports.
#   run-seq [<pattern> [<num> [<fast>]]] - ditto, except that tests are run
#                                          sequentially
#   run-all [<num>]                      - run all tests, in parallel, each
#                                          <num> times
#
# $ ./regtest.sh run-seq demo-exp            # Sequential run, matches 1 case
# $ ./regtest.sh run-seq demo-               # Sequential run, matches multiple cases
# Examples:
# $ ./regtest.sh run-seq unif-small-typical  # Sequential run, matches 1 case
# $ ./regtest.sh run-seq unif-small- 3 F     # Sequential, each test is run three
#                                            # times, using slow generation
# $ ./regtest.sh run unif-                   # Parallel run, matches multiple cases
# $ ./regtest.sh run unif- 5                 # Parallel run, matches multiple
#                                            # cases, each test is run 5 times
# $ ./regtest.sh run-all                     # Run all tests once
#
# The first argument to run-seq is a regex in 'grep -E' format. (Detail: Don't
# The <pattern> argument is a regex in 'grep -E' format. (Detail: Don't
# use $ in the pattern, since it matches the whole spec line and not just the
# test case name.)
# test case name.) The number of processors used in a parallel run is one less
# than the number of CPUs on the machine.
#
# The first argument to run-all is the number of repetitions of each test.
#
# Future speedups:
# - Reuse the same input -- come up with naming scheme based on params
@@ -42,11 +49,6 @@ readonly REGTEST_DIR=_tmp/regtest
# All the Python tools need this
export PYTHONPATH=$CLIENT_DIR

readonly NUM_SPEC_COLS=14

# TODO: Get num cpus
readonly NUM_PROCS=${NUM_PROCS:-12}

print-true-inputs() {
  local num_unique_values=$1
  seq 1 $num_unique_values | awk '{print "v" $1}'
@@ -88,128 +90,102 @@ print-candidates() {
}

# Generate a single test case, specified by a line of the test spec.
# This is a helper function for 'run-all'.
_generate-one-case() {
  local test_case_id=$1
  local test_case_run=$2
# This is a helper function for _run_tests().
_setup-one-case() {
  local test_case=$1

  # input params
  local dist=$3
  local num_unique_values=$4
  local num_clients=$5
  local values_per_client=$6
  local dist=$2
  local num_unique_values=$3
  local num_clients=$4
  local values_per_client=$5

  # RAPPOR params
  local num_bits=$7
  local num_hashes=$8
  local num_cohorts=$9
  local p=${10}  # need curly braces to get 10th arg
  local q=${11}
  local f=${12}
  local num_bits=$6
  local num_hashes=$7
  local num_cohorts=$8
  local p=$9
  local q=${10}  # need curly braces to get the 10th arg
  local f=${11}

  # map params
  local num_additional=${13}
  local to_remove=${14}
  local num_additional=${12}
  local to_remove=${13}

  # NOTE: NUM_SPEC_COLS == 14
  banner 'Setting up parameters and candidate files for '$test_case

  # proceed only for the first instance out of (possibly) many
  if test $test_case_run = 1; then
    banner 'Setting up parameters and candidate files for '$test_case_id
  local case_dir=$REGTEST_DIR/$test_case
  mkdir --verbose -p $case_dir

    local case_dir=$REGTEST_DIR/$test_case_id
    mkdir --verbose -p $case_dir
  # Save the "spec"
  echo "$@" > $case_dir/spec.txt

    # Save the "spec" for showing in the summary.
    echo "$@" > $case_dir/spec.txt
  local params_path=$case_dir/case_params.csv

    local params_path=$case_dir/case_params.csv
  echo 'k,h,m,p,q,f' > $params_path
  echo "$num_bits,$num_hashes,$num_cohorts,$p,$q,$f" >> $params_path

    echo 'k,h,m,p,q,f' > $params_path
    echo "$num_bits,$num_hashes,$num_cohorts,$p,$q,$f" >> $params_path
  print-true-inputs $num_unique_values > $case_dir/case_true_inputs.txt

    print-true-inputs $num_unique_values > $case_dir/case_true_inputs.txt
  local true_map_path=$case_dir/case_true_map.csv

    local true_map_path=$case_dir/case_true_map.csv
  analysis/tools/hash_candidates.py \
    $params_path \
    < $case_dir/case_true_inputs.txt \
    > $true_map_path

    analysis/tools/hash_candidates.py \
      $params_path \
      < $case_dir/case_true_inputs.txt \
      > $true_map_path
  # banner "Constructing candidates"

    # banner "Constructing candidates"
  print-candidates \
    $case_dir/case_true_inputs.txt $num_unique_values \
    $num_additional "$to_remove" \
    > $case_dir/case_candidates.txt

    # Reuse demo.sh function
    print-candidates \
      $case_dir/case_true_inputs.txt $num_unique_values \
      $num_additional "$to_remove" \
      > $case_dir/case_candidates.txt
  # banner "Hashing candidates to get 'map'"

    # banner "Hashing candidates to get 'map'"

  analysis/tools/hash_candidates.py \
    $case_dir/case_params.csv \
    < $case_dir/case_candidates.txt \
    > $case_dir/case_map.csv
  fi
    analysis/tools/hash_candidates.py \
      $case_dir/case_params.csv \
      < $case_dir/case_candidates.txt \
      > $case_dir/case_map.csv
}

# Run a single test instance, specified by a line of the test spec.
# This is a helper function for 'run-all'.
# Run a single test instance, specified by <test_name, instance_num>.
# This is a helper function for _run_tests().
_run-one-instance() {
  local test_case_id=$1
  local test_case_run=$2
  local test_case=$1
  local test_instance=$2
  local fast_counts=$3

  # input params
  local dist=$3
  local num_unique_values=$4
  local num_clients=$5
  local values_per_client=$6
  local case_dir=$REGTEST_DIR/$test_case

  read -r case_name distr num_unique_values num_clients \
    values_per_client num_bits num_hashes num_cohorts p q f num_additional \
    to_remove < $case_dir/spec.txt

  # RAPPOR params
  local num_bits=$7
  local num_hashes=$8
  local num_cohorts=$9
  local p=${10}  # need curly braces to get 10th arg
  local q=${11}
  local f=${12}

  # map params
  local num_additional=${13}
  local to_remove=${14}

  # NOTE: NUM_SPEC_COLS == 14

  local case_dir=$REGTEST_DIR/$test_case_id

  local instance_dir=$REGTEST_DIR/$test_case_id/$test_case_run
  local instance_dir=$REGTEST_DIR/$test_case/$test_instance
  mkdir --verbose -p $instance_dir

  local fast_counts=T

  if test $fast_counts = T; then
    local params_path=$case_dir/case_params.csv
    local true_map_path=$case_dir/case_true_map.csv

    local num_reports=$(expr $num_clients \* $values_per_client)
    local params_file=$case_dir/case_params.csv
    local true_map_file=$case_dir/case_true_map.csv

    banner "Using gen_counts.R"
    tests/gen_counts.R $params_path $true_map_path $dist $num_reports \
      "$instance_dir/case"

    echo tests/gen_counts.R $distr $num_clients $values_per_client $params_file \
      $true_map_file "$instance_dir/case"

    tests/gen_counts.R $distr $num_clients $values_per_client $params_file \
      $true_map_file "$instance_dir/case"
  else
    banner "Generating input"

    tests/gen_sim_input.py \
      -d $dist \
      -c $num_clients \
      -u $num_unique_values \
      -v $values_per_client \
      > $instance_dir/case.csv
    tests/gen_reports.R $distr $num_unique_values $num_clients \
      $values_per_client $instance_dir/case.csv

    banner "Running RAPPOR client"

    # Writes encoded "out" file, true histogram, true inputs, params CSV and JSON
    # to $case_dir.
    # Writes encoded "out" file, true histogram, true inputs to $instance_dir.
    tests/rappor_sim.py \
      --num-bits $num_bits \
      --num-hashes $num_hashes \

@@ -231,10 +207,14 @@ _run-one-instance() {
  local out_dir=${instance_dir}_report
  mkdir --verbose -p $out_dir

  # Currently, the summary file shows and aggregates timing of the inference
  # engine, which excludes R's loading time and reading of the (possibly
  # substantial) map file. Timing below is more inclusive.
  TIMEFORMAT='Running analyze.R took %R seconds'
  time {
    # Input prefix, output dir
    tests/analyze.R -t "Test case: $test_case_id (instance $test_case_run)" "$case_dir/case" "$instance_dir/case" $out_dir
    tests/analyze.R -t "Test case: $test_case (instance $test_instance)" \
      "$case_dir/case" "$instance_dir/case" $out_dir
  }
}

@@ -247,8 +227,9 @@ _run-one-instance-logged() {
  mkdir --verbose -p $log_dir

  log "Started '$test_case_id' (instance $test_case_run) -- logging to $log_dir/log.txt"
  _run-one-instance "$@" >$log_dir/log.txt 2>&1
  log "Test case $test_case_id (instance $test_case_run) done"
  _run-one-instance "$@" >$log_dir/log.txt 2>&1 \
    && log "Test case $test_case_id (instance $test_case_run) done" \
    || log "Test case $test_case_id (instance $test_case_run) failed"
}

show-help() {

@@ -274,11 +255,6 @@ make-summary() {
  log "URL: file://$PWD/$dir/$filename"
}

# Helper to parse spec input with xargs
multi() {
  xargs -n $NUM_SPEC_COLS --no-run-if-empty --verbose "$@"
}

test-error() {
  local spec_regex=${1:-}
  log "Some test cases failed"

@@ -289,76 +265,95 @@ test-error() {
  # exit 1
}

# Assuming the spec file, write a list of test case names (first column). This
# is read by make_summary.py.
write-test-cases() {
  cut -d ' ' -f 1,2 $REGTEST_DIR/spec-list.txt > $REGTEST_DIR/test-cases.txt
# Assuming the spec file, write a list of test case names (first column) with
# the instance ids (second column), where instance ids run from 1 to $1.
# Third column is fast_counts (T/F).
_setup-test-instances() {
  local instances=$1
  local fast_counts=$2

  while read line; do
    for ((i=1; i<=$instances; i++)); do
      read case_name _ <<< $line  # extract the first token
      echo $case_name $i $fast_counts
    done
  done
}
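_setup-test-instances simply crosses each case name with the instance ids and
the fast-counts flag. A Python sketch of the same expansion (a hypothetical
helper, not part of the repo):

  def setup_test_instances(case_names, instances, fast_counts='T'):
      # Yield one "case instance fast_counts" line per test instance,
      # mirroring the shell while/for loop above.
      for name in case_names:
          for i in range(1, instances + 1):
              yield '%s %d %s' % (name, i, fast_counts)

  # list(setup_test_instances(['r-exp-small-typical'], 2))
  # -> ['r-exp-small-typical 1 T', 'r-exp-small-typical 2 T']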
# run-all should take regex?
run-seq() {
# Args:
#   regexp: A pattern selecting the subset of tests to run
#   instances: A number of times each test case is run
#   parallel: Whether the tests are run in parallel (T/F)
#   fast_counts: Whether counts are sampled directly (T/F)
#
_run-tests() {
  local spec_regex=$1  # grep -E format on the spec
  local html_filename=${2:-results.html}  # demo.sh changes it to report.html
  local instances=$2
  local parallel=$3
  local fast_counts=$4

  rm -r --verbose $REGTEST_DIR

  mkdir --verbose -p $REGTEST_DIR

  local spec_list=$REGTEST_DIR/spec-list.txt
  tests/regtest_spec.py | grep -E $spec_regex > $spec_list

  write-test-cases

  # Generate parameters for all test cases.
  cat $spec_list \
    | multi -- $0 _generate-one-case || test-error

  cat $spec_list \
    | multi -- $0 _run-one-instance || test-error $spec_regex

  log "Done running all test cases"

  make-summary $REGTEST_DIR $html_filename
}

run-all() {
  # Number of iterations of each test.
  local repetitions=${1:-1}

  # Limit it to this number of test cases. By default we run all of them.
  local max_cases=${2:-1000000}
  local verbose=${3:-F}

  mkdir --verbose -p $REGTEST_DIR
  # Print the spec
  #
  # -n3 has to match the number of arguments in the spec.

  #local func=_run-one-case-logged
  local func
  if test $verbose = T; then
    func=_run-one-instance  # parallel process output mixed on the console
  local processors=1

  if test $parallel = F; then
    func=_run-one-instance  # output to the console
  else
    func=_run-one-instance-logged  # one line
    func=_run-one-instance-logged
    processors=$(grep -c ^processor /proc/cpuinfo)
    processors=$(expr $processors - 1)
    log "Running $processors parallel processes"
  fi

  log "Using $NUM_PROCS parallel processes"

  local spec_list=$REGTEST_DIR/spec-list.txt
  tests/regtest_spec.py -r $repetitions > $spec_list

  write-test-cases
  local cases_list=$REGTEST_DIR/test-cases.txt
  tests/regtest_spec.py | grep -E $spec_regex > $cases_list

  # Generate parameters for all test cases.
  head -n $max_cases $spec_list \
    | multi -P $NUM_PROCS -- $0 _generate-one-case || test-error
  cat $cases_list \
    | xargs -l -P $processors -- $0 _setup-one-case \
    || test-error

  log "Done generating parameters for all test cases"

  head -n $max_cases $spec_list \
    | multi -P $NUM_PROCS -- $0 $func || test-error
  local instances_list=$REGTEST_DIR/test-instances.txt
  _setup-test-instances $instances $fast_counts < $cases_list > $instances_list

  log "Done running all test cases"
  cat $instances_list \
    | xargs -l -P $processors -- $0 $func || test-error

  log "Done running all test instances"

  make-summary $REGTEST_DIR
}

# Run tests sequentially
run-seq() {
  local spec_regex=${1:-'^r-'}  # grep -E format on the spec
  local instances=${2:-1}
  local fast_counts=${3:-T}

  _run-tests $spec_regex $instances F $fast_counts
}

# Run tests in parallel
run() {
  local spec_regex=${1:-'^r-'}  # grep -E format on the spec
  local instances=${2:-1}
  local fast_counts=${3:-T}

  _run-tests $spec_regex $instances T $fast_counts
}

# Run all tests in parallel
run-all() {
  local instances=${1:-1}

  log "Running all tests. Can take a while."
  _run-tests '^r-' $instances T T
}

"$@"
tests/analyze.R
@@ -59,24 +59,33 @@ Log <- function(...) {
  cat('\n')
}

LoadInputs <- function(prefix_params, prefix_counts, ctx) {
  # prefix: path prefix, e.g. '_tmp/exp'
  p <- paste0(prefix_params, '_params.csv')
  c <- paste0(prefix_counts, '_counts.csv')
  m <- paste0(prefix_params, '_map.csv')
  h <- paste0(prefix_counts, '_hist.csv')
LoadInputs <- function(prefix_case, prefix_instance, ctx) {
  # prefix_case: path prefix to the test case, e.g. '_tmp/exp'
  # prefix_instance: path prefix to the test instance, e.g. '_tmp/exp/1'
  p <- paste0(prefix_case, '_params.csv')
  m <- paste0(prefix_case, '_map.csv')

  c <- paste0(prefix_instance, '_counts.csv')
  h <- paste0(prefix_instance, '_hist.csv')

  params <- ReadParameterFile(p)
  counts <- ReadCountsFile(c)
  map <- ReadMapFile(m)

  # Calls AnalyzeRAPPOR to run the analysis code
  rappor <- AnalyzeRAPPOR(params, counts, map$map, "FDR", 0.05,
  timing <- system.time({
    # Calls AnalyzeRAPPOR to run the analysis code
    rappor <- AnalyzeRAPPOR(params, counts, map$map, "FDR", 0.05,
                            date="01/01/01", date_num="100001")
  })

  if (is.null(rappor)) {
    stop("RAPPOR analysis failed.")
  }

  # The line is searched for, and elapsed time is extracted, by make_summary.py.
  # Should the formatting or wording change, make_summary must be updated too.
  Log(c("Inference took", timing["elapsed"], "seconds"))

  Log("Analysis Results:")
  str(rappor)

@@ -195,8 +204,8 @@ main <- function(parsed) {
  args <- parsed$args
  options <- parsed$options

  input_params_prefix <- args[[1]]
  input_counts_prefix <- args[[2]]
  input_case_prefix <- args[[1]]
  input_instance_prefix <- args[[2]]
  output_dir <- args[[3]]

  # increase ggplot font size globally

@@ -207,7 +216,7 @@ main <- function(parsed) {
  # NOTE: It takes 2000+ ms to get here, while the analysis only
  # takes 500 ms or so (as measured by system.time).

  LoadInputs(input_params_prefix, input_counts_prefix, ctx)
  LoadInputs(input_case_prefix, input_instance_prefix, ctx)
  d <- ProcessAll(ctx)
  p <- PlotAll(d$plot_data, options$title)
tests/gen_counts.R
@@ -16,8 +16,6 @@
source('analysis/R/read_input.R')

library(zipfR)

RandomPartition <- function(total, weights) {
  # Outputs a random partition according to a specified distribution
  # Args:

@@ -64,13 +62,13 @@ RandomPartition <- function(total, weights) {
  return(result)
}

GenerateCounts <- function(params, true_map, partition) {
GenerateCounts <- function(params, true_map, partition, reports_per_client) {
  # Fast simulation of the marginal table for RAPPOR reports
  # Args:
  #   params - parameters of the RAPPOR reporting process
  #   total - number of reports
  #   true_map - hashed true inputs
  #   weights - vector encoding the probability that a ball lands into a bin
  #   partition - allocation of clients between true values
  #   reports_per_client - number of reports (IRRs) per client
  if (nrow(true_map$map) != (params$m * params$k)) {
    stop(cat("Map does not match the params file!",
             "mk =", params$m * params$k,

@@ -87,96 +85,115 @@ GenerateCounts <- function(params, true_map, partition) {
  # Expands to (m x k) x strs matrix, where each element (corresponding to the
  # bit in the aggregate Bloom filter) is repeated k times.
  expanded <- apply(cohorts, 2, function(vec) rep(vec, each = params$k))

  # Computes the number of bits set to one BEFORE privacy-preserving transform.
  counts_ones <- apply(expanded * true_map$map, 1, sum)

  # Computes the number of bits set to zero BEFORE privacy-preserving transform.
  counts_zeros <- rep(apply(cohorts, 1, sum), each = params$k) - counts_ones

  p <- params$p
  q <- params$q
  f <- params$f

  # probability that a true 1 is reported as "1"
  pstar <- (1 - f / 2) * q + (f / 2) * p
  # probability that a true 0 is reported as "1"
  qstar <- (1 - f / 2) * p + (f / 2) * q
  # For each bit, the number of clients reporting this bit:
  clients_per_bit <- rep(apply(cohorts, 1, sum), each = params$k)

  reported_ones <-
    unlist(lapply(counts_ones,
                  function(x) rbinom(n = 1, size = x, prob = pstar))) +
    unlist(lapply(counts_zeros,
                  function(x) rbinom(n = 1, size = x, prob = qstar)))
  # Computes the true number of bits set to one BEFORE PRR.
  true_ones <- apply(expanded * true_map$map, 1, sum)

  ones_in_prr <-
    unlist(lapply(true_ones,
                  function(x) rbinom(n = 1, size = x, prob = 1 - params$f / 2))) +
    unlist(lapply(clients_per_bit - true_ones,  # clients where the bit is 0
                  function(x) rbinom(n = 1, size = x, prob = params$f / 2)))

  counts <- cbind(apply(cohorts, 1, sum),
    matrix(reported_ones, nrow = params$m, ncol = params$k, byrow = TRUE))
  # Number of IRRs where each bit is reported (either as 0 or as 1)
  reports_per_bit <- clients_per_bit * reports_per_client

  ones_before_irr <- ones_in_prr * reports_per_client

  ones_after_irr <-
    unlist(lapply(ones_before_irr,
                  function(x) rbinom(n = 1, size = x, prob = params$q))) +
    unlist(lapply(reports_per_bit - ones_before_irr,
                  function(x) rbinom(n = 1, size = x, prob = params$p)))

  counts <- cbind(apply(cohorts, 1, sum) * reports_per_client,
    matrix(ones_after_irr, nrow = params$m, ncol = params$k, byrow = TRUE))

  if (any(is.na(counts)))
    stop("Failed to generate bit counts. Likely due to integer overflow.")

  counts
}
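The rewritten sampler is two-stage, matching RAPPOR's reporting model: the f
noise (the memoized PRR) is drawn once per client, and each of that client's
reports_per_client IRRs then re-randomizes the memoized bit with q and p. A
minimal numpy sketch of the same per-bit sampling (illustrative names; the
repo's implementation is the R code above):

  import numpy as np

  def sample_bit_counts(true_ones, clients, reports_per_client, p, q, f,
                        rng=np.random.default_rng()):
      # Stage 1 (PRR): each client memoizes a bit flipped with probability f/2.
      ones_prr = (rng.binomial(true_ones, 1 - f / 2) +
                  rng.binomial(clients - true_ones, f / 2))
      # Stage 2 (IRR): every report re-randomizes the memoized bit.
      ones_irr = (rng.binomial(ones_prr * reports_per_client, q) +
                  rng.binomial((clients - ones_prr) * reports_per_client, p))
      return ones_irr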
ComputePdf <- function(distr, range) {
  # Outputs discrete probability density function for a given distribution

  # These are the five distributions in gen_sim_input.py
  if (distr == 'exp') {
    pdf <- dexp(1:range, rate = 5 / range)
  } else if (distr == 'gauss') {
    half <- range / 2
    left <- -half + 1
    pdf <- dnorm(left : half, sd = range / 6)
  } else if (distr == 'unif') {
    # e.g. for N = 4, weights are [0.25, 0.25, 0.25, 0.25]
    pdf <- dunif(1:range, max = range)
  } else if (distr == 'zipf1') {
    # Since the distribution is defined over a finite set, we allow the
    # parameter of the Zipf distribution to be 1.
    pdf <- sapply(1:range, function(x) 1/x)
  } else if (distr == 'zipf1.5') {
    pdf <- sapply(1:range, function(x) 1/x^1.5)
  } else {
    stop(sprintf("Invalid distribution '%s'", distr))
  }

  pdf <- pdf / sum(pdf)  # normalize

  pdf
}
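For cross-checking, the same five shapes in Python (a sketch; the densities are
left unnormalized since constant factors cancel, and the Gaussian branch
assumes an even range):

  import numpy as np

  def compute_pdf(distr, n):
      x = np.arange(1, n + 1)
      if distr == 'exp':
          w = np.exp(-5.0 * x / n)        # dexp(x, rate = 5/n), up to a constant
      elif distr == 'gauss':
          sd = n / 6.0                    # dnorm over -n/2+1 .. n/2
          w = np.exp(-np.arange(-n // 2 + 1, n // 2 + 1) ** 2.0 / (2 * sd ** 2))
      elif distr == 'unif':
          w = np.ones(n)
      elif distr == 'zipf1':
          w = 1.0 / x
      elif distr == 'zipf1.5':
          w = 1.0 / x ** 1.5
      else:
          raise ValueError("Invalid distribution '%s'" % distr)
      return w / w.sum()                  # normalize, as ComputePdf does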
# Usage:
#
# $ ./gen_counts.R foo_params.csv foo_true_map.csv exp 10000 \
#   foo_counts.csv
# $ ./gen_counts.R exp 10000 1 foo_params.csv foo_true_map.csv foo
#
# 4 inputs and 1 output.
# Inputs:
#   distribution name
#   number of clients
#   reports per client
#   parameters file
#   map file
#   prefix for output files
# Outputs:
#   foo_counts.csv
#   foo_hist.csv
#
# Warning: the number of reports in any cohort must be less than
# .Machine$integer.max

main <- function(argv) {
  params_file <- argv[[1]]
  true_map_file <- argv[[2]]
  dist <- argv[[3]]
  num_reports <- as.integer(argv[[4]])
  out_prefix <- argv[[5]]
  distr <- argv[[1]]
  num_clients <- as.integer(argv[[2]])
  reports_per_client <- as.integer(argv[[3]])
  params_file <- argv[[4]]
  true_map_file <- argv[[5]]
  out_prefix <- argv[[6]]

  params <- ReadParameterFile(params_file)

  true_map <- ReadMapFile(true_map_file)
  # print(true_map$strs)

  num_unique_values <- length(true_map$strs)

  # These are the four distributions in gen_sim_input.py
  if (dist == 'exp') {
    # NOTE: gen_sim_input.py hard-codes lambda = N/5 for 'exp'
    weights <- dexp(1:num_unique_values, rate = 5 / num_unique_values)
  } else if (dist == 'gauss') {
    # NOTE: gen_sim_input.py hard-codes stddev = N/6 for 'gauss'
    half <- num_unique_values / 2
    left <- -half + 1
    weights <- dnorm(left : half, sd = num_unique_values / 6)
  } else if (dist == 'unif') {
    # e.g. for N = 4, weights are [0.25, 0.25, 0.25, 0.25]
    weights <- dunif(1:num_unique_values, max = num_unique_values)
  } else if (dist == 'zipf1') {
    # Since the distribution is defined over a finite set, we allow the
    # parameter of the Zipf distribution to be 1.
    weights <- sapply(1:num_unique_values, function(x) 1/x)
  } else if (dist == 'zipf1.5') {
    weights <- sapply(1:num_unique_values, function(x) 1/x^1.5)
  } else {
    stop(sprintf("Invalid distribution '%s'", dist))
  }
  print("weights")
  print(weights)
  pdf <- ComputePdf(distr, num_unique_values)

  if (length(true_map$strs) != length(weights)) {
    stop(cat("Dimensions of weights do not match:",
             "m =", length(true_map$strs), "weights col:", length(weights),
             sep = " "))
  }
  print("Distribution")
  print(pdf)

  # Computes the number of clients reporting each string
  # according to the pre-specified distribution.
  partition <- RandomPartition(num_reports, weights)
  partition <- RandomPartition(num_clients, pdf)
  print('PARTITION')
  print(partition)

  # Histogram
  true_hist <- data.frame(string = true_map$strs, count = partition)

  counts <- GenerateCounts(params, true_map, partition)
  counts <- GenerateCounts(params, true_map, partition, reports_per_client)

  # Now create a CSV file
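RandomPartition(num_clients, pdf) splits the clients across the support of the
distribution; assuming each client is assigned independently, that is exactly
a multinomial draw, e.g.:

  import numpy as np

  rng = np.random.default_rng()
  pdf = np.array([0.5, 0.3, 0.2])           # e.g. a ComputePdf output, 3 values
  partition = rng.multinomial(100000, pdf)  # clients per value; sums to 100000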
tests/gen_counts_test.R
@@ -15,9 +15,67 @@
# limitations under the License.

library(RUnit)
library(Matrix)  # for sparse matrices

source('tests/gen_counts.R')

TestGenerateCounts <- function() {
  report_params <- list(k = 4, m = 2)  # 2 cohorts, 4 bits each
  map <- Matrix(0, nrow = 8, ncol = 3, sparse = TRUE)  # 3 possible values
  map[1,] <- c(1, 0, 0)
  map[2,] <- c(0, 1, 0)
  map[3,] <- c(0, 0, 1)
  map[4,] <- c(1, 1, 1)  # 4th bit of the first cohort gets signal from all values
  map[5,] <- c(0, 0, 1)  # 1st bit of the second cohort gets signal from v3

  strs <- c('v1', 'v2', 'v3')

  maps <- list(map = map, strs = strs)

  partition <- c(3, 2, 1) * 10000
  v <- 100  # reports per client

  noise0 <- list(p = 0, q = 1, f = 0)  # no noise at all
  counts0 <- GenerateCounts(c(report_params, noise0), maps, partition, v)

  checkEqualsNumeric(sum(counts0[1,2:4]), counts0[1,1])
  checkEqualsNumeric(counts0[1,5], counts0[1,1])
  checkEqualsNumeric(partition[3] * v, counts0[1,4] + counts0[2,2])
  checkEqualsNumeric(sum(partition) * v, counts0[1,1] + counts0[2,1])

  pvalues <- chisq.test(counts0[,1] / v, p = c(.5, .5))$p.value
  for (i in 2:4)
    pvalues <- c(pvalues,
                 chisq.test(
                   c(counts0[1,i] / v, partition[i - 1] - counts0[1,i] / v),
                   p = c(.5, .5))$p.value)

  noise1 <- list(p = .5, q = .5, f = 0)  # truly random IRRs
  counts1 <- GenerateCounts(c(report_params, noise1), maps, partition, v)

  for (i in 2:4)
    for (j in 1:2)
      pvalues <- c(pvalues,
                   chisq.test(c(counts1[j,1] - counts1[j,i], counts1[j,i]),
                              p = c(.5, .5))$p.value)

  noise2 <- list(p = 0, q = 1, f = 1.0)  # truly random PRRs
  counts2 <- GenerateCounts(c(report_params, noise2), maps, partition, v)

  checkEqualsNumeric(0, max(counts2 %% v))  # all entries must be divisible by v

  counts2 <- counts2 / v

  for (i in 2:4)
    for (j in 1:2)
      pvalues <- c(pvalues,
                   chisq.test(c(counts2[j,1] - counts2[j,i], counts2[j,i]),
                              p = c(.5, .5))$p.value)

  checkTrue(min(pvalues) > 1E-9 && max(pvalues) < 1 - 1E-9,
            "Chi-squared test failed")
}
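Each check above reduces to a chi-squared test that a bit's count splits about
evenly between the two cohorts, failing only on astronomically extreme
p-values. The same test in Python, assuming scipy and made-up counts:

  from scipy import stats

  observed = [15040, 14960]           # a bit's count in cohort 1 vs cohort 2
  result = stats.chisquare(observed)  # default expectation: an even split
  assert 1e-9 < result.pvalue < 1 - 1e-9, "Chi-squared test failed"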
TestRandomPartition <- function() {

  p1 <- RandomPartition(total = 100, dgeom(0:999, prob = .1))

@@ -46,4 +104,9 @@ TestRandomPartition() {
  checkTrue((p.value > 1E-9) && (p.value < 1 - 1E-9))
}

TestRandomPartition()
TestAll <- function() {
  TestRandomPartition()
  TestGenerateCounts()
}

TestAll()

tests/gen_reports.R (new file)
@@ -0,0 +1,70 @@
#!/usr/bin/env Rscript
#
# Copyright 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

source('tests/gen_counts.R')

# Usage:
#
# $ ./gen_reports.R exp 100 10000 1 foo.csv
#
# Inputs:
#   distribution name
#   size of the distribution's support
#   number of clients
#   reports per client
#   name of the output file
# Output:
#   csv file with reports sampled according to the specified distribution.

main <- function(argv) {
  distr <- argv[[1]]
  distr_range <- as.integer(argv[[2]])
  num_clients <- as.integer(argv[[3]])
  reports_per_client <- as.integer(argv[[4]])
  out_file <- argv[[5]]

  pdf <- ComputePdf(distr, distr_range)

  print("Distribution")
  print(pdf)

  # Computes the number of clients reporting each value, where the numbers are
  # sampled according to pdf.
  partition <- RandomPartition(num_clients, pdf)

  print('PARTITION')
  print(partition)

  values <- rep(1:distr_range, partition)  # expand partition

  stopifnot(length(values) == num_clients)

  # Shuffle values randomly (may take a few sec for > 10^8 inputs)
  values <- sample(values)

  # Obtain reports by prefixing values with "v"s. Even slower than shuffling.
  reports <- paste("v", format(values, trim = TRUE), sep = "")

  reports <- cbind(1:num_clients, reports)  # paste together "1 v342"
  reports <- reports[rep(1:nrow(reports), each = reports_per_client), ]

  write.table(reports, file = out_file, row.names = FALSE, col.names = FALSE,
              sep = ",", quote = FALSE)
}

if (length(sys.frames()) == 0) {
  main(commandArgs(TRUE))
}
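Condensed, main expands the partition into one value per client, shuffles, and
then repeats each client's row reports_per_client times. The same pipeline in
numpy (a sketch with assumed inputs, not part of the repo):

  import numpy as np

  rng = np.random.default_rng()
  pdf = np.full(100, 0.01)                          # 'unif' over 100 values
  partition = rng.multinomial(10000, pdf)           # clients per value
  values = np.repeat(np.arange(1, 101), partition)  # one value per client
  rng.shuffle(values)
  reports_per_client = 2                            # each row repeated per report
  rows = [(c, 'v%d' % v)
          for c, v in enumerate(values, start=1)
          for _ in range(reports_per_client)]       # e.g. (1, 'v42'), (1, 'v42'), ...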
tests/gen_sim_input.py (deleted)
@@ -1,194 +0,0 @@
#!/usr/bin/python
#
# Copyright 2014 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Copyright 2014 Google Inc. All Rights Reserved.

"""Generate simulated input data for RAPPOR."""

import csv
import getopt
import math
import optparse
import os
import random
import sys
import time


def log(msg, *args):
  if args:
    msg = msg % args
  print >>sys.stderr, msg


class RandUniform(object):
  """Returns a value drawn from the uniform distribution."""

  def __init__(self, num_unique_values):
    self.num_unique_values = num_unique_values

  def __call__(self):
    return random.randrange(1, self.num_unique_values + 1)


class RandGauss(object):
  """Returns a value drawn from a Gaussian."""

  def __init__(self, num_unique_values, dist_param):
    self.num_unique_values = num_unique_values
    self.stddev = dist_param or float(num_unique_values) / 6

  def __call__(self):
    mean = float(self.num_unique_values + 1) / 2
    while True:
      r = random.normalvariate(mean, self.stddev)
      value = int(round(r))
      # Rejection sampling to cut off Gaussian to within [1, num_unique_values]
      if 1 <= value <= self.num_unique_values:
        break

    return value  # true client value


class RandExp(object):
  """Returns a value drawn from an exponential distribution."""

  def __init__(self, num_unique_values, dist_param):
    self.num_unique_values = num_unique_values
    self.lambd = dist_param or 1

  def __call__(self):
    while True:
      r = random.expovariate(self.lambd)
      value = int(round(r))
      # Rejection sampling to drop outputs outside [1, num_unique_values]
      if 1 <= value <= self.num_unique_values:
        break

    return value  # true client value


class RandZipf(object):
  """Returns a value drawn from a Zipf distribution."""

  def __init__(self, num_unique_values, dist_param):
    self.num_unique_values = num_unique_values
    self.alpha = dist_param or 1

  def __call__(self):
    while True:
      r = random.paretovariate(self.alpha)  # Zipf is a discrete Pareto
      value = int(round(r))
      # Rejection sampling to drop outputs outside [1, num_unique_values]
      if 1 <= value <= self.num_unique_values:
        break

    return value  # true client value


def CreateOptionsParser():
  p = optparse.OptionParser()

  # This will be used for the C++ client
  p.add_option(
      '-l', type='int', metavar='INT', dest='num_lines', default=0,
      help='Instead of a CSV file, output a text file with a value on each '
           'line, and this number of lines.')

  choices = ['exp', 'gauss', 'unif', 'zipf1', 'zipf1.5']
  p.add_option(
      '-d', type='choice', dest='dist', default='exp', choices=choices,
      help='Distribution to draw values from (%s)' % '|'.join(choices))

  p.add_option(
      '-u', type='int', metavar='INT',
      dest='num_unique_values', default=100,
      help='Number of unique values to generate.')
  p.add_option(
      '-c', type='int', metavar='INT', dest='num_clients', default=100000,
      help='Number of clients.')
  p.add_option(
      '-v', type='int', metavar='INT', dest='values_per_client', default=1,
      help='Number of values to generate per client.')

  p.add_option(
      '-p', type='float', metavar='FLOAT', dest='dist_param', default=None,
      help='Parameter to distribution. Ignored for uniform; Std-dev '
           'for Gaussian; Lambda for Exponential; Alpha for Zipf.')

  return p


def main(argv):
  (opts, argv) = CreateOptionsParser().parse_args(argv)

  if opts.num_unique_values < 2:
    raise RuntimeError('-u should be at least 2.')

  if opts.num_clients < 10:
    raise RuntimeError("RAPPOR won't work with fewer than 10 clients")

  random.seed()

  # Choose a function that yields the desired distribution. Each of these
  # functions returns a randomly sampled integer between 1 and
  # opts.num_unique_values.
  if opts.dist == 'unif':
    rand_sample = RandUniform(opts.num_unique_values)
  elif opts.dist == 'gauss':
    rand_sample = RandGauss(opts.num_unique_values, opts.dist_param)
  elif opts.dist == 'exp':
    rand_sample = RandExp(opts.num_unique_values, opts.dist_param)
  elif opts.dist == 'zipf1':
    rand_sample = RandZipf(opts.num_unique_values, 1.0)
  elif opts.dist == 'zipf1.5':
    rand_sample = RandZipf(opts.num_unique_values, 1.5)
  else:
    raise AssertionError(opts.dist)

  start_time = time.time()

  # Printing values into file OUTFILE
  f = sys.stdout

  if opts.num_lines:  # line mode, not writing the client column
    for i in xrange(opts.num_lines):
      if i % 10000 == 0:
        elapsed = time.time() - start_time
        log('Generated %d rows in %.2f seconds', i, elapsed)

      true_value = 'v%d' % rand_sample()
      print >>f, true_value

  else:  # csv mode
    c = csv.writer(f)
    c.writerow(('client', 'true_value'))
    for i in xrange(1, opts.num_clients + 1):
      if i % 10000 == 0:
        elapsed = time.time() - start_time
        log('Generated %d rows in %.2f seconds', i, elapsed)

      # A fixed number of values per user
      for _ in xrange(opts.values_per_client):
        true_value = 'v%d' % rand_sample()
        c.writerow((i, true_value))


if __name__ == "__main__":
  try:
    main(sys.argv)
  except RuntimeError, e:
    print >>sys.stderr, e.args[0]
    sys.exit(1)
tests/make_summary.py
@@ -9,18 +9,6 @@ import re
import sys


# Simulation parameters and result metrics.
EMPTY_ROW = """\
<tr>
  <td>
    %(name)s
  </td>
  <td colspan = 3>
    missing
  </td>
</tr>
"""

SUMMARY_ROW = """\
<tfoot style="font-weight: bold; text-align: right">
<tr>

@@ -84,10 +72,10 @@ def FormatFloat(x, percent):
  return '{:.3f}'.format(x)


def FormatEstimate(m_std_error, percent=False):
def FormatMeanWithSem(m_std_error, percent=False):
  """Formats an estimate with standard error."""
  if m_std_error is None:
    return ""
    return ''
  m, std_error = m_std_error
  if std_error is None:
    return FormatFloat(m, percent)

@@ -101,14 +89,18 @@ def Mean(l):
  """Computes the mean (average) for a list of numbers."""
  if l:
    return float(sum(l)) / len(l)
  else:
    return None


def SampleVar(l):
  """Computes the sample variance for a list of numbers."""
  if len(l) > 1:
    mean = Mean(l)
    var = sum([(x - mean)**2 for x in l]) / (len(l) - 1)
    var = sum([(x - mean) ** 2 for x in l]) / (len(l) - 1)
    return var
  else:
    return None


def StandardErrorEstimate(l):

@@ -117,56 +109,26 @@ def StandardErrorEstimate(l):
  For a singleton the standard error is assumed to be 10% of its value.
  """
  if len(l) > 1:
    return (SampleVar(l) / len(l))**.5
    return (SampleVar(l) / len(l)) ** .5
  elif l:
    return l[0] / 10.0
  else:
    return None


def WeightedAverageOfAverages(list_of_lists, cap):
  """Computes the average of averages, weighted by accuracy.

  Given a list of lists of numbers, computes a weighted average of averages
  together with the standard error of the estimate. Contribution from each
  list is weighted by the standard error of its sample mean.
  (Sublists with lower accuracy contribute less to the total). The cap limits
  the weight of any one list.
  Args:
    list_of_lists: A list of lists of floats.
    cap: Limit on any list's weight
  Returns:
    A pair of floats - average and its standard error.
  """
  l = [sublist for sublist in list_of_lists if sublist]
  if not l:
    return None

  total = 0
  total_weights = 0
  total_sem = 0  # SEM - Standard Error of the Mean

  for sublist in l:
    std_error = StandardErrorEstimate(sublist)
    weight = 1 / std_error if std_error > 1.0 / cap else cap

    total += Mean(sublist) * weight
    total_weights += weight
    total_sem += std_error**2 * weight**2  # == 1 when the weight is < cap

  std_error_estimate = total_sem**.5 / total_weights

  return total / total_weights, std_error_estimate


def AverageOfAverages(list_of_lists):
def MeanOfMeans(dict_of_lists):
  """Returns the average of averages with the standard error of the estimate.
  """
  means = [Mean(l) for l in list_of_lists if l]
  means = [Mean(dict_of_lists[key]) for key in dict_of_lists
           if dict_of_lists[key]]
  if means:
    # Compute variances of the estimate for each sublist.
    se = [StandardErrorEstimate(l)**2 for l in list_of_lists if l]

    se = [StandardErrorEstimate(dict_of_lists[key]) ** 2 for key
          in dict_of_lists if dict_of_lists[key]]
    return (Mean(means),  # Mean over all sublists
            sum(se)**.5 / len(se))  # Standard deviation of the mean
            sum(se) ** .5 / len(se))  # Standard deviation of the mean
  else:
    return None
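MeanOfMeans pools one mean per test case and reports the standard error of the
pooled estimate as sqrt(sum of per-case SEM^2) / number of cases. In plain
Python (a sketch mirroring Mean, SampleVar, and StandardErrorEstimate above):

  def mean_of_means(dict_of_lists):
      nonempty = [v for v in dict_of_lists.values() if v]
      if not nonempty:
          return None
      def mean(xs):
          return float(sum(xs)) / len(xs)
      def sem_sq(xs):  # squared SEM; 10% of the value for singletons
          if len(xs) > 1:
              var = sum((x - mean(xs)) ** 2 for x in xs) / (len(xs) - 1)
              return var / len(xs)
          return (xs[0] / 10.0) ** 2
      means = [mean(v) for v in nonempty]
      se = [sem_sq(v) for v in nonempty]
      return mean(means), sum(se) ** 0.5 / len(se)

  # mean_of_means({'a': [1.0, 1.2, 0.8], 'b': [2.0], 'c': []}) ignores 'c'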
def ParseSpecFile(spec_filename):

@@ -179,8 +141,6 @@ def ParseSpecFile(spec_filename):
  with open(spec_filename) as s:
    spec_row = s.readline().split()

  spec_row.pop(1)  # drop the run_id (must be 1 if correctly generated)

  # Second to last column is 'num_additional' -- the number of bogus
  # candidates added
  num_additional = int(spec_row[-2])

@@ -190,39 +150,37 @@ def ParseSpecFile(spec_filename):
  return num_additional, spec_in_html


def ParseLogFile(log_filename):
def ExtractTime(log_filename):
  """Extracts the elapsed time information from the log file.

  Returns:
    A float or None in case of failure.
    Elapsed time (in seconds) or None in case of failure.
  """
  if os.path.isfile(log_filename):
    with open(log_filename) as log:
      log_str = log.read()
    match = re.search(r'took ([0-9.]+) seconds', log_str)
    # Matching a line output by analyze.R.
    match = re.search(r'Inference took ([0-9.]+) seconds', log_str)
    if match:
      return float(match.group(1))
  return None
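The regex is anchored to the exact wording analyze.R logs (the Log call noted
there), so the two files must change together. For example:

  import re

  log_str = 'Inference took 2.35 seconds'  # the line analyze.R writes to log.txt
  match = re.search(r'Inference took ([0-9.]+) seconds', log_str)
  elapsed = float(match.group(1)) if match else None  # -> 2.35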
def ParseMetrics(report_dir, num_additional, metrics_lists):
def ParseMetrics(metrics_file, log_file, num_additional):
  """Processes the metrics file.

  Args:
    report_dir: A directory name containing metrics.csv and log.txt.
    num_additional: A number of bogus candidates added to the candidate list.
    metrics_lists: A dictionary containing lists (one for each metric) of
      lists (one for each test case) of metrics (one for each test run).
  Returns:
    Part of the report row formatted in HTML. metrics_lists is updated with
    new metrics.
  """
  metrics_filename = os.path.join(report_dir, 'metrics.csv')

  with open(metrics_filename) as m:
  Returns a pair:
    - A dictionary of metrics (some can be []).
    - An HTML-formatted portion of the report row.
  """
  with open(metrics_file) as m:
    m.readline()
    metrics_row = m.readline().split(',')

  # Format numbers and sum
  (num_actual, num_rappor, num_false_pos, num_false_neg, total_variation,
   allocated_mass) = metrics_row


@@ -235,8 +193,7 @@ def ParseMetrics(report_dir, num_additional, metrics_lists):
  total_variation = float(total_variation)
  allocated_mass = float(allocated_mass)

  log_filename = os.path.join(report_dir, 'log.txt')
  elapsed_time = ParseLogFile(log_filename)
  elapsed_time = ExtractTime(log_file)

  # e.g. if there are 20 additional candidates added, and 1 false positive,
  # the false positive rate is 5%.

@@ -249,75 +206,86 @@ def ParseMetrics(metrics_file, log_file, num_additional):
  metrics_row_str = [
      str(num_actual),
      str(num_rappor),
      '%.1f%% (%d)' % (fp_rate * 100, num_false_pos)
      if num_additional else '',
      '%.1f%% (%d)' % (fp_rate * 100, num_false_pos) if num_additional else '',
      '%.1f%% (%d)' % (fn_rate * 100, num_false_neg),
      '%.3f' % total_variation,
      '%.3f' % allocated_mass,
      '%.2f' % elapsed_time if elapsed_time is not None else '',
  ]

  if num_additional:
    metrics_lists['fpr'][-1].append(fp_rate)
  metrics_lists['fnr'][-1].append(fn_rate)
  metrics_lists['tv'][-1].append(total_variation)
  metrics_lists['am'][-1].append(allocated_mass)
  if elapsed_time is not None:
    metrics_lists['time'][-1].append(elapsed_time)
  metrics_row_dict = {
      'tv': [total_variation],
      'fpr': [fp_rate] if num_additional else [],
      'fnr': [fn_rate],
      'am': [allocated_mass],
      'time': [elapsed_time] if elapsed_time is not None else [],
  }

  # return metrics formatted as HTML table entries
  return ' '.join('<td>%s</td>' % cell for cell in metrics_row_str)
  return (metrics_row_dict,
          ' '.join('<td>%s</td>' % cell for cell in metrics_row_str))


def FormatRowName(case_name, run_id_str, metrics_name, link_to_plots):
  """Outputs an HTML table entry.
def FormatCell1(test_case, test_instance, metrics_file, log_file, plot_file,
                link_to_plots):
  """Outputs an HTML table entry for the first cell of the row.

  The row is filled if the metrics file exists. The first cell contains a link
  that for short tables points to a plot file inline, for large tables to an
  external file.

  If the metrics file is missing, the link points to the log file (if one
  exists).
  """
  relpath_report = '{}/{}_report'.format(case_name, run_id_str)
  if os.path.isfile(metrics_name):
  relpath_report = '{}/{}_report'.format(test_case, test_instance)
  if os.path.isfile(metrics_file):
    external_file = plot_file
    if link_to_plots:
      link = '#' + case_name + '_' + run_id_str  # anchor
      link = '#{}_{}'.format(test_case, test_instance)  # anchor
    else:
      link = relpath_report + '/' + 'dist.png'
  else:  # no results, likely due to an error; put a link to the log file
    external_file = log_file
    link = relpath_report + '/' + 'log.txt'

  return '<td><a href="{}">{}</a></td>'.format(link, case_name)
  if os.path.isfile(external_file):
    return '<td><a href="{}">{}</a></td>'.format(link, test_case)
  else:  # if no file to link to
    return '<td>{}</td>'.format(test_case)


def FormatSummaryRows(metrics_lists):
  """Outputs an HTML-formatted summary row.
  """
def FormatSummaryRow(metrics_lists):
  """Outputs an HTML-formatted summary row."""
  means_with_sem = {}  # SEM - standard error of the mean

  for key in metrics_lists:
    means_with_sem[key] = AverageOfAverages(metrics_lists[key])
    means_with_sem[key] = MeanOfMeans(metrics_lists[key])
    # If none of the lists is longer than one element, drop the SEM component.
    if means_with_sem[key] and max([len(l) for l in metrics_lists[key]]) < 2:
      means_with_sem[key] = [means_with_sem[key][0], None]

  summary = {
      'name': 'Means',
      'mean_fpr': FormatEstimate(means_with_sem['fpr'], percent=True),
      'mean_fnr': FormatEstimate(means_with_sem['fnr'], percent=True),
      'mean_tv': FormatEstimate(means_with_sem['tv'], percent=True),
      'mean_am': FormatEstimate(means_with_sem['am'], percent=True),
      'mean_time': FormatEstimate(means_with_sem['time']),
      'mean_fpr': FormatMeanWithSem(means_with_sem['fpr'], percent=True),
      'mean_fnr': FormatMeanWithSem(means_with_sem['fnr'], percent=True),
      'mean_tv': FormatMeanWithSem(means_with_sem['tv'], percent=True),
      'mean_am': FormatMeanWithSem(means_with_sem['am'], percent=True),
      'mean_time': FormatMeanWithSem(means_with_sem['time']),
  }
  return SUMMARY_ROW % summary


def FormatPlots(base_dir, test_instances):
  """Outputs HTML-formatted plots.
  """
  """Outputs HTML-formatted plots."""
  result = ''
  for instance in test_instances:
    # A test instance is idenfied by the test name and the test run.
    case_name, run_id_str = instance.split(' ')
    instance_dir = case_name + '/' + run_id_str + '_report'
    # A test instance is identified by the test name and the test run.
    test_case, test_instance, _ = instance.split(' ')
    instance_dir = test_case + '/' + test_instance + '_report'
    if os.path.isfile(os.path.join(base_dir, instance_dir, 'dist.png')):
      result += DETAILS % {'anchor': case_name + '_' + run_id_str,
                           'name': '{} (instance {})'.format(case_name,
                                                             run_id_str),
      result += DETAILS % {'anchor': test_case + '_' + test_instance,
                           'name': '{} (instance {})'.format(test_case,
                                                             test_instance),
                           'instance_dir': instance_dir}
  return result


@@ -327,16 +295,19 @@ def main(argv):

  # This file has the test case names, in the order that they should be
  # displayed.
  path = os.path.join(base_dir, 'test-cases.txt')
  path = os.path.join(base_dir, 'test-instances.txt')
  with open(path) as f:
    test_instances = [line.strip() for line in f]

  metrics_lists = {
      'tv': [],  # total_variation for all test cases
      'fpr': [],  # list of false positive rates
      'fnr': [],  # list of false negative rates
      'am': [],  # list of total allocated masses
      'time': [],  # list of total elapsed time measurements
  # Metrics are assembled into a dictionary of dictionaries. The top-level
  # key is the metric name ('tv', 'fpr', etc.), the second level key is
  # the test case. These keys reference a list of floats, which can be empty.
  metrics = {
      'tv': {},  # total_variation for all test cases
      'fpr': {},  # dictionary of false positive rates
      'fnr': {},  # dictionary of false negative rates
      'am': {},  # dictionary of total allocated masses
      'time': {},  # dictionary of total elapsed time measurements
  }

  # If there are too many tests, the plots are not included in the results

@@ -345,36 +316,39 @@ def main(argv):

  for instance in test_instances:
    # A test instance is idenfied by the test name and the test run.
    case_name, run_id_str = instance.split(' ')
    # if this is the first run of a test case, start anew
    if run_id_str == '1':
      for metric in metrics_lists:
        metrics_lists[metric].append([])
    test_case, test_instance, _ = instance.split(' ')

    spec = os.path.join(base_dir, case_name, 'spec.txt')
    if os.path.isfile(spec):
      num_additional, row_spec = ParseSpecFile(spec)
    spec_file = os.path.join(base_dir, test_case, 'spec.txt')
    if not os.path.isfile(spec_file):
      raise RuntimeError('{} is missing'.format(spec_file))

      report_dir = os.path.join(base_dir, case_name, run_id_str + '_report')
      if os.path.isdir(report_dir):
        metrics = os.path.join(report_dir, 'metrics.csv')
    num_additional, spec_html = ParseSpecFile(spec_file)
    metrics_html = ''  # will be filled in later on, if metrics exist

        row_name = FormatRowName(case_name, run_id_str, metrics, include_plots)
    report_dir = os.path.join(base_dir, test_case, test_instance + '_report')

        if os.path.isfile(metrics):
          # ParseMetrics outputs an HTML table row and also updates lists
          row_metrics = ParseMetrics(report_dir, num_additional, metrics_lists)
    metrics_file = os.path.join(report_dir, 'metrics.csv')
    log_file = os.path.join(report_dir, 'log.txt')
    plot_file = os.path.join(report_dir, 'dist.png')

    cell1_html = FormatCell1(test_case, test_instance, metrics_file, log_file,
                             plot_file, include_plots)

    if os.path.isfile(metrics_file):
      # ParseMetrics outputs an HTML table row and also updates lists
      metrics_dict, metrics_html = ParseMetrics(metrics_file, log_file,
                                                num_additional)

      # Update the metrics structure. Initialize dictionaries if necessary.
      for m in metrics:
        if not test_case in metrics[m]:
          metrics[m][test_case] = metrics_dict[m]
        else:
          row_metrics = ''
      else:
        row_name = '<td>{}<td>'.format(case_name)
        row_metrics = ''
          metrics[m][test_case] += metrics_dict[m]

      print '<tr>{}{}{}</tr>'.format(row_name, row_spec, row_metrics)
    else:
      print EMPTY_ROW % {'name': case_name}
    print '<tr>{}{}{}</tr>'.format(cell1_html, spec_html, metrics_html)

  print FormatSummaryRows(metrics_lists)
  print FormatSummaryRow(metrics)

  print '</tbody>'
  print '</table>'

@@ -384,8 +358,8 @@ def main(argv):
  if include_plots:
    print FormatPlots(base_dir, test_instances)
  else:
    print '<p>Too many tests to include plots.\
Click links within rows for details.</p>'
    print ('<p>Too many tests to include plots. '
           'Click links within rows for details.</p>')


if __name__ == '__main__':
tests/rappor_sim.py
@@ -99,30 +99,11 @@ def CreateOptionsParser():
  return p


def print_params(params, csv_out, json_out):
  """Print Rappor parameters to a text file."""
  c = csv.writer(csv_out)
  c.writerow(('k', 'h', 'm', 'p', 'q', 'f'))  # header
  row = (
      params.num_bloombits,
      params.num_hashes,
      params.num_cohorts,
      params.prob_p,
      params.prob_q,
      params.prob_f)

  c.writerow(row)

  print >>json_out, params.to_json()


def make_histogram(csv_in):
  """Make a histogram of the simulated input file."""
  # TODO: It would be better to share parsing with rappor_encode()
  counter = collections.Counter()
  for i, (_, word) in enumerate(csv_in):
    if i == 0:
      continue
  for (_, word) in csv_in:
    counter[word] += 1
  return dict(counter.most_common())


@@ -171,14 +152,6 @@ def main(argv):

  outfile = prefix + "_out.csv"
  histfile = prefix + "_hist.csv"
  true_inputs_file = prefix + "_true_inputs.txt"
  params_csv = prefix + "_params.csv"
  params_json = prefix + '_params.json'

  # Print parameters to parameters file -- needed for the R analysis tool.
  with open(params_csv, 'w') as csv_out:
    with open(params_json, 'w') as json_out:
      print_params(params, csv_out, json_out)

  with open(opts.infile) as f:
    csv_in = csv.reader(f)

@@ -190,12 +163,6 @@ def main(argv):

  all_words = sorted(word_hist)  # unique words

  # Print all true values, one per line. This file can be further processed to
  # simulate inaccurate candidate lists.
  with open(true_inputs_file, 'w') as f:
    for word in all_words:
      print >>f, word

  rand = random.Random()  # default Mersenne Twister randomness
  #rand = random.SystemRandom()  # cryptographic randomness from OS


@@ -228,9 +195,6 @@ def main(argv):
  start_time = time.time()

  for i, (client, true_value) in enumerate(csv_in):
    if i == 0:
      continue  # skip header line

    if i % 10000 == 0:
      elapsed = time.time() - start_time
      log('Processed %d inputs in %.2f seconds', i, elapsed)
tests/regtest_spec.py
@@ -14,6 +14,17 @@ import sys
# TEST CONFIGURATION
#

DEMO = (
    # (case_name distr num_unique_values num_clients values_per_client)
    # (num_bits num_hashes num_cohorts)
    # (p q f) (num_additional regexp_to_remove)
    ('demo1 unif 100 10000 10', '16 2 64', '0.1 0.9 0.2', '10 v[0-9]*9$'),
    ('demo2 gauss 100 10000 10', '16 2 64', '0.1 0.9 0.2', '10 v[0-9]*9$'),
    ('demo3 exp 100 10000 10', '16 2 64', '0.1 0.9 0.2', '10 v[0-9]*9$'),
    ('demo4 zipf1 100 10000 10', '16 2 64', '0.1 0.9 0.2', '10 v[0-9]*9$'),
    ('demo5 zipf1.5 100 10000 10', '16 2 64', '0.1 0.9 0.2', '10 v[0-9]*9$'),
)

DISTRIBUTIONS = (
    'unif',
    'exp',

@@ -24,7 +35,8 @@ DISTRIBUTIONS = (

DISTRIBUTION_PARAMS = (
    # name, num unique values, num clients, values per client
    ('small', 100, 1000000, 1),
    ('tiny', 100, 1000, 1),  # test for insufficient data
    ('small', 100, 100000, 1),
    ('medium', 1000, 10000000, 1),
    ('large', 10000, 100000000, 1),
)

@@ -39,9 +51,8 @@ BLOOMFILTER_PARAMS = {

# 'p, q, f' as in params file.
PRIVACY_PARAMS = {
    'eps_1_1': (0.44, 0.56, 0),  # eps_1 = 1, no eps_inf
    'eps_1_1': (0.39, 0.61, 0.45),  # eps_1 = 1, eps_inf = 5
    'eps_1_5': (0.225, 0.775, 0.0),  # eps_1 = 5, no eps_inf
    'eps_inf_5': (0.39, 0.61, 0.45),  # eps_1 = 1, eps_inf = 5
}
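As a sanity check on these tuples: the RAPPOR paper (Erlingsson et al.,
CCS'14) bounds the longitudinal privacy of the PRR alone at
eps_inf = 2h * ln((1 - f/2) / (f/2)) for h hash functions. A one-liner to
verify (a sketch, not repo code):

  import math

  def eps_infinity(f, h):
      # Longitudinal (PRR-only) bound from the RAPPOR paper.
      return 2 * h * math.log((1 - f / 2) / (f / 2))

  print(eps_infinity(0.45, h=2))  # ~4.9, consistent with "eps_inf = 5" above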
# For deriving candidates from true inputs.

@@ -55,54 +66,42 @@ MAP_REGEX_MISSING = {
    # regex missing)
TEST_CONFIGS = [
    ('typical', '128x128', 'eps_1_1', .2, '10%'),
    ('categorical', '128x128', 'eps_1_1', .0, 'sharp'),  # no extra candidates
    ('sharp', '128x128', 'eps_1_1', .0, 'sharp'),  # no extra candidates
    ('loose', '128x128', 'eps_1_5', .2, '10%'),  # loose privacy
    ('over_x2', '128x128', 'eps_1_1', 2.0, '10%'),  # overshoot by x2
    ('over_x10', '128x128', 'eps_1_1', 10.0, '10%'),  # overshoot by x10
]

DEMO_CASES = [
    # The 5 cases run by the demo.sh script
    ('demo-small-exp', 'exp_a', '8x128', 'eps_1_1', 20, '10%'),
    ('demo-small-gauss', 'gauss_a', '8x128', 'eps_1_1', 20, '10%'),
]

#
# END TEST CONFIGURATION
#

def CreateOptionsParser():
  p = optparse.OptionParser()

  p.add_option(
      '-r', dest='runs', metavar='INT', type='int', default=1,
      help='Number of runs for each test.')

  return p

def main(argv):
  (opts, argv) = CreateOptionsParser().parse_args(argv)
  rows = []

  test_case = []
  for (distr_params, num_values, num_clients,
       num_reports_per_client) in DISTRIBUTION_PARAMS:
  for (distr_params, num_values, num_clients,
       num_reports_per_client) in DISTRIBUTION_PARAMS:
    for distribution in DISTRIBUTIONS:
      for (config_name, bloom_name, privacy_params, fr_extra,
           regex_missing) in TEST_CONFIGS:
        test_name = '{}-{}-{}'.format(distribution,
                                      distr_params, config_name)
      for (config_name, bloom_name, privacy_params, fr_extra,
           regex_missing) in TEST_CONFIGS:
        test_name = 'r-{}-{}-{}'.format(distribution, distr_params,
                                        config_name)

        params = (BLOOMFILTER_PARAMS[bloom_name]
                  + PRIVACY_PARAMS[privacy_params]
                  + tuple([int(num_values * fr_extra)])
                  + tuple([MAP_REGEX_MISSING[regex_missing]]))

        for r in range(1, opts.runs + 1):
          test_run = (test_name, r, distribution, num_values, num_clients,
                      num_reports_per_client)
          row_str = [str(element) for element in test_run + params]
          rows.append(row_str)
        test_case = (test_name, distribution, num_values, num_clients,
                     num_reports_per_client) + params
        row_str = [str(element) for element in test_case]
        rows.append(row_str)

  for params in DEMO:
    rows.append(params)
    print >>sys.stderr, params

  for row in rows:
    print ' '.join(row)