rappor/regtest.sh

#!/bin/bash
#
# Run end-to-end tests in parallel.
#
# Usage:
#   ./regtest.sh <function name>

# Examples:
#
# $ export NUM_PROCS=20  # 12 by default
# $ ./regtest.sh run-all  # run all reg tests with 20 parallel processes
#
# At the end, it will print an HTML summary.

# To run a subset of tests or debug a specific test case, use the 'run-seq'
# function:
#
# $ ./regtest.sh run-seq demo-exp  # Sequential run, matches 1 case
# $ ./regtest.sh run-seq demo-     # Sequential run, matches multiple cases
#
# The first argument to run-seq is a regex in 'grep -E' format.  (Detail: Don't
# use $ in the pattern, since it matches the whole spec line and not just the
# test case name.)

# Future speedups:
# - Reuse the same input -- come up with naming scheme based on params
# - Reuse the same maps -- ditto, rappor library can cache it

# Strict mode: fail on unset variables, on a failure anywhere in a pipeline,
# and on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Shared helpers.  The functions below call 'log' and 'banner', which are not
# defined in this file -- presumably they come from util.sh (confirm).
. util.sh

# "$0" is quoted so that a script path containing whitespace reaches dirname
# as a single argument.
readonly THIS_DIR=$(dirname "$0")
readonly REPO_ROOT=$THIS_DIR
readonly CLIENT_DIR=$REPO_ROOT/client/python
# Working directory for all regtest artifacts, relative to the current dir.
readonly REGTEST_DIR=_tmp/regtest

# All the Python tools need this
export PYTHONPATH=$CLIENT_DIR

# Number of fields in one test spec; 'multi' hands xargs this many arguments
# per invocation, and _run-one-case reads exactly this many positional params.
readonly NUM_SPEC_COLS=13

# TODO: Get num cpus
# Degree of parallelism for run-all; can be overridden from the environment.
readonly NUM_PROCS=${NUM_PROCS:-12}


# Print additional candidate strings beyond the true inputs, one per line.  We
# hope these are estimated at 0.
#
# Args:
#   last_true: number of the last true input, e.g. 50 for "v1" .. "v50"
#   num_additional: how many additional candidates to print
#
# e.g. if last_true is 50 and num_additional is 20, print v51 .. v70.  Nothing
# is printed when num_additional is 0.
more-candidates() {
  local last_true=$1
  local num_additional=$2

  # Shell arithmetic instead of 'expr': expr exits with status 1 whenever its
  # result is 0, which would abort the script under errexit.
  local begin=$(( last_true + 1 ))
  local end=$(( last_true + num_additional ))

  # A counting loop instead of 'seq': some seq implementations count downwards
  # when end < begin rather than printing nothing.
  local i
  for (( i = begin; i <= end; i++ )); do
    printf 'v%s\n' "$i"
  done
}

# Print the candidate list for a test case: the true inputs (optionally with
# some of them filtered out), followed by extra candidates that are not true
# inputs.
#
# Args:
#   true_inputs: File of true inputs
#   last_true: last true input, e.g. 50 if we generated "v1" .. "v50".
#   num_additional: additional candidates to generate (numbered from
#     last_true + 1)
#   to_remove: Regex ('grep -E' format) of true values to omit from the
#     candidates list, or the string 'NONE' if none should be.  (Our values
#     look like 'v1', 'v2', etc. so there isn't any ambiguity.)
print-candidates() {
  local true_inputs=$1
  local last_true=$2
  local num_additional=$3
  local to_remove=$4

  if [[ "$to_remove" == NONE ]]; then
    cat -- "$true_inputs"  # include all true inputs
  else
    # Remove some true inputs.  grep exits with status 1 when it selects no
    # lines, i.e. when every true input was removed; that is a valid result,
    # so only a genuine grep error (status 2 or higher) is treated as failure.
    grep -E -v -e "$to_remove" -- "$true_inputs" || [[ $? -eq 1 ]]
  fi
  more-candidates "$last_true" "$num_additional"
}

# Run a single test case, specified by a line of the test spec.
# This is a helper function for 'run-all' and 'run-seq', which invoke it
# through xargs with one spec line (NUM_SPEC_COLS fields) as the arguments.
#
# Args (in spec order):
#   test_case_id: name of the case; also names its directory under REGTEST_DIR
#   dist, num_unique_values, num_clients, values_per_client: input params,
#     passed to tests/gen_sim_input.py
#   num_bits, num_hashes, num_cohorts, p, q, f: RAPPOR params, passed to
#     tests/rappor_sim.py
#   num_additional, to_remove: map params, passed to print-candidates
#
# Writes its artifacts under $REGTEST_DIR/<test_case_id> and the report under
# $REGTEST_DIR/<test_case_id>_report.

_run-one-case() {
  local test_case_id=$1

  # input params
  local dist=$2
  local num_unique_values=$3
  local num_clients=$4
  local values_per_client=$5

  # RAPPOR params
  local num_bits=$6
  local num_hashes=$7
  local num_cohorts=$8
  local p=$9
  local q=${10}  # need curly braces from the 10th arg onwards
  local f=${11}

  # map params
  local num_additional=${12}
  local to_remove=${13}

  # NOTE: NUM_SPEC_COLS == 13

  local case_dir=$REGTEST_DIR/$test_case_id
  mkdir --verbose -p "$case_dir"

  banner "Saving spec"

  # The arguments are the test case spec
  echo "$@" > "$case_dir/spec.txt"

  banner "Generating input"

  tests/gen_sim_input.py \
    -d "$dist" \
    -c "$num_clients" \
    -u "$num_unique_values" \
    -v "$values_per_client" \
    -o "$case_dir/case.csv"

  banner "Running RAPPOR client"

  tests/rappor_sim.py \
    --num-bits "$num_bits" \
    --num-hashes "$num_hashes" \
    --num-cohorts "$num_cohorts" \
    -p "$p" \
    -q "$q" \
    -f "$f" \
    -i "$case_dir/case.csv" \
    -o "$case_dir/out.csv"

  banner "Constructing candidates"

  # NOTE(review): case_true_inputs.txt (here) and case_params.csv (below) are
  # not written by anything in this file; presumably the two tools above
  # produce them next to their inputs/outputs -- confirm.
  print-candidates \
    "$case_dir/case_true_inputs.txt" "$num_unique_values" \
    "$num_additional" "$to_remove" \
    > "$case_dir/case_candidates.txt"

  banner "Hashing candidates to get 'map'"

  analysis/tools/hash_candidates.py \
    "$case_dir/case_params.csv" \
    < "$case_dir/case_candidates.txt" \
    > "$case_dir/case_map.csv"

  banner "Summing bits to get 'counts'"

  analysis/tools/sum_bits.py \
    "$case_dir/case_params.csv" \
    < "$case_dir/out.csv" \
    > "$case_dir/case_counts.csv"

  local out_dir=$REGTEST_DIR/${test_case_id}_report
  mkdir --verbose -p "$out_dir"

  # Input prefix, output dir
  tests/analyze.R -t "Test case: $test_case_id" "$case_dir/case" "$out_dir"
}

# Like _run-one-case, but send the case's stdout and stderr to
# $REGTEST_DIR/<test_case_id>/log.txt, so that only a start line and a done
# line per case are logged by this function itself.
#
# Args: the same test spec fields as _run-one-case; the first one is the test
# case id.
_run-one-case-logged() {
  local test_case_id=$1

  # The directory must exist before the redirect below can create log.txt.
  local case_dir=$REGTEST_DIR/$test_case_id
  mkdir --verbose -p "$case_dir"

  log "Started '$test_case_id' -- logging to $case_dir/log.txt"
  _run-one-case "$@" > "$case_dir/log.txt" 2>&1
  log "Test case $test_case_id done"
}

# Invoke the two simulation tools so that they describe themselves:
# gen_sim_input.py with no arguments (presumably that prints its usage --
# confirm) and rappor_sim.py with -h.  A non-zero exit status from either tool
# is deliberately ignored, so this function always returns 0.
show-help() {
  if ! tests/gen_sim_input.py; then
    :  # failure ignored on purpose
  fi

  if ! tests/rappor_sim.py -h; then
    :  # failure ignored on purpose
  fi
}

# Build the HTML summary page for a directory of test results.
#
# Args:
#   dir: results directory; rows.html and the summary page are written here
#   filename: name of the summary page inside 'dir' (default: results.html)
#
# tests/make_summary.py generates the table rows, which are then inserted into
# the tests/regtest.html template after each line matching TABLE_ROWS.
make-summary() {
  local dir=$1
  local filename=${2:-results.html}

  # Resolve the template from the invocation directory, like the other tests/
  # paths in this script, instead of assuming that 'dir' sits exactly two
  # levels below it.
  local template=$PWD/tests/regtest.html

  tests/make_summary.py "$dir" > "$dir/rows.html"

  # sed's 'r' command takes the rest of the expression as a file name, so run
  # it from inside 'dir' to keep that name simple.  The subshell confines the
  # directory change.
  (
    cd "$dir" \
      && sed -e '/TABLE_ROWS/ r rows.html' "$template" > "$filename"
  )

  log "Wrote $dir/$filename"
  log "URL: file://$PWD/$dir/$filename"
}

# Feed the spec on stdin to a command, one test case per invocation.
#
# xargs flags:
#   -r  do nothing at all when stdin is empty (--no-run-if-empty)
#   -t  echo each command line before running it (--verbose)
#   -n  arguments per invocation: the number of columns in one spec line
#
# Any arguments given to this function (extra xargs flags, then the command to
# run) are appended unchanged.
multi() {
  xargs -r -t -n "$NUM_SPEC_COLS" "$@"
}

# Report that the test run failed and exit the script with status 1.
#
# Args:
#   spec_regex (optional): the pattern the test cases were selected with; when
#     given, a hint that it may have matched nothing is logged as well.
test-error() {
  local pattern=${1:-}

  log "Some test cases failed"
  [[ -z "$pattern" ]] || log "(Perhaps none matched pattern '$pattern')"

  exit 1
}

# Extract the test case names -- the first space-separated column of the spec
# list, which must already exist -- into test-cases.txt next to it.  That file
# is read by make_summary.py.
write-test-cases() {
  local spec_file=$REGTEST_DIR/spec-list.txt
  local names_file=$REGTEST_DIR/test-cases.txt

  cut -f 1 -d ' ' "$spec_file" > "$names_file"
}

# Run the test cases whose spec line matches a regex, one after another, and
# then write the HTML summary.
#
# Args:
#   spec_regex: 'grep -E' pattern, matched against whole spec lines
#   html_filename: name of the summary page (default: results.html); demo.sh
#     passes its own name
#
# run-all should take regex?
run-seq() {
  local spec_regex=$1  # grep -E format on the spec
  local html_filename=${2:-results.html}

  # Like run-all: make sure the output directory exists before writing to it.
  mkdir --verbose -p "$REGTEST_DIR"

  # grep exits non-zero when nothing matches.  Handle that here so the hint
  # from test-error is shown, rather than errexit silently ending the script.
  local spec_list=$REGTEST_DIR/spec-list.txt
  tests/regtest_spec.py | grep -E -e "$spec_regex" > "$spec_list" \
    || test-error "$spec_regex"

  write-test-cases

  # Re-invoke this script once per spec line; each line becomes the arguments
  # of _run-one-case.
  multi -- "$0" _run-one-case < "$spec_list" || test-error "$spec_regex"

  log "Done running all test cases"

  make-summary "$REGTEST_DIR" "$html_filename"
}

# Run the test cases from tests/regtest_spec.py in parallel (NUM_PROCS at a
# time) and then write the HTML summary.
#
# Args:
#   max_cases: run only the first this many spec lines (default: 1000000,
#     i.e. effectively all of them)
#   verbose: 'T' to let every case print to the console; anything else
#     (default 'F') to log each case to its own log.txt
run-all() {
  # Limit it to this number of test cases.  By default we run all of them.
  local max_cases=${1:-1000000}
  local verbose=${2:-F}

  mkdir --verbose -p "$REGTEST_DIR"

  local func
  if [[ "$verbose" == T ]]; then
    func=_run-one-case  # parallel process output mixed on the console
  else
    func=_run-one-case-logged  # one line
  fi

  log "Using $NUM_PROCS parallel processes"

  local spec_list=$REGTEST_DIR/spec-list.txt
  tests/regtest_spec.py > "$spec_list"

  write-test-cases

  # Re-invoke this script once per spec line, NUM_PROCS at a time.  'multi'
  # passes NUM_SPEC_COLS arguments per invocation, which has to match the
  # number of fields in a spec line.
  head -n "$max_cases" "$spec_list" \
    | multi -P "$NUM_PROCS" -- "$0" "$func" || test-error

  log "Done running all test cases"

  make-summary "$REGTEST_DIR"
}

# Dispatch: run the command-line arguments as a command, i.e. the name of one
# of the functions above followed by its arguments
# (e.g. './regtest.sh run-seq demo-').
"$@"
Add a regression test harness. The test parameters are defined in tests/regtest_spec.py. Basic usage is: $ ./regtest.sh run-all This runs all tests in parallel, and results in an HTML table with results. - Calculate both false positives and false negatives in analyze.R. Refactor the function to be more symmetric. - Refactor demo.sh a bit - In gen_sim_input.py, get rid of hard-coded 7 values per client, and make it a parameter - Change rappor_sim.py to use a -d <dist> flag, rather than separate flags - Add test cases based on Chrome params - Factor out util.sh script 2015-03-17 00:19:39 +03:00			`#!/bin/bash`
			`#`
			`# Run end-to-end tests in parallel.`
			`#`
			`# Usage:`
			`# ./regtest.sh <function name>`

			`# Examples:`
			`#`
			`# $ export NUM_PROCS=20 # 12 by default`
			`# $ ./regtest.sh run-all # run all reg tests with 20 parallel processes`
			`#`
			`# At the end, it will print an HTML summary.`

			`# To run a subset of tests or debug a specific test case, use the 'run-seq'`
			`# function:`
			`#`
			`# $ ./regtest.sh run-seq demo-exp # Sequential run, matches 1 case`
			`# $ ./regtest.sh run-seq demo- # Sequential run, matches multiple cases`
			`#`
			`# The first argument to run-seq is a regex in 'grep -E' format. (Detail: Don't`
			`# use $ in the pattern, since it matches the whole spec line and not just the`
			`# test case name.)`

			`# Future speedups:`
			`# - Reuse the same input -- come up with naming scheme based on params`
			`# - Reuse the same maps -- ditto, rappor library can cache it`

			`set -o nounset`
			`set -o pipefail`
			`set -o errexit`

			`. util.sh`

			`readonly THIS_DIR=$(dirname $0)`
			`readonly REPO_ROOT=$THIS_DIR`
			`readonly CLIENT_DIR=$REPO_ROOT/client/python`
			`readonly REGTEST_DIR=_tmp/regtest`

			`# All the Python tools need this`
			`export PYTHONPATH=$CLIENT_DIR`

Remove duplication by making demo.sh call regtest.sh, and fix lint errors. 2015-04-02 00:09:44 +03:00			`readonly NUM_SPEC_COLS=13`
Add a regression test harness. The test parameters are defined in tests/regtest_spec.py. Basic usage is: $ ./regtest.sh run-all This runs all tests in parallel, and results in an HTML table with results. - Calculate both false positives and false negatives in analyze.R. Refactor the function to be more symmetric. - Refactor demo.sh a bit - In gen_sim_input.py, get rid of hard-coded 7 values per client, and make it a parameter - Change rappor_sim.py to use a -d <dist> flag, rather than separate flags - Add test cases based on Chrome params - Factor out util.sh script 2015-03-17 00:19:39 +03:00
			`# TODO: Get num cpus`
			`readonly NUM_PROCS=${NUM_PROCS:-12}`


Remove duplication by making demo.sh call regtest.sh, and fix lint errors. 2015-04-02 00:09:44 +03:00			`# Add some more candidates here. We hope these are estimated at 0.`
			`# e.g. if add_start=51, and num_additional is 20, show v51-v70`
			`more-candidates() {`
			`local last_true=$1`
			`local num_additional=$2`

			`local begin`
			`local end`
			`begin=$(expr $last_true + 1)`
			`end=$(expr $last_true + $num_additional)`

			`seq $begin $end \| awk '{print "v" $1}'`
			`}`

			`# Args:`
			`# true_inputs: File of true inputs`
			`# last_true: last true input, e.g. 50 if we generated "v1" .. "v50".`
			`# num_additional: additional candidates to generate (starting at 'last_true')`
			`# to_remove: Regex of true values to omit from the candidates list, or the`
			`# string 'NONE' if none should be. (Our values look like 'v1', 'v2', etc. so`
			`# there isn't any ambiguity.)`
			`print-candidates() {`
			`local true_inputs=$1`
			`local last_true=$2`
			`local num_additional=$3`
			`local to_remove=$4`

			`if test $to_remove = NONE; then`
			`cat $true_inputs # include all true inputs`
			`else`
			`egrep -v $to_remove $true_inputs # remove some true inputs`
			`fi`
			`more-candidates $last_true $num_additional`
			`}`

Add a regression test harness. The test parameters are defined in tests/regtest_spec.py. Basic usage is: $ ./regtest.sh run-all This runs all tests in parallel, and results in an HTML table with results. - Calculate both false positives and false negatives in analyze.R. Refactor the function to be more symmetric. - Refactor demo.sh a bit - In gen_sim_input.py, get rid of hard-coded 7 values per client, and make it a parameter - Change rappor_sim.py to use a -d <dist> flag, rather than separate flags - Add test cases based on Chrome params - Factor out util.sh script 2015-03-17 00:19:39 +03:00			`# Run a single test case, specified by a line of the test spec.`
			`# This is a helper function for 'run-all'.`

			`_run-one-case() {`
			`local test_case_id=$1`

			`# input params`
			`local dist=$2`
			`local num_unique_values=$3`
			`local num_clients=$4`
			`local values_per_client=$5`

			`# RAPPOR params`
			`local num_bits=$6`
			`local num_hashes=$7`
			`local num_cohorts=$8`
			`local p=$9`
			`local q=${10}`
			`local f=${11} # need curly braces to get 10th arg`

			`# map params`
			`local num_additional=${12}`
			`local to_remove=${13}`

			`# NOTE: NUM_SPEC_COLS == 13`

			`local case_dir=$REGTEST_DIR/$test_case_id`
			`mkdir --verbose -p $case_dir`

			`banner "Saving spec"`

			`# The arguments are the test case spec`
			`echo "$@" > $case_dir/spec.txt`

			`banner "Generating input"`

			`tests/gen_sim_input.py \`
			`-d $dist \`
Refactoring of gen_sim_input.py and rappor_sim.py: - Use optparse, so we have auto-generated help, type conversion, and error checking - Get rid of global vars - In gen_sim_input, make flag names match the variable names in regtest.html 2015-04-02 00:34:58 +03:00			`-c $num_clients \`
			`-u $num_unique_values \`
			`-v $values_per_client \`
Add a regression test harness. The test parameters are defined in tests/regtest_spec.py. Basic usage is: $ ./regtest.sh run-all This runs all tests in parallel, and results in an HTML table with results. - Calculate both false positives and false negatives in analyze.R. Refactor the function to be more symmetric. - Refactor demo.sh a bit - In gen_sim_input.py, get rid of hard-coded 7 values per client, and make it a parameter - Change rappor_sim.py to use a -d <dist> flag, rather than separate flags - Add test cases based on Chrome params - Factor out util.sh script 2015-03-17 00:19:39 +03:00			`-o $case_dir/case.csv`

			`banner "Running RAPPOR client"`

			`tests/rappor_sim.py \`
Refactoring of gen_sim_input.py and rappor_sim.py: - Use optparse, so we have auto-generated help, type conversion, and error checking - Get rid of global vars - In gen_sim_input, make flag names match the variable names in regtest.html 2015-04-02 00:34:58 +03:00			`--num-bits $num_bits \`
			`--num-hashes $num_hashes \`
			`--num-cohorts $num_cohorts \`
Add a regression test harness. The test parameters are defined in tests/regtest_spec.py. Basic usage is: $ ./regtest.sh run-all This runs all tests in parallel, and results in an HTML table with results. - Calculate both false positives and false negatives in analyze.R. Refactor the function to be more symmetric. - Refactor demo.sh a bit - In gen_sim_input.py, get rid of hard-coded 7 values per client, and make it a parameter - Change rappor_sim.py to use a -d <dist> flag, rather than separate flags - Add test cases based on Chrome params - Factor out util.sh script 2015-03-17 00:19:39 +03:00			`-p $p \`
			`-q $q \`
			`-f $f \`
			`-i $case_dir/case.csv \`
			`-o $case_dir/out.csv`

			`banner "Constructing candidates"`

			`# Reuse demo.sh function`
Remove duplication by making demo.sh call regtest.sh, and fix lint errors. 2015-04-02 00:09:44 +03:00			`print-candidates \`
Fix bug where print-candidates was duplicating values. We were assuming there were always 50 candidates as in demo.sh, instead of using the test parameters. 2015-03-24 02:43:30 +03:00			`$case_dir/case_true_inputs.txt $num_unique_values \`
			`$num_additional "$to_remove" \`
Add a regression test harness. The test parameters are defined in tests/regtest_spec.py. Basic usage is: $ ./regtest.sh run-all This runs all tests in parallel, and results in an HTML table with results. - Calculate both false positives and false negatives in analyze.R. Refactor the function to be more symmetric. - Refactor demo.sh a bit - In gen_sim_input.py, get rid of hard-coded 7 values per client, and make it a parameter - Change rappor_sim.py to use a -d <dist> flag, rather than separate flags - Add test cases based on Chrome params - Factor out util.sh script 2015-03-17 00:19:39 +03:00			`> $case_dir/case_candidates.txt`

			`banner "Hashing candidates to get 'map'"`

			`analysis/tools/hash_candidates.py \`
			`$case_dir/case_params.csv \`
			`< $case_dir/case_candidates.txt \`
			`> $case_dir/case_map.csv`

			`banner "Summing bits to get 'counts'"`

			`analysis/tools/sum_bits.py \`
			`$case_dir/case_params.csv \`
			`< $case_dir/out.csv \`
			`> $case_dir/case_counts.csv`

			`local out_dir=$REGTEST_DIR/${test_case_id}_report`
			`mkdir --verbose -p $out_dir`

			`# Input prefix, output dir`
			`tests/analyze.R -t "Test case: $test_case_id" "$case_dir/case" $out_dir`
			`}`

			`# Like _run-once-case, but log to a file.`
			`_run-one-case-logged() {`
			`local test_case_id=$1`

			`local case_dir=$REGTEST_DIR/$test_case_id`
			`mkdir --verbose -p $case_dir`

			`log "Started '$test_case_id' -- logging to $case_dir/log.txt"`
			`_run-one-case "$@" >$case_dir/log.txt 2>&1`
			`log "Test case $test_case_id done"`
			`}`

			`show-help() {`
			`tests/gen_sim_input.py \|\| true`
			`tests/rappor_sim.py -h \|\| true`
			`}`

			`make-summary() {`
			`local dir=$1`
Make filename a param 2015-04-02 00:03:04 +03:00			`local filename=${2:-results.html}`
Add a regression test harness. The test parameters are defined in tests/regtest_spec.py. Basic usage is: $ ./regtest.sh run-all This runs all tests in parallel, and results in an HTML table with results. - Calculate both false positives and false negatives in analyze.R. Refactor the function to be more symmetric. - Refactor demo.sh a bit - In gen_sim_input.py, get rid of hard-coded 7 values per client, and make it a parameter - Change rappor_sim.py to use a -d <dist> flag, rather than separate flags - Add test cases based on Chrome params - Factor out util.sh script 2015-03-17 00:19:39 +03:00
			`tests/make_summary.py $dir > $dir/rows.html`

			`pushd $dir >/dev/null`

			`cat ../../tests/regtest.html \`
			`\| sed -e '/TABLE_ROWS/ r rows.html' \`
Make filename a param 2015-04-02 00:03:04 +03:00			`> $filename`
Add a regression test harness. The test parameters are defined in tests/regtest_spec.py. Basic usage is: $ ./regtest.sh run-all This runs all tests in parallel, and results in an HTML table with results. - Calculate both false positives and false negatives in analyze.R. Refactor the function to be more symmetric. - Refactor demo.sh a bit - In gen_sim_input.py, get rid of hard-coded 7 values per client, and make it a parameter - Change rappor_sim.py to use a -d <dist> flag, rather than separate flags - Add test cases based on Chrome params - Factor out util.sh script 2015-03-17 00:19:39 +03:00
			`popd >/dev/null`

Make filename a param 2015-04-02 00:03:04 +03:00			`log "Wrote $dir/$filename"`
			`log "URL: file://$PWD/$dir/$filename"`
Add a regression test harness. The test parameters are defined in tests/regtest_spec.py. Basic usage is: $ ./regtest.sh run-all This runs all tests in parallel, and results in an HTML table with results. - Calculate both false positives and false negatives in analyze.R. Refactor the function to be more symmetric. - Refactor demo.sh a bit - In gen_sim_input.py, get rid of hard-coded 7 values per client, and make it a parameter - Change rappor_sim.py to use a -d <dist> flag, rather than separate flags - Add test cases based on Chrome params - Factor out util.sh script 2015-03-17 00:19:39 +03:00			`}`

			`# Helper to parse spec input with xargs`
			`multi() {`
			`xargs -n $NUM_SPEC_COLS --no-run-if-empty --verbose "$@"`
			`}`

			`test-error() {`
Fix error in test path 2015-04-01 09:33:29 +03:00			`local spec_regex=${1:-}`
			`log "Some test cases failed"`
			`if test -n "$spec_regex"; then`
			`log "(Perhaps none matched pattern '$spec_regex')"`
			`fi`
Add a regression test harness. The test parameters are defined in tests/regtest_spec.py. Basic usage is: $ ./regtest.sh run-all This runs all tests in parallel, and results in an HTML table with results. - Calculate both false positives and false negatives in analyze.R. Refactor the function to be more symmetric. - Refactor demo.sh a bit - In gen_sim_input.py, get rid of hard-coded 7 values per client, and make it a parameter - Change rappor_sim.py to use a -d <dist> flag, rather than separate flags - Add test cases based on Chrome params - Factor out util.sh script 2015-03-17 00:19:39 +03:00			`exit 1`
			`}`

			`# Assuming the spec file, write a list of test case names (first column). This`
			`# is read by make_summary.py.`
			`write-test-cases() {`
			`cut -d ' ' -f 1 $REGTEST_DIR/spec-list.txt > $REGTEST_DIR/test-cases.txt`
			`}`

			`# run-all should take regex?`
			`run-seq() {`
			`local spec_regex=$1 # grep -E format on the spec`
Remove duplication by making demo.sh call regtest.sh, and fix lint errors. 2015-04-02 00:09:44 +03:00			`local html_filename=${2:-results.html} # demo.sh changes it to demo.sh`
Add a regression test harness. The test parameters are defined in tests/regtest_spec.py. Basic usage is: $ ./regtest.sh run-all This runs all tests in parallel, and results in an HTML table with results. - Calculate both false positives and false negatives in analyze.R. Refactor the function to be more symmetric. - Refactor demo.sh a bit - In gen_sim_input.py, get rid of hard-coded 7 values per client, and make it a parameter - Change rappor_sim.py to use a -d <dist> flag, rather than separate flags - Add test cases based on Chrome params - Factor out util.sh script 2015-03-17 00:19:39 +03:00
			`local spec_list=$REGTEST_DIR/spec-list.txt`
			`tests/regtest_spec.py \| grep -E $spec_regex > $spec_list`

			`write-test-cases`

			`cat $spec_list \`
			`\| multi -- $0 _run-one-case \|\| test-error $spec_regex`

			`log "Done running all test cases"`

Remove duplication by making demo.sh call regtest.sh, and fix lint errors. 2015-04-02 00:09:44 +03:00			`make-summary $REGTEST_DIR $html_filename`
Add a regression test harness. The test parameters are defined in tests/regtest_spec.py. Basic usage is: $ ./regtest.sh run-all This runs all tests in parallel, and results in an HTML table with results. - Calculate both false positives and false negatives in analyze.R. Refactor the function to be more symmetric. - Refactor demo.sh a bit - In gen_sim_input.py, get rid of hard-coded 7 values per client, and make it a parameter - Change rappor_sim.py to use a -d <dist> flag, rather than separate flags - Add test cases based on Chrome params - Factor out util.sh script 2015-03-17 00:19:39 +03:00			`}`

			`run-all() {`
			`# Limit it to this number of test cases. By default we run all of them.`
			`local max_cases=${1:-1000000}`
			`local verbose=${2:-F}`

			`mkdir --verbose -p $REGTEST_DIR`
			`# Print the spec`
			`#`
			`# -n3 has to match the number of arguments in the spec.`

			`#local func=_run-one-case-logged`
			`local func`
			`if test $verbose = T; then`
			`func=_run-one-case # parallel process output mixed on the console`
			`else`
			`func=_run-one-case-logged # one line`
			`fi`

			`log "Using $NUM_PROCS parallel processes"`

			`local spec_list=$REGTEST_DIR/spec-list.txt`
			`tests/regtest_spec.py > $spec_list`

			`write-test-cases`

			`head -n $max_cases $spec_list \`
Fix error in test path 2015-04-01 09:33:29 +03:00			`\| multi -P $NUM_PROCS -- $0 $func \|\| test-error`
Add a regression test harness. The test parameters are defined in tests/regtest_spec.py. Basic usage is: $ ./regtest.sh run-all This runs all tests in parallel, and results in an HTML table with results. - Calculate both false positives and false negatives in analyze.R. Refactor the function to be more symmetric. - Refactor demo.sh a bit - In gen_sim_input.py, get rid of hard-coded 7 values per client, and make it a parameter - Change rappor_sim.py to use a -d <dist> flag, rather than separate flags - Add test cases based on Chrome params - Factor out util.sh script 2015-03-17 00:19:39 +03:00
			`log "Done running all test cases"`

			`make-summary $REGTEST_DIR`
			`}`

			`"$@"`