2015-03-17 00:19:39 +03:00
|
|
|
#!/bin/bash
|
2016-02-03 23:06:03 +03:00
|
|
|
# Print the help text for this script on stdout.
#
# Takes no arguments.  The text starts and ends with an empty line.
usage() {
  # The heredoc delimiter is quoted so that the literal '$' characters in the
  # examples and in the regex note can never be treated as expansions.
  cat <<'EOF'

 Run end-to-end tests in parallel.

 Usage:
   ./regtest.sh <function name>
 At the end, it will print an HTML summary.

 Three main functions are
    run [<pattern> [<lang>]]     - run tests matching <pattern> in
                                   parallel, each <num> times.  The language
                                   of the client to use.
    run-seq [<pattern> [<lang>]] - ditto, except that tests are run
                                   sequentially
    run-all                      - run all tests, in parallel, each <num> times

 Examples:
 $ ./regtest.sh run-seq unif-small-typical  # Run, the unif-small-typical test
 $ ./regtest.sh run-seq unif-small-         # Sequential, the tests containing:
                                            # 'unif-small-'
 $ ./regtest.sh run unif-  # Parallel run, matches multiple cases
 $ ./regtest.sh run-all    # Run all tests

 The <pattern> argument is a regex in 'grep -E' format. (Detail: Don't
 use $ in the pattern, since it matches the whole spec line and not just the
 test case name.) The number of processors used in a parallel run is one less
 than the number of CPUs on the machine.

 Future speedups:
 - Reuse the same input -- come up with naming scheme based on params
 - Reuse the same maps -- ditto, rappor library can cache it

EOF
}
|
2015-03-17 00:19:39 +03:00
|
|
|
|
|
|
|
# Strict mode: fail on unset variables, on a failure anywhere in a pipeline,
# and on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Shared helpers, loaded by bare name.
# NOTE(review): presumably this is where banner() comes from, and the script
# is expected to be started from the repository root -- confirm.
. util.sh

# Assign first, then mark readonly, so that a failing dirname is not masked by
# the exit status of 'readonly'.  "$0" is quoted so a path containing spaces
# stays a single word.
THIS_DIR=$(dirname "$0")
readonly THIS_DIR
readonly REPO_ROOT=$THIS_DIR
readonly CLIENT_DIR=$REPO_ROOT/client/python

# subdirs are in _tmp/$impl, which shouldn't overlap with anything else in _tmp
readonly REGTEST_BASE_DIR=_tmp

# All the Python tools need this
export PYTHONPATH=$CLIENT_DIR
|
|
|
|
|
Make the Python simulation into a Python client library.
- It can be used in applications, and follows the same choices as the
C++ (and Java) client libraries: MD5 to choose the bloom filter bits,
and HMAC-SHA256 with client secret for the PRR. (rappor.py)
- hash_candidates.py and rappor_sim.py use the rappor.get_bloom_bits
API, which is more efficient since it calls the hash function once
instead of 'h' times.
- Make the randomness interface a simpler "IrrRand" interface, which has
2 implementations: pure Python and C. (The PRR always uses the same
randomness.)
- regtest.sh: Instead of a boolean for 'fast_counts', we have 3 modes:
Python client, C++ client, and 'fast_counts'. The same tests can be
run against each implementation. The results are put in different
directories, so they don't overwrite each other.
- Rename gen_reports.R to gen_true_values.R (since the "reports" are
typically the data transformed by RAPPOR).
- Change it to output the client and assign the cohort like this:
client,cohort,value
c1,1,v26
c1,1,v27
c2,2,v8
c2,2,v9
The cohort is stable per client, but the value is different.
- Add unit test
- tests/rappor_sim.py: New test protocol. Read true_values CSV input.
The CSV output contains the bloom filter, PRR, and IRR for easier
debugging.
- compare_dist.R loads the true values and computes the histograms,
rather than rappor_sim.py having to do it. This makes it easier to
use the test harness with C++ and other clients.
- demo.sh: quickly run equivalent tests for each mode.
- regtest.sh: show the implementation tested
2015-07-14 01:56:28 +03:00
|
|
|
# Print the true values "v1" .. "v<N>" on stdout, one per line.
#
# Args:
#   num_unique_values: how many values to print; 0 prints nothing.
print-unique-values() {
  local num_unique_values=$1

  # A builtin loop instead of 'seq | awk': no unquoted expansion and no
  # external processes for what is a simple count.
  local i
  for ((i = 1; i <= num_unique_values; i++)); do
    printf 'v%d\n' "$i"
  done
}
|
2015-03-17 00:19:39 +03:00
|
|
|
|
2015-04-02 00:09:44 +03:00
|
|
|
# Add some more candidates here.  We hope these are estimated at 0.
# e.g. if last_true=50 and num_additional is 20, show v51-v70
#
# Args:
#   last_true: number of the last true value; output starts one past it.
#   num_additional: how many extra candidates to print; 0 prints nothing.
more-candidates() {
  local last_true=$1
  local num_additional=$2

  # Shell arithmetic rather than expr(1): expr exits with status 1 whenever
  # its result is 0, which would abort the whole script under errexit.
  local begin=$(( last_true + 1 ))
  local end=$(( last_true + num_additional ))

  local i
  for ((i = begin; i <= end; i++)); do
    printf 'v%d\n' "$i"
  done
}
|
|
|
|
|
|
|
|
# Print the candidate list on stdout: the true values (optionally filtered)
# followed by the extra candidates produced by more-candidates.
#
# Args:
#   unique_values: File of unique true values
#   last_true: last true input, e.g. 50 if we generated "v1" .. "v50".
#   num_additional: additional candidates to generate (starting after
#     'last_true')
#   to_remove: Regex of true values to omit from the candidates list, or the
#     string 'NONE' if none should be.  (Our values look like 'v1', 'v2', etc.
#     so there isn't any ambiguity.)
print-candidates() {
  local unique_values=$1
  local last_true=$2
  local num_additional=$3
  local to_remove=$4

  if [[ "$to_remove" == NONE ]]; then
    cat -- "$unique_values"  # include all true inputs
  else
    # remove some true inputs
    #
    # grep exits with 1 when it selects no lines, i.e. when the regex removed
    # every true value.  That is a valid outcome here, so only statuses above
    # 1 (real errors such as an unreadable file) are propagated.
    local rc=0
    grep -E -v -e "$to_remove" -- "$unique_values" || rc=$?
    if (( rc > 1 )); then
      return "$rc"
    fi
  fi
  more-candidates "$last_true" "$num_additional"
}
|
|
|
|
|
2015-04-15 02:01:32 +03:00
|
|
|
# Generate a single test case, specified by a line of the test spec.
# This is a helper function for _run_tests().
#
# Args:
#   impl: implementation name; used as a directory level under
#     $REGTEST_BASE_DIR.
#   followed by the 13 spec fields, in the order they are read below.
#
# Writes these files under $REGTEST_BASE_DIR/<impl>/<test_case>/:
#   spec.txt                the 13 spec fields, space-separated on one line
#   case_params.csv         header 'k,h,m,p,q,f' plus one row of values
#   case_unique_values.txt  output of print-unique-values
#   case_true_map.csv       bin/hash_candidates.py output for the true values
#   case_candidates.txt     output of print-candidates
#   case_map.csv            bin/hash_candidates.py output for the candidates
_setup-one-case() {
  local impl=$1
  shift  # impl is not part of the spec; the remaining 13 params are the spec

  local test_case=$1

  # input params
  # (dist, num_clients and values_per_client are not used in this function;
  # they are named here to document the spec layout and reach spec.txt via
  # "$@".)
  local dist=$2
  local num_unique_values=$3
  local num_clients=$4
  local values_per_client=$5

  # RAPPOR params
  local num_bits=$6
  local num_hashes=$7
  local num_cohorts=$8
  local p=$9
  local q=${10}  # need curly braces to get the 10th arg
  local f=${11}

  # map params
  local num_additional=${12}
  local to_remove=${13}

  banner "Setting up parameters and candidate files for $test_case"

  local case_dir=$REGTEST_BASE_DIR/$impl/$test_case
  mkdir --verbose -p "$case_dir"

  # Save the "spec"
  echo "$@" > "$case_dir/spec.txt"

  local params_path=$case_dir/case_params.csv

  echo 'k,h,m,p,q,f' > "$params_path"
  echo "$num_bits,$num_hashes,$num_cohorts,$p,$q,$f" >> "$params_path"

  print-unique-values "$num_unique_values" > "$case_dir/case_unique_values.txt"

  local true_map_path=$case_dir/case_true_map.csv

  # The params file is passed as an argument, the values arrive on stdin, and
  # the output is captured from stdout.
  bin/hash_candidates.py \
    "$params_path" \
    < "$case_dir/case_unique_values.txt" \
    > "$true_map_path"

  # banner "Constructing candidates"

  print-candidates \
    "$case_dir/case_unique_values.txt" "$num_unique_values" \
    "$num_additional" "$to_remove" \
    > "$case_dir/case_candidates.txt"

  # banner "Hashing candidates to get 'map'"

  bin/hash_candidates.py \
    "$params_path" \
    < "$case_dir/case_candidates.txt" \
    > "$case_dir/case_map.csv"
}
|
|
|
|
|
2015-04-17 10:23:14 +03:00
|
|
|
# Run a single test instance, specified by <test_name, instance_num>.
|
|
|
|
# This is a helper function for _run_tests().
|
2015-04-15 02:01:32 +03:00
|
|
|
_run-one-instance() {
|
2015-04-17 10:23:14 +03:00
|
|
|
local test_case=$1
|
|
|
|
local test_instance=$2
|
Make the Python simulation into a Python client library.
- It can be used in applications, and follows the same choices as the
C++ (and Java) client libraries: MD5 to choose the bloom filter bits,
and HMAC-SHA256 with client secret for the PRR. (rappor.py)
- hash_candidates.py and rappor_sim.py use the rappor.get_bloom_bits
API, which is more efficient since it calls the hash function once
instead of 'h' times.
- Make the randomness interface a simpler "IrrRand" interface, which has
2 implementations: pure Python and C. (The PRR always uses the same
randomness.)
- regtest.sh: Instead of a boolean for 'fast_counts', we have 3 modes:
Python client, C++ client, and 'fast_counts'. The same tests can be
run against each implementation. The results are put in different
directories, so they don't overwrite each other.
- Rename gen_reports.R to gen_true_values.R (since the "reports" are
typically the data transformed by RAPPOR).
- Change it to output the client and assign the cohort like this:
client,cohort,value
c1,1,v26
c1,1,v27
c2,2,v8
c2,2,v9
The cohort is stable per client, but the value is different.
- Add unit test
- tests/rappor_sim.py: New test protocol. Read true_values CSV input.
The CSV output contains the bloom filter, PRR, and IRR for easier
debugging.
- compare_dist.R loads the true values and computes the histograms,
rather than rappor_sim.py having to do it. This makes it easier to
use the test harness with C++ and other clients.
- demo.sh: quickly run equivalent tests for each mode.
- regtest.sh: show the implementation tested
2015-07-14 01:56:28 +03:00
|
|
|
local impl=$3
|
2015-04-15 02:01:32 +03:00
|
|
|
|
Make the Python simulation into a Python client library.
- It can be used in applications, and follows the same choices as the
C++ (and Java) client libraries: MD5 to choose the bloom filter bits,
and HMAC-SHA256 with client secret for the PRR. (rappor.py)
- hash_candidates.py and rappor_sim.py use the rappor.get_bloom_bits
API, which is more efficient since it calls the hash function once
instead of 'h' times.
- Make the randomness interface a simpler "IrrRand" interface, which has
2 implementations: pure Python and C. (The PRR always uses the same
randomness.)
- regtest.sh: Instead of a boolean for 'fast_counts', we have 3 modes:
Python client, C++ client, and 'fast_counts'. The same tests can be
run against each implementation. The results are put in different
directories, so they don't overwrite each other.
- Rename gen_reports.R to gen_true_values.R (since the "reports" are
typically the data transformed by RAPPOR).
- Change it to output the client and assign the cohort like this:
client,cohort,value
c1,1,v26
c1,1,v27
c2,2,v8
c2,2,v9
The cohort is stable per client, but the value is different.
- Add unit test
- tests/rappor_sim.py: New test protocol. Read true_values CSV input.
The CSV output contains the bloom filter, PRR, and IRR for easier
debugging.
- compare_dist.R loads the true values and computes the histograms,
rather than rappor_sim.py having to do it. This makes it easier to
use the test harness with C++ and other clients.
- demo.sh: quickly run equivalent tests for each mode.
- regtest.sh: show the implementation tested
2015-07-14 01:56:28 +03:00
|
|
|
local case_dir=$REGTEST_BASE_DIR/$impl/$test_case
|
2015-04-17 10:23:14 +03:00
|
|
|
|
Make the Python simulation into a Python client library.
- It can be used in applications, and follows the same choices as the
C++ (and Java) client libraries: MD5 to choose the bloom filter bits,
and HMAC-SHA256 with client secret for the PRR. (rappor.py)
- hash_candidates.py and rappor_sim.py use the rappor.get_bloom_bits
API, which is more efficient since it calls the hash function once
instead of 'h' times.
- Make the randomness interface a simpler "IrrRand" interface, which has
2 implementations: pure Python and C. (The PRR always uses the same
randomness.)
- regtest.sh: Instead of a boolean for 'fast_counts', we have 3 modes:
Python client, C++ client, and 'fast_counts'. The same tests can be
run against each implementation. The results are put in different
directories, so they don't overwrite each other.
- Rename gen_reports.R to gen_true_values.R (since the "reports" are
typically the data transformed by RAPPOR).
- Change it to output the client and assign the cohort like this:
client,cohort,value
c1,1,v26
c1,1,v27
c2,2,v8
c2,2,v9
The cohort is stable per client, but the value is different.
- Add unit test
- tests/rappor_sim.py: New test protocol. Read true_values CSV input.
The CSV output contains the bloom filter, PRR, and IRR for easier
debugging.
- compare_dist.R loads the true values and computes the histograms,
rather than rappor_sim.py having to do it. This makes it easier to
use the test harness with C++ and other clients.
- demo.sh: quickly run equivalent tests for each mode.
- regtest.sh: show the implementation tested
2015-07-14 01:56:28 +03:00
|
|
|
read -r \
|
|
|
|
case_name distr num_unique_values num_clients values_per_client \
|
|
|
|
num_bits num_hashes num_cohorts p q f \
|
|
|
|
num_additional to_remove \
|
|
|
|
< $case_dir/spec.txt
|
2015-04-15 02:01:32 +03:00
|
|
|
|
Make the Python simulation into a Python client library.
- It can be used in applications, and follows the same choices as the
C++ (and Java) client libraries: MD5 to choose the bloom filter bits,
and HMAC-SHA256 with client secret for the PRR. (rappor.py)
- hash_candidates.py and rappor_sim.py use the rappor.get_bloom_bits
API, which is more efficient since it calls the hash function once
instead of 'h' times.
- Make the randomness interface a simpler "IrrRand" interface, which has
2 implementations: pure Python and C. (The PRR always uses the same
randomness.)
- regtest.sh: Instead of a boolean for 'fast_counts', we have 3 modes:
Python client, C++ client, and 'fast_counts'. The same tests can be
run against each implementation. The results are put in different
directories, so they don't overwrite each other.
- Rename gen_reports.R to gen_true_values.R (since the "reports" are
typically the data transformed by RAPPOR).
- Change it to output the client and assign the cohort like this:
client,cohort,value
c1,1,v26
c1,1,v27
c2,2,v8
c2,2,v9
The cohort is stable per client, but the value is different.
- Add unit test
- tests/rappor_sim.py: New test protocol. Read true_values CSV input.
The CSV output contains the bloom filter, PRR, and IRR for easier
debugging.
- compare_dist.R loads the true values and computes the histograms,
rather than rappor_sim.py having to do it. This makes it easier to
use the test harness with C++ and other clients.
- demo.sh: quickly run equivalent tests for each mode.
- regtest.sh: show the implementation tested
2015-07-14 01:56:28 +03:00
|
|
|
local instance_dir=$case_dir/$test_instance
|
2015-04-15 02:01:32 +03:00
|
|
|
mkdir --verbose -p $instance_dir
|
|
|
|
|
2016-02-04 00:48:45 +03:00
|
|
|
banner "Generating reports (gen_reports.R)"
|
|
|
|
|
|
|
|
# the TRUE_VALUES_PATH environment variable can be used to avoid
|
|
|
|
# generating new values every time. NOTE: You are responsible for making
|
|
|
|
# sure the params match!
|
|
|
|
|
|
|
|
local true_values=${TRUE_VALUES_PATH:-}
|
|
|
|
if test -z "$true_values"; then
|
|
|
|
true_values=$instance_dir/case_true_values.csv
|
|
|
|
tests/gen_true_values.R $distr $num_unique_values $num_clients \
|
|
|
|
$values_per_client $num_cohorts \
|
|
|
|
$true_values
|
|
|
|
else
|
|
|
|
# TEMP hack: Make it visible to plot.
|
|
|
|
# TODO: Fix compare_dist.R
|
|
|
|
ln -s -f --verbose \
|
|
|
|
$PWD/$true_values \
|
|
|
|
$instance_dir/case_true_values.csv
|
|
|
|
fi
|
Make the Python simulation into a Python client library.
- It can be used in applications, and follows the same choices as the
C++ (and Java) client libraries: MD5 to choose the bloom filter bits,
and HMAC-SHA256 with client secret for the PRR. (rappor.py)
- hash_candidates.py and rappor_sim.py use the rappor.get_bloom_bits
API, which is more efficient since it calls the hash function once
instead of 'h' times.
- Make the randomness interface a simpler "IrrRand" interface, which has
2 implementations: pure Python and C. (The PRR always uses the same
randomness.)
- regtest.sh: Instead of a boolean for 'fast_counts', we have 3 modes:
Python client, C++ client, and 'fast_counts'. The same tests can be
run against each implementation. The results are put in different
directories, so they don't overwrite each other.
- Rename gen_reports.R to gen_true_values.R (since the "reports" are
typically the data transformed by RAPPOR).
- Change it to output the client and assign the cohort like this:
client,cohort,value
c1,1,v26
c1,1,v27
c2,2,v8
c2,2,v9
The cohort is stable per client, but the value is different.
- Add unit test
- tests/rappor_sim.py: New test protocol. Read true_values CSV input.
The CSV output contains the bloom filter, PRR, and IRR for easier
debugging.
- compare_dist.R loads the true values and computes the histograms,
rather than rappor_sim.py having to do it. This makes it easier to
use the test harness with C++ and other clients.
- demo.sh: quickly run equivalent tests for each mode.
- regtest.sh: show the implementation tested
2015-07-14 01:56:28 +03:00
|
|
|
|
|
|
|
case $impl in
|
2016-02-04 00:48:45 +03:00
|
|
|
python)
|
|
|
|
banner "Running RAPPOR Python client"
|
|
|
|
|
|
|
|
# Writes encoded "out" file, true histogram, true inputs to
|
|
|
|
# $instance_dir.
|
|
|
|
time tests/rappor_sim.py \
|
|
|
|
--num-bits $num_bits \
|
|
|
|
--num-hashes $num_hashes \
|
|
|
|
--num-cohorts $num_cohorts \
|
|
|
|
-p $p \
|
|
|
|
-q $q \
|
|
|
|
-f $f \
|
|
|
|
< $true_values \
|
|
|
|
> "$instance_dir/case_reports.csv"
|
|
|
|
;;
|
|
|
|
|
|
|
|
cpp)
|
|
|
|
banner "Running RAPPOR C++ client (see rappor_sim.log for errors)"
|
Make the Python simulation into a Python client library.
- It can be used in applications, and follows the same choices as the
C++ (and Java) client libraries: MD5 to choose the bloom filter bits,
and HMAC-SHA256 with client secret for the PRR. (rappor.py)
- hash_candidates.py and rappor_sim.py use the rappor.get_bloom_bits
API, which is more efficient since it calls the hash function once
instead of 'h' times.
- Make the randomness interface a simpler "IrrRand" interface, which has
2 implementations: pure Python and C. (The PRR always uses the same
randomness.)
- regtest.sh: Instead of a boolean for 'fast_counts', we have 3 modes:
Python client, C++ client, and 'fast_counts'. The same tests can be
run against each implementation. The results are put in different
directories, so they don't overwrite each other.
- Rename gen_reports.R to gen_true_values.R (since the "reports" are
typically the data transformed by RAPPOR).
- Change it to output the client and assign the cohort like this:
client,cohort,value
c1,1,v26
c1,1,v27
c2,2,v8
c2,2,v9
The cohort is stable per client, but the value is different.
- Add unit test
- tests/rappor_sim.py: New test protocol. Read true_values CSV input.
The CSV output contains the bloom filter, PRR, and IRR for easier
debugging.
- compare_dist.R loads the true values and computes the histograms,
rather than rappor_sim.py having to do it. This makes it easier to
use the test harness with C++ and other clients.
- demo.sh: quickly run equivalent tests for each mode.
- regtest.sh: show the implementation tested
2015-07-14 01:56:28 +03:00
|
|
|
|
2016-02-04 00:48:45 +03:00
|
|
|
time client/cpp/_tmp/rappor_sim \
|
|
|
|
$num_bits \
|
|
|
|
$num_hashes \
|
|
|
|
$num_cohorts \
|
|
|
|
$p \
|
|
|
|
$q \
|
|
|
|
$f \
|
|
|
|
< $true_values \
|
|
|
|
> "$instance_dir/case_reports.csv" \
|
|
|
|
2>"$instance_dir/rappor_sim.log"
|
Make the Python simulation into a Python client library.
- It can be used in applications, and follows the same choices as the
C++ (and Java) client libraries: MD5 to choose the bloom filter bits,
and HMAC-SHA256 with client secret for the PRR. (rappor.py)
- hash_candidates.py and rappor_sim.py use the rappor.get_bloom_bits
API, which is more efficient since it calls the hash function once
instead of 'h' times.
- Make the randomness interface a simpler "IrrRand" interface, which has
2 implementations: pure Python and C. (The PRR always uses the same
randomness.)
- regtest.sh: Instead of a boolean for 'fast_counts', we have 3 modes:
Python client, C++ client, and 'fast_counts'. The same tests can be
run against each implementation. The results are put in different
directories, so they don't overwrite each other.
- Rename gen_reports.R to gen_true_values.R (since the "reports" are
typically the data transformed by RAPPOR).
- Change it to output the client and assign the cohort like this:
client,cohort,value
c1,1,v26
c1,1,v27
c2,2,v8
c2,2,v9
The cohort is stable per client, but the value is different.
- Add unit test
- tests/rappor_sim.py: New test protocol. Read true_values CSV input.
The CSV output contains the bloom filter, PRR, and IRR for easier
debugging.
- compare_dist.R loads the true values and computes the histograms,
rather than rappor_sim.py having to do it. This makes it easier to
use the test harness with C++ and other clients.
- demo.sh: quickly run equivalent tests for each mode.
- regtest.sh: show the implementation tested
2015-07-14 01:56:28 +03:00
|
|
|
|
|
|
|
;;
|
|
|
|
|
|
|
|
*)
|
2016-02-04 00:48:45 +03:00
|
|
|
log "Invalid impl $impl (should be one of python|cpp)"
|
|
|
|
exit 1
|
Make the Python simulation into a Python client library.
- It can be used in applications, and follows the same choices as the
C++ (and Java) client libraries: MD5 to choose the bloom filter bits,
and HMAC-SHA256 with client secret for the PRR. (rappor.py)
- hash_candidates.py and rappor_sim.py use the rappor.get_bloom_bits
API, which is more efficient since it calls the hash function once
instead of 'h' times.
- Make the randomness interface a simpler "IrrRand" interface, which has
2 implementations: pure Python and C. (The PRR always uses the same
randomness.)
- regtest.sh: Instead of a boolean for 'fast_counts', we have 3 modes:
Python client, C++ client, and 'fast_counts'. The same tests can be
run against each implementation. The results are put in different
directories, so they don't overwrite each other.
- Rename gen_reports.R to gen_true_values.R (since the "reports" are
typically the data transformed by RAPPOR).
- Change it to output the client and assign the cohort like this:
client,cohort,value
c1,1,v26
c1,1,v27
c2,2,v8
c2,2,v9
The cohort is stable per client, but the value is different.
- Add unit test
- tests/rappor_sim.py: New test protocol. Read true_values CSV input.
The CSV output contains the bloom filter, PRR, and IRR for easier
debugging.
- compare_dist.R loads the true values and computes the histograms,
rather than rappor_sim.py having to do it. This makes it easier to
use the test harness with C++ and other clients.
- demo.sh: quickly run equivalent tests for each mode.
- regtest.sh: show the implementation tested
2015-07-14 01:56:28 +03:00
|
|
|
;;
|
|
|
|
esac
|
2015-03-17 00:19:39 +03:00
|
|
|
|
2016-02-04 00:48:45 +03:00
|
|
|
banner "Summing RAPPOR IRR bits to get 'counts'"
|
|
|
|
|
|
|
|
bin/sum_bits.py \
|
|
|
|
$case_dir/case_params.csv \
|
|
|
|
< $instance_dir/case_reports.csv \
|
|
|
|
> $instance_dir/case_counts.csv
|
|
|
|
;;
|
|
|
|
|
2015-04-15 02:01:32 +03:00
|
|
|
local out_dir=${instance_dir}_report
|
2015-03-17 00:19:39 +03:00
|
|
|
mkdir --verbose -p $out_dir
|
|
|
|
|
2015-04-17 10:23:14 +03:00
|
|
|
# Currently, the summary file shows and aggregates timing of the inference
|
|
|
|
# engine, which excludes R's loading time and reading of the (possibly
|
|
|
|
# substantial) map file. Timing below is more inclusive.
|
2015-07-08 01:34:56 +03:00
|
|
|
TIMEFORMAT='Running compare_dist.R took %R seconds'
|
2015-04-15 02:01:32 +03:00
|
|
|
time {
|
|
|
|
# Input prefix, output dir
|
2015-07-08 01:34:56 +03:00
|
|
|
tests/compare_dist.R -t "Test case: $test_case (instance $test_instance)" \
|
|
|
|
"$case_dir/case" "$instance_dir/case" $out_dir
|
2015-04-15 02:01:32 +03:00
|
|
|
}
|
2015-03-17 00:19:39 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
# Like _run-one-instance, but send the run's output to a log file instead of
# the console.
#
# Args:
#   test_case: name of the test case
#   test_instance: instance id of the test case
#   impl: implementation under test; used as a path component of the log dir
#
# Globals:
#   REGTEST_BASE_DIR (read)
#
# stdout and stderr of the run are written to
#   $REGTEST_BASE_DIR/<impl>/<test_case>/<test_instance>_report/log.txt
# The outcome of the run is only reported through 'log'; the function's own
# status is that of the final 'log' call, so a failing run does not by itself
# make this function fail.
_run-one-instance-logged() {
  local test_case=$1
  local test_instance=$2
  local impl=$3

  local log_dir=$REGTEST_BASE_DIR/$impl/$test_case/${test_instance}_report
  mkdir --verbose -p "$log_dir"

  log "Started '$test_case' (instance $test_instance) -- logging to $log_dir/log.txt"

  # Explicit if/else rather than 'A && B || C', so the "failed" message can
  # only ever be triggered by the run itself.
  if _run-one-instance "$@" >"$log_dir/log.txt" 2>&1; then
    log "Test case $test_case (instance $test_instance) done"
  else
    log "Test case $test_case (instance $test_instance) failed"
  fi
}
|
|
|
|
|
|
|
|
# Write the HTML summary page for the results in one directory.
#
# Args:
#   dir: results directory; rows.html and results.html are written into it
#   impl: implementation name, substituted for every _IMPL_ in the template
#
# Runs tests/make_summary.py to produce $dir/rows.html, then fills in the
# tests/regtest.html template: the contents of rows.html are inserted after
# the line containing __TABLE_ROWS__, and _IMPL_ is replaced by $impl.
make-summary() {
  local dir=$1
  local impl=$2

  local filename=results.html

  tests/make_summary.py "$dir" "$dir/rows.html"

  # The template is addressed from the current directory, the same way
  # tests/make_summary.py is, instead of cd-ing into $dir and climbing back
  # out with a fixed '../..'.  That form only worked when $dir was exactly
  # two levels below the current directory.
  # NOTE(review): $impl is interpolated into the sed program, so it must not
  # contain '/' or other characters special to sed.
  sed -e "/__TABLE_ROWS__/ r $dir/rows.html" -e "s/_IMPL_/$impl/g" \
    tests/regtest.html > "$dir/$filename"

  log "Wrote $dir/$filename"
  log "URL: file://$PWD/$dir/$filename"
}
|
|
|
|
|
|
|
|
# Report that some test cases failed.
#
# Args:
#   spec_regex: (optional) the pattern the tests were selected with.  When it
#               is non-empty, an extra hint is logged that the pattern may
#               not have matched anything.
#
# This only logs.  It deliberately does not exit, so the caller carries on.
test-error() {
  local spec_regex=${1:-}

  log "Some test cases failed"
  if [[ -n "$spec_regex" ]]; then
    log "(Perhaps none matched pattern '$spec_regex')"
  fi

  # don't quit just yet
  # exit 1
}
|
|
|
|
|
2015-04-17 10:23:14 +03:00
|
|
|
# Read spec lines on stdin and print one line per test instance:
#
#   <case name> <instance id> <impl>
#
# The case name is the first whitespace-separated token of each spec line;
# the rest of the line is ignored.  Instance ids run from 1 to $1.
#
# Args:
#   instances: number of instances to emit for every test case
#   impl: implementation name, copied verbatim into the third column
#
# Blank input lines are skipped, and a final line without a trailing newline
# is still processed.
_setup-test-instances() {
  local instances=$1
  local impl=$2

  local case_name i

  # 'read -r' keeps backslashes literal.  The '|| [[ -n ... ]]' clause picks
  # up a last line that is not newline-terminated.
  while read -r case_name _ || [[ -n "$case_name" ]]; do
    [[ -n "$case_name" ]] || continue  # nothing to emit for a blank line

    for ((i = 1; i <= instances; i++)); do
      # printf with quoted arguments: the name is printed as-is, with no
      # glob expansion or word splitting.
      printf '%s %s %s\n' "$case_name" "$i" "$impl"
    done
  done
}
|
|
|
|
|
2015-05-28 09:31:32 +03:00
|
|
|
# Print the default number of parallel processes, which is max(#CPUs - 1, 1).
#
# The CPU count is the number of lines starting with "processor" in
# /proc/cpuinfo (Linux-specific).  If that cannot be determined, 4 CPUs are
# assumed.
#
# Outputs:
#   the process count on stdout
default-processes() {
  local processors

  # 'grep -c' prints "0" AND exits non-zero when nothing matches, so the
  # fallback is applied by assignment.  Echoing it inside the command
  # substitution would produce the two-line value "0\n4".
  processors=$(grep -c '^processor' /proc/cpuinfo) || processors=4

  if (( processors > 1 )); then  # leave one CPU for the OS
    processors=$((processors - 1))
  fi
  printf '%s\n' "$processors"
}
|
|
|
|
|
2015-04-17 10:23:14 +03:00
|
|
|
# Args:
|
2015-05-28 09:31:32 +03:00
|
|
|
# spec_gen: A program to execute to generate the spec.
|
|
|
|
# spec_regex: A pattern selecting the subset of tests to run
|
|
|
|
# parallel: Whether the tests are run in parallel (T/F). Sequential
|
|
|
|
# runs log to the console; parallel runs log to files.
|
2016-02-04 00:48:45 +03:00
|
|
|
# impl: one of python or cpp
|
2015-05-28 09:31:32 +03:00
|
|
|
|
2015-04-17 10:23:14 +03:00
|
|
|
_run-tests() {
|
2015-05-28 09:31:32 +03:00
|
|
|
local spec_gen=$1
|
|
|
|
local spec_regex="$2" # grep -E format on the spec, can be empty
|
2016-02-04 00:48:45 +03:00
|
|
|
local parallel=$3
|
|
|
|
local impl=$4
|
|
|
|
local instances=1
|
2015-03-17 00:19:39 +03:00
|
|
|
|
Make the Python simulation into a Python client library.
- It can be used in applications, and follows the same choices as the
C++ (and Java) client libraries: MD5 to choose the bloom filter bits,
and HMAC-SHA256 with client secret for the PRR. (rappor.py)
- hash_candidates.py and rappor_sim.py use the rappor.get_bloom_bits
API, which is more efficient since it calls the hash function once
instead of 'h' times.
- Make the randomness interface a simpler "IrrRand" interface, which has
2 implementations: pure Python and C. (The PRR always uses the same
randomness.)
- regtest.sh: Instead of a boolean for 'fast_counts', we have 3 modes:
Python client, C++ client, and 'fast_counts'. The same tests can be
run against each implementation. The results are put in different
directories, so they don't overwrite each other.
- Rename gen_reports.R to gen_true_values.R (since the "reports" are
typically the data transformed by RAPPOR).
- Change it to output the client and assign the cohort like this:
client,cohort,value
c1,1,v26
c1,1,v27
c2,2,v8
c2,2,v9
The cohort is stable per client, but the value is different.
- Add unit test
- tests/rappor_sim.py: New test protocol. Read true_values CSV input.
The CSV output contains the bloom filter, PRR, and IRR for easier
debugging.
- compare_dist.R loads the true values and computes the histograms,
rather than rappor_sim.py having to do it. This makes it easier to
use the test harness with C++ and other clients.
- demo.sh: quickly run equivalent tests for each mode.
- regtest.sh: show the implementation tested
2015-07-14 01:56:28 +03:00
|
|
|
local regtest_dir=$REGTEST_BASE_DIR/$impl
|
|
|
|
rm -r -f --verbose $regtest_dir
|
2015-04-17 10:23:14 +03:00
|
|
|
|
Make the Python simulation into a Python client library.
- It can be used in applications, and follows the same choices as the
C++ (and Java) client libraries: MD5 to choose the bloom filter bits,
and HMAC-SHA256 with client secret for the PRR. (rappor.py)
- hash_candidates.py and rappor_sim.py use the rappor.get_bloom_bits
API, which is more efficient since it calls the hash function once
instead of 'h' times.
- Make the randomness interface a simpler "IrrRand" interface, which has
2 implementations: pure Python and C. (The PRR always uses the same
randomness.)
- regtest.sh: Instead of a boolean for 'fast_counts', we have 3 modes:
Python client, C++ client, and 'fast_counts'. The same tests can be
run against each implementation. The results are put in different
directories, so they don't overwrite each other.
- Rename gen_reports.R to gen_true_values.R (since the "reports" are
typically the data transformed by RAPPOR).
- Change it to output the client and assign the cohort like this:
client,cohort,value
c1,1,v26
c1,1,v27
c2,2,v8
c2,2,v9
The cohort is stable per client, but the value is different.
- Add unit test
- tests/rappor_sim.py: New test protocol. Read true_values CSV input.
The CSV output contains the bloom filter, PRR, and IRR for easier
debugging.
- compare_dist.R loads the true values and computes the histograms,
rather than rappor_sim.py having to do it. This makes it easier to
use the test harness with C++ and other clients.
- demo.sh: quickly run equivalent tests for each mode.
- regtest.sh: show the implementation tested
2015-07-14 01:56:28 +03:00
|
|
|
mkdir --verbose -p $regtest_dir
|
2015-04-02 02:00:35 +03:00
|
|
|
|
2015-04-17 10:23:14 +03:00
|
|
|
local func
|
2015-05-28 09:31:32 +03:00
|
|
|
local processors
|
2015-03-17 00:19:39 +03:00
|
|
|
|
2015-04-17 10:23:14 +03:00
|
|
|
if test $parallel = F; then
|
|
|
|
func=_run-one-instance # output to the console
|
2015-05-28 09:31:32 +03:00
|
|
|
processors=1
|
2015-04-17 10:23:14 +03:00
|
|
|
else
|
|
|
|
func=_run-one-instance-logged
|
2015-05-28 09:31:32 +03:00
|
|
|
# Let the user override with MAX_PROC, in case they don't have enough
|
|
|
|
# memory.
|
|
|
|
processors=${MAX_PROC:-$(default-processes)}
|
2015-04-17 10:23:14 +03:00
|
|
|
log "Running $processors parallel processes"
|
|
|
|
fi
|
2015-03-17 00:19:39 +03:00
|
|
|
|
Make the Python simulation into a Python client library.
- It can be used in applications, and follows the same choices as the
C++ (and Java) client libraries: MD5 to choose the bloom filter bits,
and HMAC-SHA256 with client secret for the PRR. (rappor.py)
- hash_candidates.py and rappor_sim.py use the rappor.get_bloom_bits
API, which is more efficient since it calls the hash function once
instead of 'h' times.
- Make the randomness interface a simpler "IrrRand" interface, which has
2 implementations: pure Python and C. (The PRR always uses the same
randomness.)
- regtest.sh: Instead of a boolean for 'fast_counts', we have 3 modes:
Python client, C++ client, and 'fast_counts'. The same tests can be
run against each implementation. The results are put in different
directories, so they don't overwrite each other.
- Rename gen_reports.R to gen_true_values.R (since the "reports" are
typically the data transformed by RAPPOR).
- Change it to output the client and assign the cohort like this:
client,cohort,value
c1,1,v26
c1,1,v27
c2,2,v8
c2,2,v9
The cohort is stable per client, but the value is different.
- Add unit test
- tests/rappor_sim.py: New test protocol. Read true_values CSV input.
The CSV output contains the bloom filter, PRR, and IRR for easier
debugging.
- compare_dist.R loads the true values and computes the histograms,
rather than rappor_sim.py having to do it. This makes it easier to
use the test harness with C++ and other clients.
- demo.sh: quickly run equivalent tests for each mode.
- regtest.sh: show the implementation tested
2015-07-14 01:56:28 +03:00
|
|
|
local cases_list=$regtest_dir/test-cases.txt
|
2015-05-28 09:31:32 +03:00
|
|
|
# Need -- for regexes that start with -
|
|
|
|
$spec_gen | grep -E -- "$spec_regex" > $cases_list
|
2015-04-15 02:01:32 +03:00
|
|
|
|
2015-04-17 10:23:14 +03:00
|
|
|
# Generate parameters for all test cases.
|
|
|
|
cat $cases_list \
|
Make the Python simulation into a Python client library.
- It can be used in applications, and follows the same choices as the
C++ (and Java) client libraries: MD5 to choose the bloom filter bits,
and HMAC-SHA256 with client secret for the PRR. (rappor.py)
- hash_candidates.py and rappor_sim.py use the rappor.get_bloom_bits
API, which is more efficient since it calls the hash function once
instead of 'h' times.
- Make the randomness interface a simpler "IrrRand" interface, which has
2 implementations: pure Python and C. (The PRR always uses the same
randomness.)
- regtest.sh: Instead of a boolean for 'fast_counts', we have 3 modes:
Python client, C++ client, and 'fast_counts'. The same tests can be
run against each implementation. The results are put in different
directories, so they don't overwrite each other.
- Rename gen_reports.R to gen_true_values.R (since the "reports" are
typically the data transformed by RAPPOR).
- Change it to output the client and assign the cohort like this:
client,cohort,value
c1,1,v26
c1,1,v27
c2,2,v8
c2,2,v9
The cohort is stable per client, but the value is different.
- Add unit test
- tests/rappor_sim.py: New test protocol. Read true_values CSV input.
The CSV output contains the bloom filter, PRR, and IRR for easier
debugging.
- compare_dist.R loads the true values and computes the histograms,
rather than rappor_sim.py having to do it. This makes it easier to
use the test harness with C++ and other clients.
- demo.sh: quickly run equivalent tests for each mode.
- regtest.sh: show the implementation tested
2015-07-14 01:56:28 +03:00
|
|
|
| xargs -l -P $processors -- $0 _setup-one-case $impl \
|
2015-04-17 10:23:14 +03:00
|
|
|
|| test-error
|
2015-03-17 00:19:39 +03:00
|
|
|
|
2015-04-17 10:23:14 +03:00
|
|
|
log "Done generating parameters for all test cases"
|
2015-03-17 00:19:39 +03:00
|
|
|
|
Make the Python simulation into a Python client library.
- It can be used in applications, and follows the same choices as the
C++ (and Java) client libraries: MD5 to choose the bloom filter bits,
and HMAC-SHA256 with client secret for the PRR. (rappor.py)
- hash_candidates.py and rappor_sim.py use the rappor.get_bloom_bits
API, which is more efficient since it calls the hash function once
instead of 'h' times.
- Make the randomness interface a simpler "IrrRand" interface, which has
2 implementations: pure Python and C. (The PRR always uses the same
randomness.)
- regtest.sh: Instead of a boolean for 'fast_counts', we have 3 modes:
Python client, C++ client, and 'fast_counts'. The same tests can be
run against each implementation. The results are put in different
directories, so they don't overwrite each other.
- Rename gen_reports.R to gen_true_values.R (since the "reports" are
typically the data transformed by RAPPOR).
- Change it to output the client and assign the cohort like this:
client,cohort,value
c1,1,v26
c1,1,v27
c2,2,v8
c2,2,v9
The cohort is stable per client, but the value is different.
- Add unit test
- tests/rappor_sim.py: New test protocol. Read true_values CSV input.
The CSV output contains the bloom filter, PRR, and IRR for easier
debugging.
- compare_dist.R loads the true values and computes the histograms,
rather than rappor_sim.py having to do it. This makes it easier to
use the test harness with C++ and other clients.
- demo.sh: quickly run equivalent tests for each mode.
- regtest.sh: show the implementation tested
2015-07-14 01:56:28 +03:00
|
|
|
local instances_list=$regtest_dir/test-instances.txt
|
|
|
|
_setup-test-instances $instances $impl < $cases_list > $instances_list
|
2015-03-17 00:19:39 +03:00
|
|
|
|
2015-04-17 10:23:14 +03:00
|
|
|
cat $instances_list \
|
|
|
|
| xargs -l -P $processors -- $0 $func || test-error
|
2015-03-17 00:19:39 +03:00
|
|
|
|
2015-04-17 10:23:14 +03:00
|
|
|
log "Done running all test instances"
|
2015-03-17 00:19:39 +03:00
|
|
|
|
Make the Python simulation into a Python client library.
- It can be used in applications, and follows the same choices as the
C++ (and Java) client libraries: MD5 to choose the bloom filter bits,
and HMAC-SHA256 with client secret for the PRR. (rappor.py)
- hash_candidates.py and rappor_sim.py use the rappor.get_bloom_bits
API, which is more efficient since it calls the hash function once
instead of 'h' times.
- Make the randomness interface a simpler "IrrRand" interface, which has
2 implementations: pure Python and C. (The PRR always uses the same
randomness.)
- regtest.sh: Instead of a boolean for 'fast_counts', we have 3 modes:
Python client, C++ client, and 'fast_counts'. The same tests can be
run against each implementation. The results are put in different
directories, so they don't overwrite each other.
- Rename gen_reports.R to gen_true_values.R (since the "reports" are
typically the data transformed by RAPPOR).
- Change it to output the client and assign the cohort like this:
client,cohort,value
c1,1,v26
c1,1,v27
c2,2,v8
c2,2,v9
The cohort is stable per client, but the value is different.
- Add unit test
- tests/rappor_sim.py: New test protocol. Read true_values CSV input.
The CSV output contains the bloom filter, PRR, and IRR for easier
debugging.
- compare_dist.R loads the true values and computes the histograms,
rather than rappor_sim.py having to do it. This makes it easier to
use the test harness with C++ and other clients.
- demo.sh: quickly run equivalent tests for each mode.
- regtest.sh: show the implementation tested
2015-07-14 01:56:28 +03:00
|
|
|
make-summary $regtest_dir $impl
|
2015-04-17 10:23:14 +03:00
|
|
|
}
|
2015-03-17 00:19:39 +03:00
|
|
|
|
2015-05-28 09:31:32 +03:00
|
|
|
# Test spec script passed to _run-tests by run-seq, run and run-all.
|
|
|
|
readonly REGTEST_SPEC=tests/regtest_spec.py
|
|
|
|
|
|
|
|
# Run the test cases matching a pattern sequentially.
# NOTE: called by demo.sh.
#
# Arguments:
#   $1 - 'grep -E' regex matched against the spec lines (default: '^r-')
#   $2 - client implementation to test, e.g. python or cpp (default: cpp)
run-seq() {
  local spec_regex=${1:-'^r-'}  # grep -E format on the spec
  # <lang> is documented as optional, so give it a default; a bare $2 would
  # abort with "unbound variable" under 'set -o nounset'.
  local impl=${2:-cpp}

  # Quoted so that a regex containing spaces or glob characters reaches
  # _run-tests as exactly one argument.  'F' selects the non-parallel mode.
  time _run-tests "$REGTEST_SPEC" "$spec_regex" F "$impl"
}
|
2015-04-15 02:01:32 +03:00
|
|
|
|
2015-04-17 10:23:14 +03:00
|
|
|
# Run the test cases matching a pattern in parallel.
#
# Arguments:
#   $1 - 'grep -E' regex matched against the spec lines (default: '^r-')
#   $2 - client implementation to test, e.g. python or cpp (default: cpp)
run() {
  local spec_regex=${1:-'^r-'}  # grep -E format on the spec
  # The documented interface is 'run [<pattern> [<lang>]]', so the language is
  # the second argument (same as run-seq).  It gets a default because a bare
  # positional would abort with "unbound variable" under 'set -o nounset'.
  local impl=${2:-cpp}

  # Quoted so that a regex containing spaces or glob characters reaches
  # _run-tests as exactly one argument.  'T' selects the parallel mode.
  time _run-tests "$REGTEST_SPEC" "$spec_regex" T "$impl"
}
|
2015-03-17 00:19:39 +03:00
|
|
|
|
2015-05-28 09:31:32 +03:00
|
|
|
# Run every '^r-' test case in parallel against the cpp implementation.
# Slow: 7+ minutes on 8 cores.
run-all() {
  local spec_regex='^r-'
  local parallel=T
  local impl=cpp

  log "Running all tests. Can take a while."
  time _run-tests $REGTEST_SPEC "$spec_regex" $parallel $impl
}
|
|
|
|
|
|
|
|
# Run the test cases described by tests/user_spec.py against the cpp
# implementation.
#
# Arguments:
#   $1 - 'grep -E' regex matched against the spec lines (default: empty)
run-user() {
  local pattern=${1:-}
  local spec_gen=tests/user_spec.py

  # NOTE(review): the original comment on this flag read "too much memory",
  # which suggests sequential mode (F) may have been intended, yet the value
  # is T (the value 'run' passes for parallel mode).  Confirm which is wanted.
  local in_parallel=T

  time _run-tests "$spec_gen" "$pattern" $in_parallel cpp
}
|
|
|
|
|
|
|
|
# Compare the Python and C++ clients on identical, stable true values.
#
# Generates one true-values CSV under $REGTEST_BASE_DIR, builds the C++
# client, runs the '^demo3' case sequentially once per implementation with
# TRUE_VALUES_PATH pointing at that CSV, then prints the head of each
# implementation's case_reports.csv.
#
# Globals: REGTEST_BASE_DIR (read)
compare-python-cpp() {
  local num_unique_values=100
  local num_clients=10000
  local values_per_client=10
  local num_cohorts=64

  local true_values=$REGTEST_BASE_DIR/stable_true_values.csv

  # 'exp' is presumably the distribution name -- confirm against
  # tests/gen_true_values.R.
  tests/gen_true_values.R \
    exp "$num_unique_values" "$num_clients" "$values_per_client" \
    "$num_cohorts" \
    "$true_values"

  wc -l "$true_values"

  # Run Python and C++ simulation on the same input
  ./build.sh cpp-client

  # run-seq takes '<pattern> <lang>'.  The previous calls still passed an
  # extra '1' before the language, so run-seq picked up '1' as the
  # implementation for both runs.
  local impl
  for impl in python cpp; do
    TRUE_VALUES_PATH="$true_values" ./regtest.sh run-seq '^demo3' "$impl"
  done

  head _tmp/{python,cpp}/demo3/1/case_reports.csv
}
|
|
|
|
|
2016-02-03 23:06:03 +03:00
|
|
|
# Entry point: with no arguments print the usage text; otherwise treat the
# first argument as a function name and invoke it with the remaining
# arguments.
case $# in
  0) usage ;;
  *) "$@" ;;
esac
|