зеркало из https://github.com/mozilla/rappor.git
Multiprocess in RAPPOR, and write data needed to generate plot to a csv file
This commit is contained in:
Родитель
ea46c460af
Коммит
32f7890c49
|
@ -26,6 +26,7 @@ import json
|
|||
import struct
|
||||
import sys
|
||||
import os
|
||||
import math
|
||||
|
||||
from random import SystemRandom
|
||||
from hmac_drbg import HMAC_DRBG
|
||||
|
@ -198,8 +199,15 @@ def get_prr_masks(secret, word, prob_f, num_bits):
|
|||
#log('word %s, secret %s, HMAC-SHA256 %s', word, secret, h.hexdigest())
|
||||
|
||||
# Now go through each byte
|
||||
digest_bytes = h.generate(num_bits)
|
||||
assert len(digest_bytes) == num_bits
|
||||
# Collect num_bits bytes from the DRBG, asking for at most 900 bytes per
# generate() call.
# NOTE(review): 900 is presumably chosen to stay under the DRBG's per-call
# output limit -- confirm against hmac_drbg.
#
# The size of each request is derived from what is still missing rather than
# from num_bits % 900: the modulo is 0 whenever num_bits is an exact multiple
# of 900, which made the final request empty and left the digest short.
digest_bytes = []
remaining = num_bits
while remaining > 0:
    chunk_size = min(remaining, 900)
    digest_bytes += h.generate(chunk_size)
    remaining -= chunk_size

# One digest byte is needed per bloom filter bit.
assert len(digest_bytes) == num_bits
|
||||
|
||||
# Use 32 bits. If we want 64 bits, it may be fine to generate another 32
|
||||
# bytes by repeated HMAC. For arbitrary numbers of bytes it's probably
|
||||
|
|
|
@ -167,7 +167,7 @@ _run-one-instance() {
|
|||
< $case_dir/spec.txt
|
||||
|
||||
local instance_dir=$case_dir/$test_instance
|
||||
mkdir -p $instance_dir
|
||||
#mkdir -p $instance_dir
|
||||
|
||||
banner "Generating reports (gen_reports.R)"
|
||||
|
||||
|
|
|
@ -236,6 +236,12 @@ WriteSummary <- function(metrics, outdir) {
|
|||
Log('Wrote %s', filename)
|
||||
}
|
||||
|
||||
WritePlotData <- function(plot_data, outdir) {
  # Write plot_data to <outdir>/plot_data.csv (without row names) and log the
  # path that was written.
  csv_path <- file.path(outdir, 'plot_data.csv')
  write.csv(plot_data, file = csv_path, row.names = FALSE)
  Log('Wrote %s', csv_path)
}
|
||||
|
||||
main <- function(parsed) {
|
||||
args <- parsed$args
|
||||
options <- parsed$options
|
||||
|
@ -261,6 +267,8 @@ main <- function(parsed) {
|
|||
|
||||
WriteSummary(d$metrics, output_dir)
|
||||
WritePlot(p, output_dir)
|
||||
WritePlotData(d$plot_data, output_dir)
|
||||
|
||||
}
|
||||
|
||||
if (is_main) {
|
||||
|
|
|
@ -35,7 +35,7 @@ import os
|
|||
import random
|
||||
import sys
|
||||
import time
|
||||
|
||||
from multiprocessing import Pool, Lock
|
||||
import rappor # client library
|
||||
try:
|
||||
import fastrand
|
||||
|
@ -44,7 +44,6 @@ except ImportError:
|
|||
"Native fastrand module not imported; see README for speedups")
|
||||
fastrand = None
|
||||
|
||||
|
||||
def log(msg, *args):
|
||||
if args:
|
||||
msg = msg % args
|
||||
|
@ -143,7 +142,7 @@ def GenAssocTestdata(params1, params2, irr_rand, assoc_testdata_count,
|
|||
irr2_str = rappor.bit_string(irr2, params2.num_bloombits)
|
||||
|
||||
csv_out.writerow((client_str, cohort, irr1_str, irr2_str))

# Nothing else may be printed to stdout here.
# NOTE(review): main() builds csv_out on sys.stdout, so any bare print would
# presumably end up interleaved with the CSV rows -- confirm against caller.
report_index += 1
|
||||
|
||||
|
||||
|
@ -155,7 +154,10 @@ def RapporClientSim(params, irr_rand, csv_in, csv_out, max_lines):
|
|||
# TODO: It would be more instructive/efficient to construct an encoder
|
||||
# instance up front per client, rather than one per row below.
|
||||
start_time = time.time()
|
||||
|
||||
lock = Lock()
|
||||
poolsize = 8
|
||||
pool = Pool(processes=poolsize, initializer=init, initargs=(lock,))
|
||||
args = []
|
||||
for i, (client_str, cohort_str, true_value) in enumerate(csv_in):
|
||||
if i > max_lines:
|
||||
break
|
||||
|
@ -178,6 +180,22 @@ def RapporClientSim(params, irr_rand, csv_in, csv_out, max_lines):
|
|||
|
||||
cohort = int(cohort_str)
|
||||
secret = client_str
|
||||
args.append((params, cohort, secret, irr_rand, true_value))
|
||||
|
||||
rows = pool.map(create_report, args)
|
||||
for r in rows:
|
||||
csv_out.writerow(r)
|
||||
|
||||
|
||||
|
||||
|
||||
def create_report(input):
|
||||
params = input[0]
|
||||
cohort = input[1]
|
||||
secret = input[2]
|
||||
irr_rand = input[3]
|
||||
true_value = input[4]
|
||||
#csv_out = input[4]
|
||||
e = rappor.Encoder(params, cohort, secret, irr_rand)
|
||||
|
||||
# Real users should call e.encode(). For testing purposes, we also want
|
||||
|
@ -188,10 +206,19 @@ def RapporClientSim(params, irr_rand, csv_in, csv_out, max_lines):
|
|||
prr_str = rappor.bit_string(prr, params.num_bloombits)
|
||||
irr_str = rappor.bit_string(irr, params.num_bloombits)
|
||||
|
||||
out_row = (client_str, cohort_str, bloom_str, prr_str, irr_str)
|
||||
csv_out.writerow(out_row)
|
||||
#with lock:
|
||||
# out_row = (secret, str(cohort), bloom_str, prr_str, irr_str)
|
||||
# csv_out.writerow(out_row)
|
||||
out_row = (secret, str(cohort), bloom_str, prr_str, irr_str)
|
||||
return out_row
|
||||
|
||||
#return out_row
|
||||
|
||||
|
||||
def init(l):
    """Pool initializer: store the given lock as the module-level `lock`.

    Passed as `initializer=init, initargs=(lock,)` when the worker Pool is
    created, so the lock handed to the pool becomes a global of this module.

    Args:
      l: the lock object to publish as the global `lock`.
    """
    globals()['lock'] = l
|
||||
|
||||
def main(argv):
|
||||
(opts, argv) = CreateOptionsParser().parse_args(argv)
|
||||
|
||||
|
@ -212,7 +239,7 @@ def main(argv):
|
|||
if opts.random_mode == 'simple':
|
||||
irr_rand = rappor.SecureIrrRand(params)
|
||||
elif opts.random_mode == 'fast':
|
||||
if fastrand:
|
||||
if False:
|
||||
log('Using fastrand extension')
|
||||
# NOTE: This doesn't take 'rand'. It's seeded in C with srand().
|
||||
irr_rand = fastrand.FastIrrRand(params)
|
||||
|
@ -228,6 +255,7 @@ def main(argv):
|
|||
# - or srand(0) might do it.
|
||||
|
||||
csv_in = csv.reader(sys.stdin)
|
||||
global csv_out
|
||||
csv_out = csv.writer(sys.stdout)
|
||||
|
||||
if opts.assoc_testdata:
|
||||
|
|
|
@ -35,11 +35,13 @@ DISTRIBUTIONS = (
|
|||
|
||||
DISTRIBUTION_PARAMS = (
|
||||
# name, num unique values, num clients, values per client
|
||||
('tiny', 100, 1000, 1), # test for insufficient data
|
||||
('tiny', 100, 10000, 1), # test for insufficient data
|
||||
('small', 100, 1000000, 1),
|
||||
('medium', 1000, 10000000, 1),
|
||||
('medium2', 100, 10000000, 1),
|
||||
('medium3', 10000, 10000000, 1),
|
||||
('large', 10000, 100000000, 1),
|
||||
|
||||
# Params for testing how varying the number of clients affects the results
|
||||
('clients1', 100, 10000000, 1),
|
||||
('clients2', 100, 1000000, 1),
|
||||
|
@ -74,6 +76,9 @@ DISTRIBUTION_PARAMS = (
|
|||
('unique16', 1000, 10000000, 1),
|
||||
('unique17', 2000, 10000000, 1),
|
||||
('unique18', 5000, 10000000, 1),
|
||||
|
||||
('cohort', 10000, 10000000, 1),
|
||||
|
||||
)
|
||||
|
||||
# 'k, h, m' as in params file.
|
||||
|
@ -83,9 +88,10 @@ BLOOMFILTER_PARAMS = {
|
|||
'8x128x2': (8, 2, 128), # 128 cohorts, 8 bits each, 2 bits set in each
|
||||
'128x8x2': (128, 2, 8), # 8 cohorts, 128 bits each, 2 bits set in each
|
||||
'32x64x1': (32, 1, 64), # 64 cohorts, 32 bit each, 1 bits set in each
|
||||
'32x2x1': (32, 1, 2), # 1 cohort, 32 bit each, 1 bits set in each
|
||||
'32x2x1': (32, 1, 2), # 2 cohorts, 32 bits each, 1 bit set in each
|
||||
'32x64x2': (32, 2, 64), # 64 cohorts, 32 bits each, 2 bits set in each
|
||||
|
||||
'10000x200x2': (128, 2, 100), # 100 cohorts, 128 bits each, 2 bits set in each (NOTE: key name does not match these values)
|
||||
|
||||
# params for testing the size of the bloom filter
|
||||
'4x32x2': (4, 2, 32), # 32 cohorts, 4 bits each, 2 bits set in each
|
||||
|
@ -113,27 +119,33 @@ BLOOMFILTER_PARAMS = {
|
|||
'8x256x2': (8, 2, 256),
|
||||
|
||||
# with different number of hash functions
|
||||
'4x32x4': (4, 4, 32), # 32 cohorts, 4 bits each, 2 bits set in each
|
||||
'8x32x4': (8, 4, 32), # 32 cohorts, 8 bits each, 2 bits set in each
|
||||
'16x32x4': (16, 4, 32), # 32 cohorts, 16 bits each, 2 bits set in each
|
||||
'32x32x4': (32, 4, 32), # 32 cohorts, 32 bits each, 2 bits set in each
|
||||
'64x32x4': (64, 4, 32), # 32 cohorts, 64 bits each, 2 bits set in each
|
||||
'128x32x4': (128, 4, 32), # 32 cohorts, 128 bits each, 2 bits set in each
|
||||
'256x32x4': (256, 4, 32), # 32 cohorts, 256 bits each, 2 bits set in each
|
||||
'4x32x4': (4, 4, 32), # 32 cohorts, 4 bits each, 4 bits set in each
|
||||
'8x32x4': (8, 4, 32), # 32 cohorts, 8 bits each, 4 bits set in each
|
||||
'16x32x4': (16, 4, 32), # 32 cohorts, 16 bits each, 4 bits set in each
|
||||
'32x32x4': (32, 4, 32), # 32 cohorts, 32 bits each, 4 bits set in each
|
||||
'64x32x4': (64, 4, 32), # 32 cohorts, 64 bits each, 4 bits set in each
|
||||
'128x32x4': (128, 4, 32), # 32 cohorts, 128 bits each, 4 bits set in each
|
||||
'256x32x4': (256, 4, 32), # 32 cohorts, 256 bits each, 4 bits set in each
|
||||
#
|
||||
'4x128x4': (4, 4, 128), # 128 cohorts, 4 bits each, 2 bits set in each
|
||||
'8x128x4': (8, 4, 128), # 128 cohorts, 8 bits each, 2 bits set in each
|
||||
'16x128x4': (16, 4, 128), # 128 cohorts, 16 bits each, 2 bits set in each
|
||||
'32x128x4': (32, 4, 128), # 128 cohorts, 32 bits each, 2 bits set in each
|
||||
'64x128x4': (64, 4, 128), # 128 cohorts, 64 bits each, 2 bits set in each
|
||||
'128x128x4': (128, 4, 128), # 128 cohorts, 128 bits each, 2 bits set in each
|
||||
'256x128x4': (256, 4, 128), # 128 cohorts, 256 bits each, 2 bits set in each
|
||||
'4x128x4': (4, 4, 128), # 128 cohorts, 4 bits each, 4 bits set in each
|
||||
'8x128x4': (8, 4, 128), # 128 cohorts, 8 bits each, 4 bits set in each
|
||||
'16x128x4': (16, 4, 128), # 128 cohorts, 16 bits each, 4 bits set in each
|
||||
'32x128x4': (32, 4, 128), # 128 cohorts, 32 bits each, 4 bits set in each
|
||||
'64x128x4': (64, 4, 128), # 128 cohorts, 64 bits each, 4 bits set in each
|
||||
'128x128x4': (128, 4, 128), # 128 cohorts, 128 bits each, 4 bits set in each
|
||||
'256x128x4': (256, 4, 128), # 128 cohorts, 256 bits each, 4 bits set in each
|
||||
|
||||
# params for testing the number of hash functions
|
||||
'8x128x1' : (8, 1, 128),
|
||||
'8x128x4' : (8, 4, 128),
|
||||
'8x128x8' : (8, 8, 128),
|
||||
'8x128x16' : (8, 16, 128),
|
||||
|
||||
'256x128x1':(256, 1, 128),
|
||||
'256x128x4':(256, 4, 128),
|
||||
'256x128x8':(256, 8, 128),
|
||||
'256x128x16':(256, 16, 128),
|
||||
|
||||
}
|
||||
|
||||
# 'p, q, f' as in params file.
|
||||
|
@ -160,6 +172,8 @@ PRIVACY_PARAMS = {
|
|||
#
|
||||
'params12': (0.5, 0.75, 0.75),
|
||||
'params13': (0.25, 0.75, 0.5),
|
||||
'params14': (0.35, 0.65, 0.0),
|
||||
|
||||
}
|
||||
|
||||
# For deriving candidates from true inputs.
|
||||
|
@ -232,6 +246,13 @@ TEST_CONFIGS = [
|
|||
('sim_hash1_4', '8x128x8', 'params6', .0, 'sharp'),
|
||||
('sim_hash1_5', '8x128x16', 'params7', .0, 'sharp'),
|
||||
|
||||
|
||||
('sim_hash2_1', '256x128x1', 'params3', .0, 'sharp'),
|
||||
('sim_hash2_2', '256x128x2', 'params4', .0, 'sharp'),
|
||||
('sim_hash2_3', '256x128x4', 'params5', .0, 'sharp'),
|
||||
('sim_hash2_4', '256x128x8', 'params6', .0, 'sharp'),
|
||||
('sim_hash2_5', '256x128x16', 'params7', .0, 'sharp'),
|
||||
|
||||
# configuration for testing the number of cohorts
|
||||
('sim_cohort1_1', '8x2x2', 'params3', .0, 'sharp'),
|
||||
('sim_cohort1_2', '8x4x2', 'params3', .0, 'sharp'),
|
||||
|
@ -242,6 +263,14 @@ TEST_CONFIGS = [
|
|||
('sim_cohort1_7', '8x128x2', 'params3', .0, 'sharp'),
|
||||
('sim_cohort1_8', '8x256x2', 'params3', .0, 'sharp'),
|
||||
|
||||
('sim_cohort2_1', '8x2x2', 'params3', .0, 'sharp'),
|
||||
('sim_cohort2_2', '8x4x2', 'params3', .0, 'sharp'),
|
||||
('sim_cohort2_3', '8x8x2', 'params3', .0, 'sharp'),
|
||||
('sim_cohort2_4', '8x16x2', 'params3', .0, 'sharp'),
|
||||
('sim_cohort2_5', '8x32x2', 'params3', .0, 'sharp'),
|
||||
('sim_cohort2_6', '8x64x2', 'params3', .0, 'sharp'),
|
||||
('sim_cohort2_7', '8x128x2', 'params3', .0, 'sharp'),
|
||||
('sim_cohort2_8', '8x256x2', 'params3', .0, 'sharp'),
|
||||
# configuration for testing different probabilities p, q, f
|
||||
('sim_probs1_1', '8x128x2', 'params3', .0, 'sharp'),
|
||||
('sim_probs1_2', '8x128x2', 'params8', .0, 'sharp'),
|
||||
|
@ -253,6 +282,9 @@ TEST_CONFIGS = [
|
|||
('sim_case_scenario_1', '16x128x2', 'params12', .0, 'sharp'),
|
||||
('sim_case_scenario_2', '16x128x2', 'params13', .0, 'sharp'),
|
||||
|
||||
#
|
||||
('sim_final', '10000x200x2', 'params14', .0, 'sharp'),
|
||||
|
||||
]
|
||||
|
||||
#
|
||||
|
|
Загрузка…
Ссылка в новой задаче