From 669c53a8463b3d57810932273cf7ae948d4a76ea Mon Sep 17 00:00:00 2001 From: Rodrigo Racanicci Date: Thu, 16 Mar 2023 13:41:37 -0300 Subject: [PATCH] updating rust dependencies and updating the code accordingly --- Cargo.lock | 221 +++++- packages/cli/src/main.rs | 2 +- packages/core/Cargo.toml | 4 +- packages/core/src/dp/dp_parameters.rs | 22 +- packages/core/tests/dp/noise_aggregator.rs | 2 +- packages/lib-pacsynth/Cargo.toml | 2 +- ...dp_aggregate_seeded_detailed_example.ipynb | 660 +++++++++--------- .../dp_aggregate_seeded_short_example.ipynb | 154 ++-- .../src/aggregate_seeded/dp/synthesizer.rs | 2 +- packages/lib-python/Cargo.toml | 4 +- packages/lib-python/src/data_processor.rs | 2 +- .../lib-wasm/src/utils/js/ts_definitions.rs | 4 +- packages/python-pipeline/src/aggregator.py | 8 +- packages/python-pipeline/src/evaluator.py | 6 +- packages/python-pipeline/src/generator.py | 4 +- .../DataSynthesis/DataSynthesis.hooks.ts | 6 +- 16 files changed, 633 insertions(+), 470 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8e25650..060a0a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,11 +4,11 @@ version = 3 [[package]] name = "ahash" -version = "0.7.6" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" dependencies = [ - "getrandom", + "cfg-if", "once_cell", "version_check", ] @@ -46,7 +46,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -93,6 +93,12 @@ version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2f5715e491b5a1598fc2bef5a606847b5dc1d48ea625bd3c02c00de8285591da" +[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" + [[package]] name = "cfg-if" version = "1.0.0" @@ -154,7 +160,7 @@ dependencies = [ "cfg-if", "crossbeam-utils", "lazy_static", - "memoffset", + "memoffset 0.6.5", "scopeguard", ] @@ -215,6 +221,40 @@ dependencies = [ "termcolor", ] +[[package]] +name = "env_logger" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85cdab6a89accf66733ad5a1693a4dcced6aeff64602b634530dd73c1f3ee9f0" +dependencies = [ + "humantime", + "is-terminal", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "errno" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc", + "winapi", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "fnv" version = "1.0.7" @@ -236,9 +276,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" dependencies = [ "ahash", ] @@ -261,6 +301,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + [[package]] name = "humantime" version = "2.1.0" @@ -286,6 +332,28 @@ dependencies = [ "web-sys", ] +[[package]] +name = "io-lifetimes" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfa919a82ea574332e2de6e74b4c36e74d41982b335080fa59d4ef31be20fdf3" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "is-terminal" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b6b32576413a8e69b90e952e4a026476040d81017b80445deda5f2d3921857" +dependencies = [ + "hermit-abi 0.3.1", + "io-lifetimes", + "rustix", + "windows-sys", +] + [[package]] name = "itertools" version = "0.10.1" @@ -318,9 +386,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.108" +version = "0.2.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8521a1b57e76b1ec69af7599e75e38e7b7fad6610f037db8c79b127201b5d119" +checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" [[package]] name = "libm" @@ -328,6 +396,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7d73b3f436185384286bd8098d17ec07c9a7d2388a6599f824d8502b529702a" +[[package]] +name = "linux-raw-sys" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" + [[package]] name = "lock_api" version = "0.4.5" @@ -348,9 +422,9 @@ dependencies = [ [[package]] name = "lru" -version = "0.8.1" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6e8aaa3f231bb4bd57b84b2d5dc3ae7f350265df8aa96492e0bc394a1571909" +checksum = "03f1160296536f10c833a82dca22267d5486734230d47bf00bf435885814ba1e" dependencies = [ "hashbrown", ] @@ -379,6 +453,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" +dependencies = [ + "autocfg", +] + [[package]] name = "nalgebra" version = "0.29.0" @@ -454,15 +537,15 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", ] [[package]] name = "once_cell" -version = "1.8.0" +version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" [[package]] name = "pac-synth" @@ -547,14 +630,14 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.17.2" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "201b6887e5576bf2f945fe65172c1fcbf3fcf285b23e4d71eb171d9736e38d32" +checksum = "06a3d8e8a46ab2738109347433cb7b96dffda2e4a218b03ef27090238886b147" dependencies = [ "cfg-if", "indoc", "libc", - "memoffset", + "memoffset 0.8.0", "parking_lot", "pyo3-build-config", "pyo3-ffi", @@ -564,9 +647,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.17.2" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf0708c9ed01692635cbf056e286008e5a2927ab1a5e48cdd3aeb1ba5a6fef47" +checksum = "75439f995d07ddfad42b192dfcf3bc66a7ecfd8b4a1f5f6f046aa5c2c5d7677d" dependencies = [ "once_cell", "target-lexicon", @@ -574,9 +657,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.17.2" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90352dea4f486932b72ddf776264d293f85b79a1d214de1d023927b41461132d" +checksum = "839526a5c07a17ff44823679b68add4a58004de00512a95b6c1c98a6dcac0ee5" dependencies = [ "libc", "pyo3-build-config", @@ -584,9 +667,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.17.2" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb24b804a2d9e88bfcc480a5a6dd76f006c1e3edaf064e8250423336e2cd79d" +checksum = "bd44cf207476c6a9760c4653559be4f206efafb924d3e4cbf2721475fc0d6cc5" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -596,9 +679,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.17.2" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f22bb49f6a7348c253d7ac67a6875f2dc65f36c2ae64a82c381d528972bea6d6" +checksum = "dc1f43d8e30460f36350d18631ccf85ded64c059829208fe680904c65bcd0a4c" dependencies = [ "proc-macro2", "quote", @@ -736,6 +819,20 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.36.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd5c6ff11fecd55b40746d1995a02f2eb375bf8c00d192d521ee09f42bef37bc" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys", + "windows-sys", +] + [[package]] name = "ryu" version = "1.0.5" @@ -768,7 +865,7 @@ name = "sds-cli" version = "1.8.6" dependencies = [ "csv", - "env_logger", + "env_logger 0.9.0", "log", "sds-core", "statrs", @@ -799,7 +896,7 @@ name = "sds-pyo3" version = "1.8.6" dependencies = [ "csv", - "env_logger", + "env_logger 0.10.0", "log", "pyo3", "sds-core", @@ -1058,9 +1155,9 @@ checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" [[package]] name = "version_check" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wasi" @@ -1210,3 +1307,69 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" diff --git a/packages/cli/src/main.rs b/packages/cli/src/main.rs index 2b568f4..4724098 100644 --- a/packages/cli/src/main.rs +++ b/packages/cli/src/main.rs @@ -423,9 +423,9 @@ fn main() { reporting_length, &DpParameters::new( noise_epsilon.unwrap(), - noise_delta, sensitivities_percentile.unwrap(), sensitivities_epsilon_proportion.unwrap(), + noise_delta, sigma_proportions, number_of_records_epsilon_proportion, ), diff --git a/packages/core/Cargo.toml b/packages/core/Cargo.toml index eee86cc..b8b77c2 100644 --- a/packages/core/Cargo.toml +++ b/packages/core/Cargo.toml @@ -13,12 +13,12 @@ crate-type = ["rlib"] rand = { version = "0.8" } fnv = { version = "1.0" } itertools = { version = "0.10" } -lru = { version = "0.8" } +lru = { version = "0.10" } getrandom = { version = "0.2", features = ["js"] } log = { version = "0.4", features = ["std"] } csv = { version = "1.1" } instant = { version = "0.1", features = [ "stdweb", "wasm-bindgen" ] } -pyo3 = { version = "0.17", features = ["extension-module"], optional = true } +pyo3 = { version = "0.18", features = ["extension-module"], optional = true } rayon = { version = "1.5", optional = true } serde = { version = "1.0", features = [ "derive", "rc" ] } serde_json = { version = "1.0" } diff --git a/packages/core/src/dp/dp_parameters.rs b/packages/core/src/dp/dp_parameters.rs index f5706f8..e1fc2ff 100644 --- a/packages/core/src/dp/dp_parameters.rs +++ b/packages/core/src/dp/dp_parameters.rs @@ -10,14 +10,14 @@ pub struct DpParameters { /// Overall privacy budget used between /// percentile filtering and noisy generation by combination length pub epsilon: f64, - /// Delta value used for noisy generation by combination length, if None will be set - /// in runtime to `1 / (ln(protected_number_of_records) * protected_number_of_records)` - pub delta: Option, /// Percentage used to calculate the percentile that filters sensitivity pub percentile_percentage: usize, /// Maximum proportion to consume of the total privacy budget (0.1 means 10%) /// during the sensitivity filter stage pub percentile_epsilon_proportion: f64, + /// Delta value used for noisy generation by combination length, if None will be set + /// in runtime to `1 / (ln(protected_number_of_records) * protected_number_of_records)` + pub delta: Option, /// `epsilon` and `percentile_epsilon_proportion` will be used to infer the /// sigma value by combination length. This parameters /// controls how the budget being split across combination lengths @@ -38,11 +38,11 @@ impl DpParameters { /// # Arguments /// * `epsilon` - Overall privacy budget used between /// percentile filtering and noisy generation by combination length - /// * `delta` - Delta value used for noisy generation by combination length, if None will be set - /// in runtime to `1 / (ln(protected_number_of_records) * protected_number_of_records)` /// * `percentile_percentage` - Percentage used to calculate the percentile that filters sensitivity /// * `percentile_epsilon_proportion` - Maximum proportion to consume of the total privacy budget (0.1 means 10%) /// during the sensitivity filter stage + /// * `delta` - Delta value used for noisy generation by combination length, if None will be set + /// in runtime to `1 / (ln(protected_number_of_records) * protected_number_of_records)` /// * `sigma_proportions` - `epsilon` and `percentile_epsilon_proportion` will be used to infer the /// sigma value by combination length. This parameters /// controls how the budget being split across combination lengths @@ -52,17 +52,17 @@ impl DpParameters { /// in the aggregated data (if None, no noise is added) pub fn new( epsilon: f64, - delta: Option, percentile_percentage: usize, percentile_epsilon_proportion: f64, + delta: Option, sigma_proportions: Option>, number_of_records_epsilon_proportion: Option, ) -> Self { DpParameters { epsilon, - delta, percentile_percentage, percentile_epsilon_proportion, + delta, sigma_proportions, number_of_records_epsilon_proportion, } @@ -74,11 +74,11 @@ impl DpParameters { /// # Arguments /// * `epsilon` - Overall privacy budget used between /// percentile filtering and noisy generation by combination length - /// * `delta` - Delta value used for noisy generation by combination length, if None will be set - /// in runtime to `1 / (ln(protected_number_of_records) * protected_number_of_records)` /// * `percentile_percentage` - Percentage used to calculate the percentile that filters sensitivity /// * `percentile_epsilon_proportion` - Maximum proportion to consume of the total privacy budget (0.1 means 10%) /// during the sensitivity filter stage + /// * `delta` - Delta value used for noisy generation by combination length, if None will be set + /// in runtime to `1 / (ln(protected_number_of_records) * protected_number_of_records)` /// * `sigma_proportions` - `epsilon` and `percentile_epsilon_proportion` will be used to infer the /// sigma value by combination length. This parameters /// controls how the budget being split across combination lengths @@ -88,17 +88,17 @@ impl DpParameters { /// in the aggregated data (if None, no noise is added) pub fn new( epsilon: f64, - delta: Option, percentile_percentage: usize, percentile_epsilon_proportion: f64, + delta: Option, sigma_proportions: Option>, number_of_records_epsilon_proportion: Option, ) -> Self { DpParameters { epsilon, - delta, percentile_percentage, percentile_epsilon_proportion, + delta, sigma_proportions, number_of_records_epsilon_proportion, } diff --git a/packages/core/tests/dp/noise_aggregator.rs b/packages/core/tests/dp/noise_aggregator.rs index b3b658e..76f9283 100644 --- a/packages/core/tests/dp/noise_aggregator.rs +++ b/packages/core/tests/dp/noise_aggregator.rs @@ -23,7 +23,7 @@ fn get_noise_aggregator() -> NoiseAggregator { 0, ), 3, - &DpParameters::new(1.0, Some(0.001), 99, 0.1, None, None), + &DpParameters::new(1.0, 99, 0.1, Some(0.001), None, None), NoisyCountThreshold::Fixed(InputValueByLen::default()), ) } diff --git a/packages/lib-pacsynth/Cargo.toml b/packages/lib-pacsynth/Cargo.toml index 72a2933..b5bc521 100644 --- a/packages/lib-pacsynth/Cargo.toml +++ b/packages/lib-pacsynth/Cargo.toml @@ -12,7 +12,7 @@ crate-type = ["cdylib"] [dependencies] log = { version = "0.4", features = ["std"] } -pyo3 = { version = "0.17", features = ["extension-module", "abi3-py37"] } +pyo3 = { version = "0.18", features = ["extension-module", "abi3-py37"] } sds-core = { path = "../core", features = ["pyo3", "rayon"] } serde = { version = "1.0", features = [ "derive", "rc" ] } serde_json = { version = "1.0" } \ No newline at end of file diff --git a/packages/lib-pacsynth/samples/dp_aggregate_seeded_detailed_example.ipynb b/packages/lib-pacsynth/samples/dp_aggregate_seeded_detailed_example.ipynb index fe486af..84c52c8 100644 --- a/packages/lib-pacsynth/samples/dp_aggregate_seeded_detailed_example.ipynb +++ b/packages/lib-pacsynth/samples/dp_aggregate_seeded_detailed_example.ipynb @@ -140,68 +140,68 @@ " \n", " \n", " 0\n", - " 1\n", - " 1\n", + " 2\n", + " \n", " 4\n", - " 1\n", - " 1\n", " 0\n", " 0\n", " 1\n", " 1\n", " 0\n", + " 1\n", + " 1\n", " \n", " \n", " 1\n", - " 1\n", - " 1\n", " \n", - " 1\n", + " 3\n", + " 2\n", " 1\n", " 0\n", " 1\n", " 1\n", " 1\n", " 0\n", + " 0\n", " \n", " \n", " 2\n", - " 2\n", - " \n", - " 4\n", - " 1\n", " 1\n", + " 3\n", + " 5\n", " 1\n", " 0\n", + " 0\n", " 1\n", + " 0\n", " 1\n", " 0\n", " \n", " \n", " 3\n", + " 2\n", + " 2\n", " 1\n", - " \n", - " 4\n", - " 0\n", - " 0\n", - " 0\n", " 0\n", " 0\n", " 1\n", + " 0\n", " 1\n", + " 1\n", + " 0\n", " \n", " \n", " 4\n", " 1\n", " 1\n", - " \n", - " 1\n", - " 0\n", + " 5\n", " 1\n", " 1\n", " 0\n", " 1\n", " 1\n", + " 0\n", + " 0\n", " \n", " \n", " ...\n", @@ -218,68 +218,68 @@ " \n", " \n", " 5995\n", - " 1\n", + " 2\n", " 4\n", - " 9\n", + " 8\n", + " 0\n", + " 0\n", + " 0\n", " 0\n", " 1\n", - " 1\n", - " 1\n", - " 0\n", " 0\n", " 0\n", " \n", " \n", " 5996\n", " \n", - " 6\n", - " 9\n", + " \n", + " \n", + " 1\n", " 1\n", " 0\n", - " 1\n", - " 1\n", + " 0\n", " 0\n", " 1\n", " 0\n", " \n", " \n", " 5997\n", - " 1\n", - " 4\n", + " 2\n", " 6\n", + " 9\n", + " 1\n", + " 1\n", " 0\n", - " 0\n", + " 1\n", " 0\n", " 1\n", " 1\n", - " 1\n", - " 0\n", " \n", " \n", " 5998\n", " \n", - " \n", - " 10\n", - " 1\n", - " 0\n", - " 1\n", + " 4\n", + " 6\n", " 1\n", " 1\n", " 0\n", " 0\n", + " 1\n", + " 0\n", + " 1\n", " \n", " \n", " 5999\n", " 2\n", " \n", - " 10\n", + " 7\n", + " 0\n", + " 0\n", " 0\n", " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", + " 0\n", + " 0\n", + " 0\n", " \n", " \n", "\n", @@ -287,18 +287,18 @@ "" ], "text/plain": [ - " H1 H2 H3 H4 H5 H6 H7 H8 H9 H10\n", - "0 1 1 4 1 1 0 0 1 1 0\n", - "1 1 1 1 1 0 1 1 1 0\n", - "2 2 4 1 1 1 0 1 1 0\n", - "3 1 4 0 0 0 0 0 1 1\n", - "4 1 1 1 0 1 1 0 1 1\n", - "... .. .. .. .. .. .. .. .. .. ..\n", - "5995 1 4 9 0 1 1 1 0 0 0\n", - "5996 6 9 1 0 1 1 0 1 0\n", - "5997 1 4 6 0 0 0 1 1 1 0\n", - "5998 10 1 0 1 1 1 0 0\n", - "5999 2 10 0 1 1 1 1 1 1\n", + " H1 H2 H3 H4 H5 H6 H7 H8 H9 H10\n", + "0 2 4 0 0 1 1 0 1 1\n", + "1 3 2 1 0 1 1 1 0 0\n", + "2 1 3 5 1 0 0 1 0 1 0\n", + "3 2 2 1 0 0 1 0 1 1 0\n", + "4 1 1 5 1 1 0 1 1 0 0\n", + "... .. .. .. .. .. .. .. .. .. ..\n", + "5995 2 4 8 0 0 0 0 1 0 0\n", + "5996 1 1 0 0 0 1 0\n", + "5997 2 6 9 1 1 0 1 0 1 1\n", + "5998 4 6 1 1 0 0 1 0 1\n", + "5999 2 7 0 0 0 1 0 0 0\n", "\n", "[6000 rows x 10 columns]" ] @@ -555,68 +555,68 @@ " \n", " \n", " 0\n", - " 1\n", - " 1\n", + " 2\n", + " \n", " 4\n", - " 1\n", - " 1\n", " \n", " \n", " 1\n", " 1\n", " \n", + " 1\n", + " 1\n", " \n", " \n", " 1\n", - " 1\n", - " 1\n", " \n", - " 1\n", + " 3\n", + " 2\n", " 1\n", " \n", " 1\n", " 1\n", " 1\n", " \n", + " \n", " \n", " \n", " 2\n", - " 2\n", - " \n", - " 4\n", - " 1\n", " 1\n", + " 3\n", + " 5\n", " 1\n", " \n", + " \n", " 1\n", + " \n", " 1\n", " \n", " \n", " \n", " 3\n", + " 2\n", + " 2\n", " 1\n", " \n", - " 4\n", - " \n", - " \n", - " \n", - " \n", " \n", " 1\n", + " \n", " 1\n", + " 1\n", + " \n", " \n", " \n", " 4\n", " 1\n", " 1\n", - " \n", - " 1\n", - " \n", + " 5\n", " 1\n", " 1\n", " \n", " 1\n", " 1\n", + " \n", + " \n", " \n", " \n", " ...\n", @@ -633,68 +633,68 @@ " \n", " \n", " 5995\n", - " 1\n", + " 2\n", " 4\n", - " 9\n", + " 8\n", + " \n", + " \n", + " \n", " \n", " 1\n", - " 1\n", - " 1\n", - " \n", " \n", " \n", " \n", " \n", " 5996\n", " \n", - " 6\n", - " 9\n", - " 1\n", + " \n", " \n", " 1\n", " 1\n", " \n", + " \n", + " \n", " 1\n", " \n", " \n", " \n", " 5997\n", - " 1\n", - " 4\n", + " 2\n", " 6\n", + " 9\n", + " 1\n", + " 1\n", " \n", - " \n", + " 1\n", " \n", " 1\n", " 1\n", - " 1\n", - " \n", " \n", " \n", " 5998\n", " \n", - " \n", - " 10\n", - " 1\n", - " \n", - " 1\n", + " 4\n", + " 6\n", " 1\n", " 1\n", " \n", " \n", + " 1\n", + " \n", + " 1\n", " \n", " \n", " 5999\n", " 2\n", " \n", - " 10\n", + " 7\n", + " \n", + " \n", " \n", " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", + " \n", + " \n", + " \n", " \n", " \n", "\n", @@ -702,18 +702,18 @@ "" ], "text/plain": [ - " H1 H2 H3 H4 H5 H6 H7 H8 H9 H10\n", - "0 1 1 4 1 1 1 1 \n", - "1 1 1 1 1 1 1 1 \n", - "2 2 4 1 1 1 1 1 \n", - "3 1 4 1 1\n", - "4 1 1 1 1 1 1 1\n", - "... .. .. .. .. .. .. .. .. .. ..\n", - "5995 1 4 9 1 1 1 \n", - "5996 6 9 1 1 1 1 \n", - "5997 1 4 6 1 1 1 \n", - "5998 10 1 1 1 1 \n", - "5999 2 10 1 1 1 1 1 1\n", + " H1 H2 H3 H4 H5 H6 H7 H8 H9 H10\n", + "0 2 4 1 1 1 1\n", + "1 3 2 1 1 1 1 \n", + "2 1 3 5 1 1 1 \n", + "3 2 2 1 1 1 1 \n", + "4 1 1 5 1 1 1 1 \n", + "... .. .. .. .. .. .. .. .. .. ..\n", + "5995 2 4 8 1 \n", + "5996 1 1 1 \n", + "5997 2 6 9 1 1 1 1 1\n", + "5998 4 6 1 1 1 1\n", + "5999 2 7 1 \n", "\n", "[6000 rows x 10 columns]" ] @@ -885,7 +885,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Number of records protected with DP: 6004\n" + "Number of records protected with DP: 6024\n" ] }, { @@ -924,9 +924,9 @@ " \n", " \n", " 0\n", - " 1\n", - " 1\n", - " 3\n", + " \n", + " 2\n", + " 5\n", " 1\n", " 1\n", " 1\n", @@ -937,9 +937,9 @@ " \n", " \n", " 1\n", - " 1\n", - " 1\n", - " 3\n", + " \n", + " 2\n", + " 5\n", " 1\n", " 1\n", " 1\n", @@ -950,9 +950,9 @@ " \n", " \n", " 2\n", - " 1\n", - " 1\n", - " 3\n", + " \n", + " 2\n", + " 5\n", " 1\n", " 1\n", " 1\n", @@ -963,9 +963,9 @@ " \n", " \n", " 3\n", - " 1\n", - " 1\n", - " 3\n", + " \n", + " 2\n", + " 5\n", " 1\n", " 1\n", " 1\n", @@ -976,9 +976,9 @@ " \n", " \n", " 4\n", - " 1\n", - " 6\n", - " 10\n", + " \n", + " 2\n", + " 5\n", " 1\n", " 1\n", " 1\n", @@ -1001,12 +1001,12 @@ " ...\n", " \n", " \n", - " 5999\n", - " \n", - " 6\n", - " 3\n", + " 6019\n", " \n", " 1\n", + " 8\n", + " 1\n", + " \n", " \n", " \n", " \n", @@ -1014,23 +1014,23 @@ " \n", " \n", " \n", - " 6000\n", + " 6020\n", " \n", - " 6\n", + " 5\n", " 5\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", " 1\n", + " \n", + " \n", + " \n", " \n", " \n", - " 6001\n", + " 6021\n", + " 1\n", " 1\n", " 3\n", - " 6\n", " \n", " \n", " \n", @@ -1040,10 +1040,10 @@ " \n", " \n", " \n", - " 6002\n", + " 6022\n", + " 1\n", " 1\n", " 3\n", - " 7\n", " \n", " \n", " \n", @@ -1053,10 +1053,10 @@ " \n", " \n", " \n", - " 6003\n", + " 6023\n", + " 1\n", " 1\n", " 3\n", - " 7\n", " \n", " \n", " \n", @@ -1067,24 +1067,24 @@ " \n", " \n", "\n", - "

6004 rows × 10 columns

\n", + "

6024 rows × 10 columns

\n", "" ], "text/plain": [ - " H1 H2 H3 H4 H5 H6 H7 H8 H9 H10\n", - "0 1 1 3 1 1 1 1 1 1 1\n", - "1 1 1 3 1 1 1 1 1 1 1\n", - "2 1 1 3 1 1 1 1 1 1 1\n", - "3 1 1 3 1 1 1 1 1 1 1\n", - "4 1 6 10 1 1 1 1 1 1 1\n", - "... .. .. .. .. .. .. .. .. .. ..\n", - "5999 6 3 1 \n", - "6000 6 5 1\n", - "6001 1 3 6 \n", - "6002 1 3 7 \n", - "6003 1 3 7 \n", + " H1 H2 H3 H4 H5 H6 H7 H8 H9 H10\n", + "0 2 5 1 1 1 1 1 1 1\n", + "1 2 5 1 1 1 1 1 1 1\n", + "2 2 5 1 1 1 1 1 1 1\n", + "3 2 5 1 1 1 1 1 1 1\n", + "4 2 5 1 1 1 1 1 1 1\n", + "... .. .. .. .. .. .. .. .. .. ..\n", + "6019 1 8 1 \n", + "6020 5 5 1 \n", + "6021 1 1 3 \n", + "6022 1 1 3 \n", + "6023 1 1 3 \n", "\n", - "[6004 rows x 10 columns]" + "[6024 rows x 10 columns]" ] }, "execution_count": 9, @@ -1125,26 +1125,26 @@ { "data": { "text/plain": [ - "[('H2:6;H3:10;H5:1;H7:1', 68),\n", - " ('H2:5;H3:9;H6:1;H8:1', 13),\n", - " ('H1:1;H3:7;H6:1', 97),\n", - " ('H1:1;H2:1;H3:1;H4:1', 24),\n", - " ('H10:1;H2:1;H3:3;H4:1', 31),\n", - " ('H2:4;H3:8;H5:1;H9:1', 31),\n", - " ('H10:1;H3:5;H4:1;H6:1', 51),\n", - " ('H10:1;H1:1;H3:7;H8:1', 23),\n", - " ('H1:2;H2:6;H3:7;H4:1', 19),\n", - " ('H2:4;H3:10;H4:1;H9:1', 21),\n", - " ('H3:1;H4:1;H7:1;H8:1', 74),\n", - " ('H1:2;H2:2;H3:5;H9:1', 17),\n", - " ('H1:1;H2:2;H7:1;H8:1', 43),\n", - " ('H2:4;H3:7;H4:1;H6:1', 21),\n", - " ('H1:2;H2:4;H3:5', 5),\n", - " ('H10:1;H2:6;H3:8;H8:1', 22),\n", - " ('H10:1;H1:1;H6:1;H8:1', 274),\n", - " ('H1:1;H7:1;H8:1', 473),\n", - " ('H1:1;H3:7;H5:1;H9:1', 39),\n", - " ('H3:1;H6:1;H7:1;H9:1', 44)]" + "[('H2:6;H3:10;H5:1;H7:1', 47),\n", + " ('H1:1;H3:6;H4:1;H8:1', 12),\n", + " ('H1:1;H3:3;H6:1;H8:1', 50),\n", + " ('H2:5;H3:9;H6:1;H8:1', 21),\n", + " ('H1:1;H3:7;H6:1', 87),\n", + " ('H1:2;H2:5;H3:9;H9:1', 34),\n", + " ('H2:2;H3:9', 21),\n", + " ('H2:6;H3:9;H8:1', 73),\n", + " ('H10:1;H3:6;H8:1', 133),\n", + " ('H1:2;H2:5;H4:1', 141),\n", + " ('H1:1;H2:6;H3:2', 6),\n", + " ('H2:4;H3:8;H5:1;H9:1', 15),\n", + " ('H10:1;H2:1;H3:3;H4:1', 1),\n", + " ('H10:1;H3:5;H4:1;H6:1', 89),\n", + " ('H10:1;H1:1;H3:7;H8:1', 51),\n", + " ('H1:2;H2:4;H3:9;H9:1', 8),\n", + " ('H1:2;H2:6;H3:7;H4:1', 1),\n", + " ('H10:1;H1:2;H5:1;H6:1', 240),\n", + " ('H10:1;H3:7;H5:1', 74),\n", + " ('H2:4;H3:10;H4:1;H9:1', 52)]" ] }, "execution_count": 10, @@ -1249,37 +1249,37 @@ " \n", " 0\n", " 1\n", - " 1381.04 +/- 13.08\n", + " 1391.76 +/- 14.72\n", " 0.00 %\n", " 0.00 %\n", " \n", " \n", " 1\n", " 2\n", - " 425.15 +/- 13.38\n", + " 432.25 +/- 18.85\n", " 0.00 %\n", - " 8.73 %\n", + " 5.43 %\n", " \n", " \n", " 2\n", " 3\n", - " 148.52 +/- 12.62\n", - " 0.11 %\n", - " 8.30 %\n", + " 152.53 +/- 17.02\n", + " 0.22 %\n", + " 5.14 %\n", " \n", " \n", " 3\n", " 4\n", - " 55.79 +/- 12.89\n", - " 4.61 %\n", - " 3.35 %\n", + " 57.96 +/- 15.60\n", + " 7.42 %\n", + " 2.26 %\n", " \n", " \n", " 4\n", " All\n", - " 110.90 +/- 12.85\n", - " 3.15 %\n", - " 5.03 %\n", + " 113.90 +/- 16.18\n", + " 5.07 %\n", + " 3.24 %\n", " \n", " \n", "\n", @@ -1287,11 +1287,11 @@ ], "text/plain": [ " Length Count +/- Error Suppressed % Fabricated %\n", - "0 1 1381.04 +/- 13.08 0.00 % 0.00 %\n", - "1 2 425.15 +/- 13.38 0.00 % 8.73 %\n", - "2 3 148.52 +/- 12.62 0.11 % 8.30 %\n", - "3 4 55.79 +/- 12.89 4.61 % 3.35 %\n", - "4 All 110.90 +/- 12.85 3.15 % 5.03 %" + "0 1 1391.76 +/- 14.72 0.00 % 0.00 %\n", + "1 2 432.25 +/- 18.85 0.00 % 5.43 %\n", + "2 3 152.53 +/- 17.02 0.22 % 5.14 %\n", + "3 4 57.96 +/- 15.60 7.42 % 2.26 %\n", + "4 All 113.90 +/- 16.18 5.07 % 3.24 %" ] }, "execution_count": 12, @@ -1346,37 +1346,37 @@ " \n", " 0\n", " 1\n", - " 1381.04 +/- 69.92\n", + " 1391.76 +/- 75.48\n", " 0.00 %\n", " 0.00 %\n", " \n", " \n", " 1\n", " 2\n", - " 425.15 +/- 55.38\n", + " 432.25 +/- 62.37\n", " 0.00 %\n", - " 7.93 %\n", + " 5.43 %\n", " \n", " \n", " 2\n", " 3\n", - " 148.52 +/- 32.19\n", - " 0.11 %\n", - " 7.17 %\n", + " 152.53 +/- 37.86\n", + " 0.33 %\n", + " 4.74 %\n", " \n", " \n", " 3\n", " 4\n", - " 55.79 +/- 15.51\n", - " 4.61 %\n", - " 2.65 %\n", + " 57.96 +/- 19.45\n", + " 7.67 %\n", + " 1.87 %\n", " \n", " \n", " 4\n", " All\n", - " 110.90 +/- 22.77\n", - " 3.15 %\n", - " 4.21 %\n", + " 113.90 +/- 27.53\n", + " 5.27 %\n", + " 2.88 %\n", " \n", " \n", "\n", @@ -1384,11 +1384,11 @@ ], "text/plain": [ " Length Count +/- Error Suppressed % Fabricated %\n", - "0 1 1381.04 +/- 69.92 0.00 % 0.00 %\n", - "1 2 425.15 +/- 55.38 0.00 % 7.93 %\n", - "2 3 148.52 +/- 32.19 0.11 % 7.17 %\n", - "3 4 55.79 +/- 15.51 4.61 % 2.65 %\n", - "4 All 110.90 +/- 22.77 3.15 % 4.21 %" + "0 1 1391.76 +/- 75.48 0.00 % 0.00 %\n", + "1 2 432.25 +/- 62.37 0.00 % 5.43 %\n", + "2 3 152.53 +/- 37.86 0.33 % 4.74 %\n", + "3 4 57.96 +/- 19.45 7.67 % 1.87 %\n", + "4 All 113.90 +/- 27.53 5.27 % 2.88 %" ] }, "execution_count": 13, @@ -1451,7 +1451,7 @@ " \n", " 0\n", " \n", - " 1\n", + " 6\n", " \n", " 1\n", " 1\n", @@ -1464,7 +1464,7 @@ " \n", " 1\n", " \n", - " 1\n", + " 6\n", " \n", " 1\n", " 1\n", @@ -1477,7 +1477,7 @@ " \n", " 2\n", " \n", - " 1\n", + " 6\n", " \n", " 1\n", " 1\n", @@ -1490,7 +1490,7 @@ " \n", " 3\n", " \n", - " 1\n", + " 6\n", " \n", " 1\n", " 1\n", @@ -1503,7 +1503,7 @@ " \n", " 4\n", " \n", - " 1\n", + " 6\n", " \n", " 1\n", " 1\n", @@ -1527,10 +1527,10 @@ " ...\n", " \n", " \n", - " 5999\n", + " 6019\n", " 2\n", - " 3\n", - " 5\n", + " 6\n", + " 9\n", " \n", " \n", " \n", @@ -1540,10 +1540,10 @@ " \n", " \n", " \n", - " 6000\n", - " \n", - " 3\n", - " 7\n", + " 6020\n", + " 2\n", + " 6\n", + " 9\n", " \n", " \n", " \n", @@ -1553,10 +1553,10 @@ " \n", " \n", " \n", - " 6001\n", - " \n", - " 3\n", - " 7\n", + " 6021\n", + " 2\n", + " 6\n", + " 9\n", " \n", " \n", " \n", @@ -1566,10 +1566,10 @@ " \n", " \n", " \n", - " 6002\n", - " \n", - " 3\n", - " 7\n", + " 6022\n", + " 2\n", + " 6\n", + " 9\n", " \n", " \n", " \n", @@ -1579,10 +1579,10 @@ " \n", " \n", " \n", - " 6003\n", - " \n", - " 3\n", - " 7\n", + " 6023\n", + " 2\n", + " 6\n", + " 9\n", " \n", " \n", " \n", @@ -1593,24 +1593,24 @@ " \n", " \n", "\n", - "

6004 rows × 10 columns

\n", + "

6024 rows × 10 columns

\n", "" ], "text/plain": [ " H1 H2 H3 H4 H5 H6 H7 H8 H9 H10\n", - "0 1 1 1 1 1 1 1 1\n", - "1 1 1 1 1 1 1 1 1\n", - "2 1 1 1 1 1 1 1 1\n", - "3 1 1 1 1 1 1 1 1\n", - "4 1 1 1 1 1 1 1 1\n", + "0 6 1 1 1 1 1 1 1\n", + "1 6 1 1 1 1 1 1 1\n", + "2 6 1 1 1 1 1 1 1\n", + "3 6 1 1 1 1 1 1 1\n", + "4 6 1 1 1 1 1 1 1\n", "... .. .. .. .. .. .. .. .. .. ..\n", - "5999 2 3 5 \n", - "6000 3 7 \n", - "6001 3 7 \n", - "6002 3 7 \n", - "6003 3 7 \n", + "6019 2 6 9 \n", + "6020 2 6 9 \n", + "6021 2 6 9 \n", + "6022 2 6 9 \n", + "6023 2 6 9 \n", "\n", - "[6004 rows x 10 columns]" + "[6024 rows x 10 columns]" ] }, "execution_count": 14, @@ -1705,49 +1705,49 @@ " \n", " 0\n", " 1\n", - " 1381.04 +/- 9.68\n", + " 1391.76 +/- 16.96\n", " 0.00 %\n", " 0.00 %\n", " \n", " \n", " 1\n", " 2\n", - " 425.15 +/- 12.85\n", + " 432.25 +/- 18.61\n", + " 0.00 %\n", " 0.00 %\n", - " 0.48 %\n", " \n", " \n", " 2\n", " 3\n", - " 148.52 +/- 13.80\n", - " 10.47 %\n", + " 152.53 +/- 17.84\n", + " 19.96 %\n", " 0.00 %\n", " \n", " \n", " 3\n", " 4\n", - " 55.79 +/- 12.81\n", - " 51.70 %\n", + " 57.96 +/- 17.00\n", + " 68.08 %\n", " 0.00 %\n", " \n", " \n", " 4\n", " All\n", - " 110.90 +/- 13.14\n", - " 37.67 %\n", - " 0.05 %\n", + " 113.90 +/- 17.55\n", + " 51.19 %\n", + " 0.00 %\n", " \n", " \n", "\n", "" ], "text/plain": [ - " Length Count +/- Error Suppressed % Fabricated %\n", - "0 1 1381.04 +/- 9.68 0.00 % 0.00 %\n", - "1 2 425.15 +/- 12.85 0.00 % 0.48 %\n", - "2 3 148.52 +/- 13.80 10.47 % 0.00 %\n", - "3 4 55.79 +/- 12.81 51.70 % 0.00 %\n", - "4 All 110.90 +/- 13.14 37.67 % 0.05 %" + " Length Count +/- Error Suppressed % Fabricated %\n", + "0 1 1391.76 +/- 16.96 0.00 % 0.00 %\n", + "1 2 432.25 +/- 18.61 0.00 % 0.00 %\n", + "2 3 152.53 +/- 17.84 19.96 % 0.00 %\n", + "3 4 57.96 +/- 17.00 68.08 % 0.00 %\n", + "4 All 113.90 +/- 17.55 51.19 % 0.00 %" ] }, "execution_count": 16, @@ -1802,37 +1802,37 @@ " \n", " 0\n", " 1\n", - " 1381.04 +/- 199.60\n", + " 1391.76 +/- 293.40\n", " 0.00 %\n", " 0.00 %\n", " \n", " \n", " 1\n", " 2\n", - " 425.15 +/- 130.29\n", + " 432.25 +/- 179.53\n", + " 0.00 %\n", " 0.00 %\n", - " 0.48 %\n", " \n", " \n", " 2\n", " 3\n", - " 148.52 +/- 70.57\n", - " 10.47 %\n", + " 152.53 +/- 96.13\n", + " 20.07 %\n", " 0.00 %\n", " \n", " \n", " 3\n", " 4\n", - " 55.79 +/- 35.48\n", - " 51.70 %\n", + " 57.96 +/- 50.07\n", + " 68.08 %\n", " 0.00 %\n", " \n", " \n", " 4\n", " All\n", - " 110.90 +/- 59.32\n", - " 37.67 %\n", - " 0.05 %\n", + " 113.90 +/- 88.73\n", + " 51.22 %\n", + " 0.00 %\n", " \n", " \n", "\n", @@ -1840,11 +1840,11 @@ ], "text/plain": [ " Length Count +/- Error Suppressed % Fabricated %\n", - "0 1 1381.04 +/- 199.60 0.00 % 0.00 %\n", - "1 2 425.15 +/- 130.29 0.00 % 0.48 %\n", - "2 3 148.52 +/- 70.57 10.47 % 0.00 %\n", - "3 4 55.79 +/- 35.48 51.70 % 0.00 %\n", - "4 All 110.90 +/- 59.32 37.67 % 0.05 %" + "0 1 1391.76 +/- 293.40 0.00 % 0.00 %\n", + "1 2 432.25 +/- 179.53 0.00 % 0.00 %\n", + "2 3 152.53 +/- 96.13 20.07 % 0.00 %\n", + "3 4 57.96 +/- 50.07 68.08 % 0.00 %\n", + "4 All 113.90 +/- 88.73 51.22 % 0.00 %" ] }, "execution_count": 17, @@ -1983,10 +1983,10 @@ " ...\n", " \n", " \n", - " 5999\n", + " 6019\n", " \n", - " 6\n", - " 9\n", + " 4\n", + " 7\n", " \n", " \n", " \n", @@ -1996,10 +1996,10 @@ " \n", " \n", " \n", - " 6000\n", + " 6020\n", " \n", - " 6\n", - " 9\n", + " 5\n", + " 4\n", " \n", " \n", " \n", @@ -2009,10 +2009,10 @@ " \n", " \n", " \n", - " 6001\n", + " 6021\n", " \n", " 6\n", - " 9\n", + " 5\n", " \n", " \n", " \n", @@ -2022,10 +2022,10 @@ " \n", " \n", " \n", - " 6002\n", + " 6022\n", " \n", " 6\n", - " 9\n", + " 5\n", " \n", " \n", " \n", @@ -2035,10 +2035,10 @@ " \n", " \n", " \n", - " 6003\n", + " 6023\n", " \n", " 6\n", - " 9\n", + " 6\n", " \n", " \n", " \n", @@ -2049,7 +2049,7 @@ " \n", " \n", "\n", - "

6004 rows × 10 columns

\n", + "

6024 rows × 10 columns

\n", "" ], "text/plain": [ @@ -2060,13 +2060,13 @@ "3 1 1 1 1 1 1 1 1\n", "4 1 1 1 1 1 1 1 1\n", "... .. .. .. .. .. .. .. .. .. ..\n", - "5999 6 9 \n", - "6000 6 9 \n", - "6001 6 9 \n", - "6002 6 9 \n", - "6003 6 9 \n", + "6019 4 7 \n", + "6020 5 4 \n", + "6021 6 5 \n", + "6022 6 5 \n", + "6023 6 6 \n", "\n", - "[6004 rows x 10 columns]" + "[6024 rows x 10 columns]" ] }, "execution_count": 18, @@ -2163,37 +2163,37 @@ " \n", " 0\n", " 1\n", - " 1381.04 +/- 3.44\n", + " 1391.76 +/- 9.28\n", " 0.00 %\n", " 0.00 %\n", " \n", " \n", " 1\n", " 2\n", - " 425.15 +/- 9.10\n", + " 432.25 +/- 11.69\n", " 0.00 %\n", - " 6.70 %\n", + " 7.11 %\n", " \n", " \n", " 2\n", " 3\n", - " 148.52 +/- 22.06\n", - " 1.54 %\n", - " 7.84 %\n", + " 152.53 +/- 28.58\n", + " 1.21 %\n", + " 7.44 %\n", " \n", " \n", " 3\n", " 4\n", - " 55.79 +/- 26.13\n", - " 28.82 %\n", - " 4.66 %\n", + " 57.96 +/- 28.84\n", + " 32.59 %\n", + " 4.23 %\n", " \n", " \n", " 4\n", " All\n", - " 110.90 +/- 23.38\n", - " 19.90 %\n", - " 5.80 %\n", + " 113.90 +/- 27.27\n", + " 22.36 %\n", + " 5.49 %\n", " \n", " \n", "\n", @@ -2201,11 +2201,11 @@ ], "text/plain": [ " Length Count +/- Error Suppressed % Fabricated %\n", - "0 1 1381.04 +/- 3.44 0.00 % 0.00 %\n", - "1 2 425.15 +/- 9.10 0.00 % 6.70 %\n", - "2 3 148.52 +/- 22.06 1.54 % 7.84 %\n", - "3 4 55.79 +/- 26.13 28.82 % 4.66 %\n", - "4 All 110.90 +/- 23.38 19.90 % 5.80 %" + "0 1 1391.76 +/- 9.28 0.00 % 0.00 %\n", + "1 2 432.25 +/- 11.69 0.00 % 7.11 %\n", + "2 3 152.53 +/- 28.58 1.21 % 7.44 %\n", + "3 4 57.96 +/- 28.84 32.59 % 4.23 %\n", + "4 All 113.90 +/- 27.27 22.36 % 5.49 %" ] }, "execution_count": 20, @@ -2260,37 +2260,37 @@ " \n", " 0\n", " 1\n", - " 1381.04 +/- 77.32\n", + " 1391.76 +/- 83.32\n", " 0.00 %\n", " 0.00 %\n", " \n", " \n", " 1\n", " 2\n", - " 425.15 +/- 113.48\n", + " 432.25 +/- 106.77\n", " 0.00 %\n", " 6.28 %\n", " \n", " \n", " 2\n", " 3\n", - " 148.52 +/- 74.39\n", - " 1.54 %\n", - " 6.39 %\n", + " 152.53 +/- 76.03\n", + " 1.21 %\n", + " 4.78 %\n", " \n", " \n", " 3\n", " 4\n", - " 55.79 +/- 46.28\n", - " 29.95 %\n", - " 3.35 %\n", + " 57.96 +/- 50.00\n", + " 33.77 %\n", + " 2.77 %\n", " \n", " \n", " 4\n", " All\n", - " 110.90 +/- 60.54\n", - " 20.66 %\n", - " 4.54 %\n", + " 113.90 +/- 63.29\n", + " 23.16 %\n", + " 3.69 %\n", " \n", " \n", "\n", @@ -2298,11 +2298,11 @@ ], "text/plain": [ " Length Count +/- Error Suppressed % Fabricated %\n", - "0 1 1381.04 +/- 77.32 0.00 % 0.00 %\n", - "1 2 425.15 +/- 113.48 0.00 % 6.28 %\n", - "2 3 148.52 +/- 74.39 1.54 % 6.39 %\n", - "3 4 55.79 +/- 46.28 29.95 % 3.35 %\n", - "4 All 110.90 +/- 60.54 20.66 % 4.54 %" + "0 1 1391.76 +/- 83.32 0.00 % 0.00 %\n", + "1 2 432.25 +/- 106.77 0.00 % 6.28 %\n", + "2 3 152.53 +/- 76.03 1.21 % 4.78 %\n", + "3 4 57.96 +/- 50.00 33.77 % 2.77 %\n", + "4 All 113.90 +/- 63.29 23.16 % 3.69 %" ] }, "execution_count": 21, @@ -2510,7 +2510,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.10.6" }, "orig_nbformat": 4, "vscode": { diff --git a/packages/lib-pacsynth/samples/dp_aggregate_seeded_short_example.ipynb b/packages/lib-pacsynth/samples/dp_aggregate_seeded_short_example.ipynb index b76ac7f..b578c35 100644 --- a/packages/lib-pacsynth/samples/dp_aggregate_seeded_short_example.ipynb +++ b/packages/lib-pacsynth/samples/dp_aggregate_seeded_short_example.ipynb @@ -161,7 +161,7 @@ " count\n", " 6000.000000\n", " 6000.000000\n", - " 6000.000000\n", + " 6000.00000\n", " 6000.000000\n", " 6000.000000\n", " 6000.000000\n", @@ -172,35 +172,35 @@ " \n", " \n", " mean\n", - " 1.003000\n", - " 2.612500\n", - " 4.557333\n", - " 0.494000\n", - " 0.517833\n", - " 0.501667\n", - " 0.497333\n", - " 0.494333\n", - " 0.513833\n", - " 0.496167\n", + " 0.992333\n", + " 2.646333\n", + " 4.57900\n", + " 0.502333\n", + " 0.492667\n", + " 0.497000\n", + " 0.498833\n", + " 0.501500\n", + " 0.487500\n", + " 0.493000\n", " \n", " \n", " std\n", - " 0.810206\n", - " 2.123228\n", - " 3.324338\n", - " 0.500006\n", - " 0.499724\n", + " 0.816324\n", + " 2.107129\n", + " 3.32665\n", + " 0.500036\n", + " 0.499988\n", + " 0.500033\n", + " 0.500040\n", " 0.500039\n", - " 0.500035\n", - " 0.500010\n", - " 0.499850\n", - " 0.500027\n", + " 0.499885\n", + " 0.499993\n", " \n", " \n", " min\n", " 0.000000\n", " 0.000000\n", - " 0.000000\n", + " 0.00000\n", " 0.000000\n", " 0.000000\n", " 0.000000\n", @@ -212,8 +212,8 @@ " \n", " 25%\n", " 0.000000\n", - " 0.000000\n", " 1.000000\n", + " 1.00000\n", " 0.000000\n", " 0.000000\n", " 0.000000\n", @@ -225,21 +225,21 @@ " \n", " 50%\n", " 1.000000\n", - " 2.000000\n", - " 4.000000\n", - " 0.000000\n", - " 1.000000\n", + " 3.000000\n", + " 5.00000\n", " 1.000000\n", " 0.000000\n", " 0.000000\n", + " 0.000000\n", " 1.000000\n", " 0.000000\n", + " 0.000000\n", " \n", " \n", " 75%\n", " 2.000000\n", " 5.000000\n", - " 7.000000\n", + " 7.00000\n", " 1.000000\n", " 1.000000\n", " 1.000000\n", @@ -252,7 +252,7 @@ " max\n", " 2.000000\n", " 6.000000\n", - " 10.000000\n", + " 10.00000\n", " 1.000000\n", " 1.000000\n", " 1.000000\n", @@ -266,23 +266,23 @@ "" ], "text/plain": [ - " H1 H2 H3 H4 H5 \\\n", - "count 6000.000000 6000.000000 6000.000000 6000.000000 6000.000000 \n", - "mean 1.003000 2.612500 4.557333 0.494000 0.517833 \n", - "std 0.810206 2.123228 3.324338 0.500006 0.499724 \n", - "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", - "25% 0.000000 0.000000 1.000000 0.000000 0.000000 \n", - "50% 1.000000 2.000000 4.000000 0.000000 1.000000 \n", - "75% 2.000000 5.000000 7.000000 1.000000 1.000000 \n", - "max 2.000000 6.000000 10.000000 1.000000 1.000000 \n", + " H1 H2 H3 H4 H5 \\\n", + "count 6000.000000 6000.000000 6000.00000 6000.000000 6000.000000 \n", + "mean 0.992333 2.646333 4.57900 0.502333 0.492667 \n", + "std 0.816324 2.107129 3.32665 0.500036 0.499988 \n", + "min 0.000000 0.000000 0.00000 0.000000 0.000000 \n", + "25% 0.000000 1.000000 1.00000 0.000000 0.000000 \n", + "50% 1.000000 3.000000 5.00000 1.000000 0.000000 \n", + "75% 2.000000 5.000000 7.00000 1.000000 1.000000 \n", + "max 2.000000 6.000000 10.00000 1.000000 1.000000 \n", "\n", " H6 H7 H8 H9 H10 \n", "count 6000.000000 6000.000000 6000.000000 6000.000000 6000.000000 \n", - "mean 0.501667 0.497333 0.494333 0.513833 0.496167 \n", - "std 0.500039 0.500035 0.500010 0.499850 0.500027 \n", + "mean 0.497000 0.498833 0.501500 0.487500 0.493000 \n", + "std 0.500033 0.500040 0.500039 0.499885 0.499993 \n", "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "25% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", - "50% 1.000000 0.000000 0.000000 1.000000 0.000000 \n", + "50% 0.000000 0.000000 1.000000 0.000000 0.000000 \n", "75% 1.000000 1.000000 1.000000 1.000000 1.000000 \n", "max 1.000000 1.000000 1.000000 1.000000 1.000000 " ] @@ -337,42 +337,42 @@ " \n", " \n", " count\n", - " 6001.000000\n", - " 6001.000000\n", - " 6001.000000\n", - " 6001.000000\n", - " 6001.000000\n", - " 6001.000000\n", - " 6001.000000\n", - " 6001.000000\n", - " 6001.000000\n", - " 6001.000000\n", + " 6030.000000\n", + " 6030.000000\n", + " 6030.000000\n", + " 6030.000000\n", + " 6030.000000\n", + " 6030.000000\n", + " 6030.000000\n", + " 6030.000000\n", + " 6030.000000\n", + " 6030.000000\n", " \n", " \n", " mean\n", - " 0.976671\n", - " 2.495084\n", - " 4.334778\n", - " 0.453091\n", - " 0.478587\n", - " 0.457424\n", - " 0.461256\n", - " 0.462923\n", - " 0.473254\n", - " 0.465756\n", + " 0.937977\n", + " 2.462023\n", + " 4.250083\n", + " 0.477944\n", + " 0.470149\n", + " 0.462355\n", + " 0.478441\n", + " 0.475788\n", + " 0.465008\n", + " 0.469818\n", " \n", " \n", " std\n", - " 0.815346\n", - " 2.140020\n", - " 3.378620\n", - " 0.497836\n", - " 0.499583\n", - " 0.498225\n", - " 0.498538\n", - " 0.498665\n", - " 0.499326\n", - " 0.498868\n", + " 0.825132\n", + " 2.132173\n", + " 3.401991\n", + " 0.499555\n", + " 0.499150\n", + " 0.498622\n", + " 0.499576\n", + " 0.499455\n", + " 0.498815\n", + " 0.499130\n", " \n", " \n", " min\n", @@ -445,9 +445,9 @@ ], "text/plain": [ " H1 H2 H3 H4 H5 \\\n", - "count 6001.000000 6001.000000 6001.000000 6001.000000 6001.000000 \n", - "mean 0.976671 2.495084 4.334778 0.453091 0.478587 \n", - "std 0.815346 2.140020 3.378620 0.497836 0.499583 \n", + "count 6030.000000 6030.000000 6030.000000 6030.000000 6030.000000 \n", + "mean 0.937977 2.462023 4.250083 0.477944 0.470149 \n", + "std 0.825132 2.132173 3.401991 0.499555 0.499150 \n", "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "25% 0.000000 0.000000 1.000000 0.000000 0.000000 \n", "50% 1.000000 2.000000 4.000000 0.000000 0.000000 \n", @@ -455,9 +455,9 @@ "max 2.000000 6.000000 10.000000 1.000000 1.000000 \n", "\n", " H6 H7 H8 H9 H10 \n", - "count 6001.000000 6001.000000 6001.000000 6001.000000 6001.000000 \n", - "mean 0.457424 0.461256 0.462923 0.473254 0.465756 \n", - "std 0.498225 0.498538 0.498665 0.499326 0.498868 \n", + "count 6030.000000 6030.000000 6030.000000 6030.000000 6030.000000 \n", + "mean 0.462355 0.478441 0.475788 0.465008 0.469818 \n", + "std 0.498622 0.499576 0.499455 0.498815 0.499130 \n", "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "25% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "50% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", @@ -491,7 +491,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.10.6" }, "orig_nbformat": 4, "vscode": { diff --git a/packages/lib-pacsynth/src/aggregate_seeded/dp/synthesizer.rs b/packages/lib-pacsynth/src/aggregate_seeded/dp/synthesizer.rs index ffe7739..6efe9ee 100644 --- a/packages/lib-pacsynth/src/aggregate_seeded/dp/synthesizer.rs +++ b/packages/lib-pacsynth/src/aggregate_seeded/dp/synthesizer.rs @@ -71,9 +71,9 @@ impl DpAggregateSeededSynthesizer { self._parameters.reporting_length, &DpParameters::new( self._parameters.epsilon, - self._parameters.delta, self._parameters.percentile_percentage, self._parameters.percentile_epsilon_proportion, + self._parameters.delta, Some(self._parameters.sigma_proportions.clone()), Some(self._parameters.number_of_records_epsilon_proportion), ), diff --git a/packages/lib-python/Cargo.toml b/packages/lib-python/Cargo.toml index 68dc4dd..0455c16 100644 --- a/packages/lib-python/Cargo.toml +++ b/packages/lib-python/Cargo.toml @@ -13,6 +13,6 @@ crate-type = ["cdylib"] [dependencies] log = { version = "0.4", features = ["std"] } csv = { version = "1.1" } -pyo3 = { version = "0.17", features = ["extension-module"] } +pyo3 = { version = "0.18", features = ["extension-module"] } sds-core = { path = "../core", features = ["pyo3", "rayon"] } -env_logger = { version = "0.9" } \ No newline at end of file +env_logger = { version = "0.10" } \ No newline at end of file diff --git a/packages/lib-python/src/data_processor.rs b/packages/lib-python/src/data_processor.rs index cf41b93..bfa5195 100644 --- a/packages/lib-python/src/data_processor.rs +++ b/packages/lib-python/src/data_processor.rs @@ -48,11 +48,11 @@ impl SDSProcessor { pub fn new( path: &str, delimiter: char, - subject_id: Option, use_columns: Vec, multi_value_columns: HashMap, sensitive_zeros: Vec, record_limit: usize, + subject_id: Option, ) -> Result { CsvDataBlockCreator::create( ReaderBuilder::new() diff --git a/packages/lib-wasm/src/utils/js/ts_definitions.rs b/packages/lib-wasm/src/utils/js/ts_definitions.rs index af5723f..1d0d6a7 100644 --- a/packages/lib-wasm/src/utils/js/ts_definitions.rs +++ b/packages/lib-wasm/src/utils/js/ts_definitions.rs @@ -46,11 +46,11 @@ export interface INoisyCountThreshold { export interface IDpParameters { epsilon: number - delta: number percentilePercentage: number percentileEpsilonProportion: number - numberOfRecordsEpsilonProportion?: number + delta?: number sigmaProportions?: number[] + numberOfRecordsEpsilonProportion?: number } export interface IOversamplingParameters { diff --git a/packages/python-pipeline/src/aggregator.py b/packages/python-pipeline/src/aggregator.py index 117789d..c1dcd9c 100644 --- a/packages/python-pipeline/src/aggregator.py +++ b/packages/python-pipeline/src/aggregator.py @@ -53,11 +53,11 @@ def aggregate(config): sds_processor = sds.SDSProcessor( sensitive_microdata_path, sensitive_microdata_delimiter, - subject_id, use_columns, multi_value_columns, sensitive_zeros, - max(record_limit, 0) + max(record_limit, 0), + subject_id ) aggregated_data = sds_processor.aggregate( @@ -86,9 +86,9 @@ def aggregate(config): reporting_length, sds.DpParameters( noise_epsilon, - noise_delta, percentile_percentage, percentile_epsilon_proportion, + noise_delta, sigma_proportions, number_of_records_epsilon_proportion ), @@ -99,9 +99,9 @@ def aggregate(config): reporting_length, sds.DpParameters( noise_epsilon, - noise_delta, percentile_percentage, percentile_epsilon_proportion, + noise_delta, sigma_proportions, number_of_records_epsilon_proportion ), diff --git a/packages/python-pipeline/src/evaluator.py b/packages/python-pipeline/src/evaluator.py index ee051dd..e378d37 100644 --- a/packages/python-pipeline/src/evaluator.py +++ b/packages/python-pipeline/src/evaluator.py @@ -84,11 +84,11 @@ class Evaluator: self.syn_sds_processor = sds.SDSProcessor( self.synthetic_microdata_path, "\t", - None, # the synthetic data does not have an ID - [], # use all columns from synthetic file + [], # use all columns from synthetic file self.multi_value_columns, self.sensitive_zeros, - 0 # use all records from synthetic file + 0, # use all records from synthetic file + None # the synthetic data does not have an ID ) self.syn_aggregated_data = self.syn_sds_processor.aggregate( self.reporting_length diff --git a/packages/python-pipeline/src/generator.py b/packages/python-pipeline/src/generator.py index 28a35b5..f28795a 100644 --- a/packages/python-pipeline/src/generator.py +++ b/packages/python-pipeline/src/generator.py @@ -46,11 +46,11 @@ def generate(config): sds_processor = sds.SDSProcessor( sensitive_microdata_path, sensitive_microdata_delimiter, - subject_id, use_columns, multi_value_columns, sensitive_zeros, - max(record_limit, 0) + max(record_limit, 0), + subject_id ) if synthesis_mode == 'unseeded': diff --git a/packages/webapp/src/pages/Synthesize/DataSynthesis/DataSynthesis.hooks.ts b/packages/webapp/src/pages/Synthesize/DataSynthesis/DataSynthesis.hooks.ts index 71d5cd0..e44d13b 100644 --- a/packages/webapp/src/pages/Synthesize/DataSynthesis/DataSynthesis.hooks.ts +++ b/packages/webapp/src/pages/Synthesize/DataSynthesis/DataSynthesis.hooks.ts @@ -186,15 +186,15 @@ function convertRawToSynthesisParameters( ...ret, dpParameters: { epsilon: rawParams.noiseEpsilon, - delta: noiseDelta, percentilePercentage: rawParams.percentilePercentage, percentileEpsilonProportion: rawParams.percentileEpsilonProportion, - numberOfRecordsEpsilonProportion: - rawParams.numberOfRecordsEpsilonProportion, + delta: noiseDelta, sigmaProportions: generateSigmaProportions( rawParams.reportingLength, rawParams.accuracyMode, ), + numberOfRecordsEpsilonProportion: + rawParams.numberOfRecordsEpsilonProportion, }, noiseThreshold: { type: 'Adaptive',