documentation and signatures for AccuracyMode

This commit is contained in:
Rodrigo Racanicci 2022-07-28 14:31:32 -03:00
Родитель dfdd62b529
Коммит 576bce8cfa
1 изменённых файлов: 82 добавлений и 0 удалений

Просмотреть файл

@ -12,6 +12,12 @@ pub enum AccuracyModeEnum {
#[pyclass]
#[derive(Clone, Serialize)]
/// Controls how the privacy budget is split across the different
/// combination lengths during the aggregation with differential privacy
/// (DP Aggregation).
///
/// A set of static methods is provided to construct an object
/// representing the desired accuracy mode (e.g. `prioritize_large_counts`,
/// `prioritize_small_counts`, `balanced` and `custom`).
pub struct AccuracyMode {
/// The selected accuracy mode, set by the static method used to build this object.
pub(crate) mode: AccuracyModeEnum,
}
@ -20,6 +26,26 @@ pub struct AccuracyMode {
impl AccuracyMode {
#[inline]
#[staticmethod]
#[pyo3(text_signature = "()")]
/// This mode will ensure that more privacy budget is spent
/// for larger attribute combination lengths.
///
/// For example, if reporting_length=3, the scale S(i) of the gaussian noise
/// added to each combination length i will be:
/// - single attribute counts (1-counts) = S(1)
/// - combinations of 2 attributes (2-counts) = S(2) = S(1) / 2
/// - combinations of 3 attributes (3-counts) = S(3) = S(1) / 3
///
/// So 3 times MORE BUDGET is going to be spent on the 3-counts
/// than on the 1-counts, meaning that the scale of noise related to the 1-counts
/// will be 3 times bigger than the scale related to the 3-counts.
///
/// Summary:
/// Use this if you want smaller errors for larger attribute combination lengths
/// (e.g. the accuracy for 3-counts is more important than for 1-counts)
///
/// Returns:
/// AccuracyMode
pub fn prioritize_large_counts() -> Self {
Self {
mode: AccuracyModeEnum::PrioritizeLargeCounts,
@ -28,6 +54,26 @@ impl AccuracyMode {
#[inline]
#[staticmethod]
#[pyo3(text_signature = "()")]
/// This mode will ensure that more privacy budget is spent
/// for smaller attribute combination lengths.
///
/// For example, if reporting_length=3, the scale S(i) of the gaussian noise
/// added to each combination length i will be:
/// - single attribute counts (1-counts) = S(1) = S(3) / 3
/// - combinations of 2 attributes (2-counts) = S(2) = S(3) / 2
/// - combinations of 3 attributes (3-counts) = S(3)
///
/// So 3 times LESS BUDGET is going to be spent on the 3-counts
/// than on the 1-counts, meaning that the scale of noise related to the 1-counts
/// will be 3 times smaller than the scale related to the 3-counts.
///
/// Summary:
/// Use this if you want smaller errors for smaller attribute combination lengths
/// (e.g. the accuracy for 1-counts is more important than for 3-counts)
///
/// Returns:
/// AccuracyMode
pub fn prioritize_small_counts() -> Self {
Self {
mode: AccuracyModeEnum::PrioritizeSmallCounts,
@ -36,6 +82,18 @@ impl AccuracyMode {
#[inline]
#[staticmethod]
#[pyo3(text_signature = "()")]
/// This mode will evenly distribute the privacy budget across
/// all attribute combination lengths.
///
/// For example, if reporting_length=3, the scale S(i) of the gaussian noise
/// added to each combination length i will be:
/// - single attribute counts (1-counts) = S(1)
/// - combinations of 2 attributes (2-counts) = S(2) = S(1)
/// - combinations of 3 attributes (3-counts) = S(3) = S(2) = S(1)
///
/// Returns:
/// AccuracyMode
pub fn balanced() -> Self {
Self {
mode: AccuracyModeEnum::Balanced,
@ -44,6 +102,30 @@ impl AccuracyMode {
#[inline]
#[staticmethod]
#[pyo3(text_signature = "(sigma_proportions)")]
/// This mode lets you specify how to split the privacy budget across
/// attribute combination lengths.
///
/// Sigma defines the scale (standard deviation) of the gaussian noise
/// added to a particular combination length.
///
/// Example:
/// - given reporting_length=3
/// - given sigma_proportions=[1.0, 0.25, 0.5]
/// - with S(1), S(2), S(3) being the sigma values for the noise related to the
/// 1-, 2- and 3-counts, respectively
/// - then:
/// - scale of the noise added to the 1-counts: S(1) = S
/// - scale of the noise added to the 2-counts: S(2) = S(1) / 4 = S / 4
/// - scale of the noise added to the 3-counts: S(3) = S(1) / 2 = S / 2
///
/// Arguments:
/// * sigma_proportions: list[float] - sigma proportions to be kept across the different
/// combination lengths
/// - len(sigma_proportions) must be the same as the reporting_length
///
/// Returns:
/// AccuracyMode
pub fn custom(sigma_proportions: Vec<f64>) -> Self {
Self {
mode: AccuracyModeEnum::Custom(sigma_proportions),