Use more suitable argument names (#2)

* Use more suitable argument names
* Fix tests

Co-authored-by: Lukas Wutschitz <lukas.wutschitz@microsoft.com>
2021-06-18 17:50:07 +01:00 · 2021-06-18 17:50:07 +01:00 · 53164600f6
--- a/.github/workflows/check-scripts.yml
+++ b/.github/workflows/check-scripts.yml
@ -29,4 +29,4 @@ jobs:
        python -m pip install .
    - name: Run command line scripts
      run: |
-        compute-dp-epsilon --sampling-probability 5e-3 --noise-multiplier 0.8 --delta 1e-6 --iterations 1000
+        compute-dp-epsilon --sampling-probability 5e-3 --noise-multiplier 0.8 --delta 1e-6 --num-compositions 1000
--- a/README.md
+++ b/README.md
@ -15,7 +15,7 @@ pip install git+https://github.com/microsoft/prv_accountant.git
 Getting epsilon estimate directly from the command line.

 ```
-compute-dp-epsilon --sampling-probability 5e-3 --noise-multiplier 0.8 --delta 1e-6 --iterations 1000
+compute-dp-epsilon --sampling-probability 5e-3 --noise-multiplier 0.8 --delta 1e-6 --num-compositions 1000
 ```

 Or, use it in python code
@ -30,7 +30,7 @@ accountant = Accountant(
 	eps_error=0.1
 )

-eps_low, eps_estimate, eps_upper = accountant.compute_epsilon(iterations=1000)
+eps_low, eps_estimate, eps_upper = accountant.compute_epsilon(num_compositions=1000)
 ```

 For more examples, have a look in the `notebooks` directory.
--- a/bin/compute-dp-epsilon
+++ b/bin/compute-dp-epsilon
@ -21,8 +21,8 @@ def arg_parser() -> argparse.ArgumentParser:
        help="A parameter which governs how much noise is added."
    )
    parser.add_argument(
-        "-i", "--iterations", type=int, required=True,
-        help="The number of iterations at which epsilon is computed."
+        "-i", "--num-compositions", type=int, required=True,
+        help="The number of compositions at which epsilon is computed."
    )
    parser.add_argument(
        "-d", "--delta", type=float, required=True,
@ -44,7 +44,7 @@ def main() -> int:
        noise_multiplier=args.noise_multiplier,
        sampling_probability=args.sampling_probability,
        delta=args.delta,
-        max_iterations=args.iterations,
+        max_compositions=args.num_compositions,
        verbose=args.verbose,
        eps_error=0.1
    )
@ -62,7 +62,7 @@ def main() -> int:
 
    for name, accountant in accountants.items():
        try:
-            eps_lower, eps_est, eps_upper = accountant.compute_epsilon(iterations=args.iterations)
+            eps_lower, eps_est, eps_upper = accountant.compute_epsilon(num_compositions=args.num_compositions)
            print(f"{name}:\t\teps_lower = {eps_lower:6.3} eps_estimate = {eps_est:6.3}, eps_upper = {eps_upper:6.3} ")
        except Exception as e:
            print(f"{name}:\t\tn/a")
--- a/notebooks/validate-gaussian.ipynb
+++ b/notebooks/validate-gaussian.ipynb
@ -25,15 +25,15 @@
   "execution_count": null,
   "metadata": {
    "execution": {
-     "iopub.execute_input": "2021-06-11T17:58:14.929286Z",
-     "iopub.status.busy": "2021-06-11T17:58:14.928276Z",
-     "iopub.status.idle": "2021-06-11T17:58:14.930323Z",
-     "shell.execute_reply": "2021-06-11T17:58:14.931111Z"
+     "iopub.execute_input": "2021-06-18T16:35:14.701793Z",
+     "iopub.status.busy": "2021-06-18T16:35:14.700812Z",
+     "iopub.status.idle": "2021-06-18T16:35:14.702823Z",
+     "shell.execute_reply": "2021-06-18T16:35:14.703607Z"
    }
   },
   "outputs": [],
   "source": [
-    "max_iterations = 10000\n",
+    "max_compositions = 10000\n",
    "noise_multiplier = 100.0\n",
    "sampling_probability = 1.0"
   ]
@ -43,17 +43,17 @@
   "execution_count": null,
   "metadata": {
    "execution": {
-     "iopub.execute_input": "2021-06-11T17:58:14.936228Z",
-     "iopub.status.busy": "2021-06-11T17:58:14.935243Z",
-     "iopub.status.idle": "2021-06-11T17:58:15.059231Z",
-     "shell.execute_reply": "2021-06-11T17:58:15.060035Z"
+     "iopub.execute_input": "2021-06-18T16:35:14.709004Z",
+     "iopub.status.busy": "2021-06-18T16:35:14.707998Z",
+     "iopub.status.idle": "2021-06-18T16:35:14.853537Z",
+     "shell.execute_reply": "2021-06-18T16:35:14.854337Z"
    }
   },
   "outputs": [],
   "source": [
-    "acc_1 = Accountant(noise_multiplier, sampling_probability, 1e-7, max_iterations=max_iterations, eps_error=1.0)\n",
-    "acc_05 = Accountant(noise_multiplier, sampling_probability, 1e-7, max_iterations=max_iterations, eps_error=0.5)\n",
-    "acc_01 = Accountant(noise_multiplier, sampling_probability, 1e-7, max_iterations=max_iterations, eps_error=0.1)"
+    "acc_1 = Accountant(noise_multiplier, sampling_probability, 1e-7, max_compositions=max_compositions, eps_error=1.0)\n",
+    "acc_05 = Accountant(noise_multiplier, sampling_probability, 1e-7, max_compositions=max_compositions, eps_error=0.5)\n",
+    "acc_01 = Accountant(noise_multiplier, sampling_probability, 1e-7, max_compositions=max_compositions, eps_error=0.1)"
   ]
  },
  {
@ -61,17 +61,17 @@
   "execution_count": null,
   "metadata": {
    "execution": {
-     "iopub.execute_input": "2021-06-11T17:58:15.064415Z",
-     "iopub.status.busy": "2021-06-11T17:58:15.063446Z",
-     "iopub.status.idle": "2021-06-11T17:58:15.072501Z",
-     "shell.execute_reply": "2021-06-11T17:58:15.071702Z"
+     "iopub.execute_input": "2021-06-18T16:35:14.858522Z",
+     "iopub.status.busy": "2021-06-18T16:35:14.857565Z",
+     "iopub.status.idle": "2021-06-18T16:35:14.868400Z",
+     "shell.execute_reply": "2021-06-18T16:35:14.867636Z"
    }
   },
   "outputs": [],
   "source": [
-    "f_n_01 = acc_01.compute_compositions(max_iterations)\n",
-    "f_n_05 = acc_05.compute_compositions(max_iterations)\n",
-    "f_n_1 = acc_1.compute_compositions(max_iterations)"
+    "f_n_01 = acc_01.compute_compositions(max_compositions)\n",
+    "f_n_05 = acc_05.compute_compositions(max_compositions)\n",
+    "f_n_1 = acc_1.compute_compositions(max_compositions)"
   ]
  },
  {
@ -79,16 +79,16 @@
   "execution_count": null,
   "metadata": {
    "execution": {
-     "iopub.execute_input": "2021-06-11T17:58:15.077137Z",
-     "iopub.status.busy": "2021-06-11T17:58:15.076078Z",
-     "iopub.status.idle": "2021-06-11T17:58:15.078866Z",
-     "shell.execute_reply": "2021-06-11T17:58:15.078077Z"
+     "iopub.execute_input": "2021-06-18T16:35:14.873159Z",
+     "iopub.status.busy": "2021-06-18T16:35:14.872171Z",
+     "iopub.status.idle": "2021-06-18T16:35:14.874177Z",
+     "shell.execute_reply": "2021-06-18T16:35:14.874946Z"
    }
   },
   "outputs": [],
   "source": [
    "def delta_exact(eps):\n",
-    "    mu = np.sqrt(max_iterations)/noise_multiplier\n",
+    "    mu = np.sqrt(max_compositions)/noise_multiplier\n",
    "    return stats.norm.cdf(-eps/mu+mu/2)-np.exp(eps)*stats.norm.cdf(-eps/mu-mu/2)"
   ]
  },
--- a/prv_accountant/accountant.py
+++ b/prv_accountant/accountant.py
@ -17,7 +17,7 @@ from . import privacy_random_variables

 class Accountant:
    def __init__(self, noise_multiplier: float, sampling_probability: float,
-                 delta: float, max_iterations: int, eps_error: float = None,
+                 delta: float, max_compositions: int, eps_error: float = None,
                 mesh_size: float = None, verbose: bool = False) -> None:
        """
        Create an PRV accountant
@ -27,10 +27,10 @@ class Accountant:
        :param float noise_multiplier: Noise multiplier of the DP-SGD training
        :param float sampling_probability: Sampling probability of the training
        :param float delta: Target delta value
-        :param int max_iterations: Max number of iterations this accountant is
-                                   used for. This value is used to estimate a
-                                   automatically determine a mesh size which
-                                   influences the accuracy of the privacy budget.
+        :param int max_compositions: Max number of compositions this accountant is
+                                     used for. This value is used to
+                                     automatically determine a mesh size, which
+                                     influences the accuracy of the privacy budget.
        :param float eps_error: Allowed error in epsilon
        :param float mesh_size: Mesh size of the pdf discretisation.
                                (This is an upper bound the actual mesh size
@ -39,7 +39,7 @@ class Accountant:
        self.noise_multiplier = noise_multiplier
        self.sampling_probability = sampling_probability
        self.delta = delta
-        self.max_iterations = max_iterations
+        self.max_compositions = max_compositions
        self.delta_error = delta/1000.0

        eta0 = self.delta_error/3
@ -47,22 +47,22 @@ class Accountant:
            if eps_error:
                raise ValueError("Cannot specify `eps_error` when `mesh_size` is specified.")
            mesh_size = mesh_size
-            self.eps_error = mesh_size*np.sqrt(2*max_iterations*np.log(2/eta0))/2
+            self.eps_error = mesh_size*np.sqrt(2*max_compositions*np.log(2/eta0))/2
        else:
            if not eps_error:
                raise ValueError("Need to specify either `eps_error` or `mesh_size`.")
            self.eps_error = eps_error
-            mesh_size = 2*eps_error / np.sqrt(2*max_iterations*np.log(2/eta0))
+            mesh_size = 2*eps_error / np.sqrt(2*max_compositions*np.log(2/eta0))

        rdp = RDP(
            noise_multiplier=noise_multiplier,
            sampling_probability=sampling_probability,
            delta=self.delta_error/4)
-        L = self.eps_error + rdp.compute_epsilon(max_iterations)[2]
+        L = self.eps_error + rdp.compute_epsilon(max_compositions)[2]
        rdp = RDP(
            noise_multiplier=noise_multiplier,
            sampling_probability=sampling_probability,
-            delta=self.delta_error/8/max_iterations)
+            delta=self.delta_error/8/max_compositions)
        L = 3 + max(L, rdp.compute_epsilon(1)[2])

        domain = Domain.create_aligned(-L, L, mesh_size)
@ -79,7 +79,7 @@ class Accountant:
        self.composer = composers.Fourier(self.f_0)

    def compute_compositions(self, num_compositions: int) -> DiscretePrivacyRandomVariable:
-        if num_compositions > self.max_iterations:
+        if num_compositions > self.max_compositions:
            raise ValueError("Requested number of compositions exceeds the maximum number of compositions")
        return self.composer.compute_composition(num_compositions)

@ -96,16 +96,16 @@ class Accountant:
                delta += p_n_i*(1.0 - np.exp(epsilon)*np.exp(-t_i))
        return np.float64(delta)

-    def compute_epsilon(self, iterations: int) -> Tuple[float, float, float]:
+    def compute_epsilon(self, num_compositions: int) -> Tuple[float, float, float]:
        """
        Compute epsilon bounds

-        :param int iterations:
+        :param int num_compositions: The number of compositions
        :return Tuple[float, float, float] lower bound of true epsilon,
                                           approximation of true epsilon,
                                           upper bound of true epsilon
        """
-        f_n = self.composer.compute_composition(iterations)
+        f_n = self.composer.compute_composition(num_compositions=num_compositions)
        eps_lower = optimize.root_scalar(
            lambda e: self.compute_delta_lower(f_n, e) - self.delta, bracket=(0, f_n.domain.t_max())).root
        eps_upper = optimize.root_scalar(
--- a/prv_accountant/other_accountants.py
+++ b/prv_accountant/other_accountants.py
@ -35,14 +35,8 @@ class RDP:
            noise_multiplier=self.noise_multiplier, steps=1,
            orders=self.orders)

-    def compute_epsilon(self, iterations: int) -> Tuple[float, float, float]:
-        """
-
-        :param int iterations:
-        :return: DP epsilon
-        :rtype: float
-        """
-        rdp_steps = self.rdp*iterations
+    def compute_epsilon(self, num_compositions: int) -> Tuple[float, float, float]:
+        rdp_steps = self.rdp*num_compositions
        eps, _, opt_order = rdp_accountant.get_privacy_spent(
            orders=self.orders, rdp=rdp_steps, target_eps=None,
            target_delta=self.delta)
@ -67,10 +61,10 @@ class GDP:

        self.distribution = distribution

-    def compute_epsilon(self, iterations: int) -> Tuple[float, float, float]:
+    def compute_epsilon(self, num_compositions: int) -> Tuple[float, float, float]:
        batch_size = 1
        n = 1/self.sampling_probability
-        epoch = iterations / n
+        epoch = num_compositions / n

        if self.distribution == Distribution.UNIFORM:
            mu = gdp_accountant.compute_mu_uniform(
--- a/tests/test_accountant.py
+++ b/tests/test_accountant.py
@ -20,7 +20,7 @@ class TestAccountant:
            sampling_probability=1.0,
            delta=1e-8,
            eps_error=0.01,
-            max_iterations=10000
+            max_compositions=10000
        )

        f_n = accountant.compute_compositions(10000)