Rename gcm bootstrap training to fit_and_compute
Related to issue https://github.com/py-why/dowhy/issues/689

Signed-off-by: Patrick Bloebaum <bloebp@amazon.com>
Parent: f749ceebb7
Commit: fb5b4d5260
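For orientation, a minimal before/after sketch of the call-site change follows (not part of the commit). It is assembled from the snippets touched in this diff; the data-generating equation for Y and the use of "data" when assigning causal mechanisms are illustrative assumptions.

# Sketch only: migrating a caller from the old name to the new one.
import networkx as nx
import numpy as np
import pandas as pd
from dowhy import gcm

Z = np.random.normal(loc=0, scale=1, size=1000)
X = 2 * Z + np.random.normal(loc=0, scale=1, size=1000)
Y = 3 * X + 2 * Z + np.random.normal(loc=0, scale=1, size=1000)  # assumed form, not taken from the diff
data = pd.DataFrame(dict(Z=Z, X=X, Y=Y))

causal_model = gcm.StructuralCausalModel(nx.DiGraph([('Z', 'Y'), ('Z', 'X'), ('X', 'Y')]))
gcm.auto.assign_causal_mechanisms(causal_model, data)

# Old spelling (removed by this commit):
#     gcm.confidence_intervals(
#         gcm.bootstrap_training_and_sampling(gcm.arrow_strength,
#                                             causal_model,
#                                             bootstrap_training_data=data,
#                                             target_node='Y'))
# New spelling; behavior is unchanged, only the name differs:
strength_median, strength_intervals = gcm.confidence_intervals(
    gcm.fit_and_compute(gcm.arrow_strength,
                        causal_model,
                        bootstrap_training_data=data,
                        target_node='Y'))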
@@ -283,11 +283,11 @@
     "gcm.config.disable_progress_bars() # to disable print statements when computing Shapley values\n",
     "\n",
     "median_attribs, uncertainty_attribs = gcm.confidence_intervals(\n",
-    "    gcm.bootstrap_training_and_sampling(gcm.attribute_anomalies,\n",
-    "                                        causal_model,\n",
-    "                                        normal_data,\n",
-    "                                        target_node='Website',\n",
-    "                                        anomaly_samples=outlier_data),\n",
+    "    gcm.fit_and_compute(gcm.attribute_anomalies,\n",
+    "                        causal_model,\n",
+    "                        normal_data,\n",
+    "                        target_node='Website',\n",
+    "                        anomaly_samples=outlier_data),\n",
     "    num_bootstrap_resamples=10)"
    ]
   },
@@ -479,14 +479,14 @@
    "outputs": [],
    "source": [
     "median_mean_latencies, uncertainty_mean_latencies = gcm.confidence_intervals(\n",
-    "    lambda : gcm.bootstrap_training_and_sampling(gcm.interventional_samples,\n",
-    "                                                 causal_model,\n",
-    "                                                 outlier_data,\n",
-    "                                                 interventions = {\n",
-    "                                                     \"Caching Service\": lambda x: x-1,\n",
-    "                                                     \"Shipping Cost Service\": lambda x: x+2\n",
-    "                                                 },\n",
-    "                                                 observed_data=outlier_data)().mean().to_dict(),\n",
+    "    lambda : gcm.fit_and_compute(gcm.interventional_samples,\n",
+    "                                 causal_model,\n",
+    "                                 outlier_data,\n",
+    "                                 interventions = {\n",
+    "                                     \"Caching Service\": lambda x: x-1,\n",
+    "                                     \"Shipping Cost Service\": lambda x: x+2\n",
+    "                                 },\n",
+    "                                 observed_data=outlier_data)().mean().to_dict(),\n",
     "    num_bootstrap_resamples=10)"
    ]
   },

@@ -97,8 +97,8 @@ second item contains the intervals of the contribution scores for each variable.
 To avoid defining a new function, we can streamline that call by using a lambda:
 
 >>> gcm.confidence_intervals(lambda: gcm.distribution_change(causal_model,
->>>                                                           data_old, data_new,
->>>                                                           target_node='Z'))
+>>>                                                           data_old, data_new,
+>>>                                                           target_node='Z'))
 
 Conveniently bootstrapping graph training on random subsets of training data
 -----------------------------------------------------------------------------
@@ -107,7 +107,7 @@ Many of the causal queries in the GCM package require a trained causal graph as
 compute confidence intervals for these methods, we need to explicitly re-train our causal graph
 multiple times with different random subsets of data and also run our causal query with each newly
 trained graph. To do this conveniently, the GCM package provides a function
-``bootstrap_training_and_sampling``. Assuming that we have ``data`` and a causal graph:
+``fit_and_compute``. Assuming that we have ``data`` and a causal graph:
 
 >>> Z = np.random.normal(loc=0, scale=1, size=1000)
 >>> X = 2*Z + np.random.normal(loc=0, scale=1, size=1000)
@@ -117,13 +117,13 @@ trained graph. To do this conveniently, the GCM package provides a function
 >>> causal_model = gcm.StructuralCausalModel(nx.DiGraph([('Z', 'Y'), ('Z', 'X'), ('X', 'Y')]))
 >>> gcm.auto.assign_causal_mechanisms(causal_model, data_old)
 
-we can now use ``bootstrap_training_and_sampling`` as follows:
+we can now use ``fit_and_compute`` as follows:
 
 >>> strength_median, strength_intervals = gcm.confidence_intervals(
->>>     gcm.bootstrap_training_and_sampling(gcm.arrow_strength,
->>>                                         causal_model,
->>>                                         bootstrap_training_data=data,
->>>                                         target_node='Y'))
+>>>     gcm.fit_and_compute(gcm.arrow_strength,
+>>>                         causal_model,
+>>>                         bootstrap_training_data=data,
+>>>                         target_node='Y'))
 >>> strength_median, strength_intervals
 ({('X', 'Y'): 45.90886398636573, ('Z', 'Y'): 15.47129383737619},
  {('X', 'Y'): array([42.88319632, 50.43890079]), ('Z', 'Y'): array([13.44202416, 17.74266107])})

@@ -15,7 +15,7 @@ from .anomaly_scorers import (
 )
 from .cms import FunctionalCausalModel, InvertibleStructuralCausalModel, ProbabilisticCausalModel, StructuralCausalModel
 from .confidence_intervals import confidence_intervals
-from .confidence_intervals_cms import bootstrap_sampling, bootstrap_training_and_sampling
+from .confidence_intervals_cms import bootstrap_sampling, fit_and_compute
 from .density_estimators import GaussianMixtureDensityEstimator, KernelDensityEstimator1D
 from .distribution_change import distribution_change, distribution_change_of_graphs
 from .fcms import AdditiveNoiseModel, ClassificationModel, ClassifierFCM, PostNonlinearModel, PredictionModel

@@ -17,7 +17,7 @@ from dowhy.gcm.fitting_sampling import fit
 # results.
 # Note that this function does not re-fit the causal model(s) and only executes the provided query as it is. In order
 # to re-refit the graphical causal model on random subsets of the data before executing the query, consider using the
-# bootstrap_training_and_sampling function.
+# fit_and_compute function.
 #
 # **Example usage:**
 #
@@ -32,9 +32,9 @@ from dowhy.gcm.fitting_sampling import fit
 # lambda : gcm.arrow_strength(causal_model, target_node='Y').
 #
 # In order to incorporate uncertainties coming from fitting the causal model(s), we can use
-# gcm.bootstrap_training_and_sampling instead:
+# gcm.fit_and_compute instead:
 # >>> strength_medians, strength_intervals = gcm.confidence_intervals(
-# >>>     gcm.bootstrap_training_and_sampling(gcm.arrow_strength,
+# >>>     gcm.fit_and_compute(gcm.arrow_strength,
 # >>>                         causal_model,
 # >>>                         bootstrap_training_data=data,
 # >>>                         target_node='Y'))
@@ -43,7 +43,7 @@ from dowhy.gcm.fitting_sampling import fit
 bootstrap_sampling = partial
 
 
-def bootstrap_training_and_sampling(
+def fit_and_compute(
     f: Callable[
         [Union[ProbabilisticCausalModel, StructuralCausalModel, InvertibleStructuralCausalModel], Any],
         Dict[Any, Union[np.ndarray, float]],
@@ -60,10 +60,10 @@ def bootstrap_training_and_sampling(
     **Example usage:**
 
         >>> scores_median, scores_intervals = gcm.confidence_intervals(
-        >>>     gcm.bootstrap_training_and_sampling(gcm.arrow_strength,
-        >>>                                         causal_model,
-        >>>                                         bootstrap_training_data=data,
-        >>>                                         target_node='Y'))
+        >>>     gcm.fit_and_compute(gcm.arrow_strength,
+        >>>                         causal_model,
+        >>>                         bootstrap_training_data=data,
+        >>>                         target_node='Y'))
 
     :param f: The causal query to perform. A causal query is a function taking a graphical causal model as first
               parameter and an arbitrary number of remaining parameters. It must return a dictionary with

@@ -9,15 +9,15 @@ from dowhy.gcm import (
     EmpiricalDistribution,
     ProbabilisticCausalModel,
     bootstrap_sampling,
-    bootstrap_training_and_sampling,
     draw_samples,
+    fit_and_compute,
 )
 from dowhy.gcm.confidence_intervals import confidence_intervals
 from dowhy.gcm.ml import create_hist_gradient_boost_regressor
 
 
 @flaky(max_runs=2)
-def test_given_causal_graph_based_estimation_func_when_confidence_interval_then_can_use_bootstrap_training_and_sampling():
+def test_given_causal_graph_based_estimation_func_when_confidence_interval_then_can_use_fit_and_compute():
     def draw_single_sample(causal_graph, variable):
         return draw_samples(causal_graph, 1)[variable][0]
@@ -26,7 +26,7 @@ def test_given_causal_graph_based_estimation_func_when_confidence_interval_then_
     causal_model.set_causal_mechanism("Y", AdditiveNoiseModel(create_hist_gradient_boost_regressor()))
 
     median, interval = confidence_intervals(
-        bootstrap_training_and_sampling(
+        fit_and_compute(
             draw_single_sample,
             causal_model,
             bootstrap_training_data=pd.DataFrame(