Rename gcm bootstrap training to fit_and_compute

Related to issue https://github.com/py-why/dowhy/issues/689

Signed-off-by: Patrick Bloebaum <bloebp@amazon.com>
Patrick Bloebaum 2022-10-28 14:56:48 -07:00 committed by Peter Götz
Parent f749ceebb7
Commit fb5b4d5260
5 changed files with 33 additions and 33 deletions

View file

@@ -283,11 +283,11 @@
"gcm.config.disable_progress_bars() # to disable print statements when computing Shapley values\n",
"\n",
"median_attribs, uncertainty_attribs = gcm.confidence_intervals(\n",
" gcm.bootstrap_training_and_sampling(gcm.attribute_anomalies,\n",
" causal_model,\n",
" normal_data,\n",
" target_node='Website',\n",
" anomaly_samples=outlier_data),\n",
" gcm.fit_and_compute(gcm.attribute_anomalies,\n",
" causal_model,\n",
" normal_data,\n",
" target_node='Website',\n",
" anomaly_samples=outlier_data),\n",
" num_bootstrap_resamples=10)"
]
},
@@ -479,14 +479,14 @@
"outputs": [],
"source": [
"median_mean_latencies, uncertainty_mean_latencies = gcm.confidence_intervals(\n",
" lambda : gcm.bootstrap_training_and_sampling(gcm.interventional_samples,\n",
" causal_model,\n",
" outlier_data,\n",
" interventions = {\n",
" \"Caching Service\": lambda x: x-1,\n",
" \"Shipping Cost Service\": lambda x: x+2\n",
" },\n",
" observed_data=outlier_data)().mean().to_dict(),\n",
" lambda : gcm.fit_and_compute(gcm.interventional_samples,\n",
" causal_model,\n",
" outlier_data,\n",
" interventions = {\n",
" \"Caching Service\": lambda x: x-1,\n",
" \"Shipping Cost Service\": lambda x: x+2\n",
" },\n",
" observed_data=outlier_data)().mean().to_dict(),\n",
" num_bootstrap_resamples=10)"
]
},
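The two notebook cells above change only in the function name: the arguments are passed exactly as before, and ``gcm.fit_and_compute`` still returns a callable that is invoked once per bootstrap round. Below is a minimal, self-contained sketch of the second pattern on toy data; the notebook's ``causal_model``, ``normal_data``, and ``outlier_data`` are defined in earlier cells and are replaced by stand-ins here.

# Sketch only: toy stand-ins for the notebook's variables.
import networkx as nx
import numpy as np
import pandas as pd
from dowhy import gcm

X = np.random.normal(size=1000)
Y = 2 * X + np.random.normal(size=1000)
data = pd.DataFrame({'X': X, 'Y': Y})

causal_model = gcm.StructuralCausalModel(nx.DiGraph([('X', 'Y')]))
gcm.auto.assign_causal_mechanisms(causal_model, data)

# fit_and_compute returns a callable; each invocation re-fits the model on a
# random subset of `data` and then runs the wrapped query. Calling it with ()
# inside the lambda lets us post-process the resulting DataFrame before
# confidence_intervals aggregates the per-round dictionaries.
median_means, interval_means = gcm.confidence_intervals(
    lambda: gcm.fit_and_compute(gcm.interventional_samples,
                                causal_model,
                                data,
                                interventions={'X': lambda x: x + 1},
                                observed_data=data)().mean().to_dict(),
    num_bootstrap_resamples=10)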

View file

@@ -97,8 +97,8 @@ second item contains the intervals of the contribution scores for each variable.
To avoid defining a new function, we can streamline that call by using a lambda:
>>> gcm.confidence_intervals(lambda: gcm.distribution_change(causal_model,
->>> data_old, data_new,
->>> target_node='Z'))
+>>> data_old, data_new,
+>>> target_node='Z'))
Conveniently bootstrapping graph training on random subsets of training data
----------------------------------------------------------------------------
@@ -107,7 +107,7 @@ Many of the causal queries in the GCM package require a trained causal graph as
compute confidence intervals for these methods, we need to explicitly re-train our causal graph
multiple times with different random subsets of data and also run our causal query with each newly
trained graph. To do this conveniently, the GCM package provides a function
-``bootstrap_training_and_sampling``. Assuming that we have ``data`` and a causal graph:
+``fit_and_compute``. Assuming that we have ``data`` and a causal graph:
>>> Z = np.random.normal(loc=0, scale=1, size=1000)
>>> X = 2*Z + np.random.normal(loc=0, scale=1, size=1000)
@@ -117,13 +117,13 @@ trained graph. To do this conveniently, the GCM package provides a function
>>> causal_model = gcm.StructuralCausalModel(nx.DiGraph([('Z', 'Y'), ('Z', 'X'), ('X', 'Y')]))
>>> gcm.auto.assign_causal_mechanisms(causal_model, data_old)
-we can now use ``bootstrap_training_and_sampling`` as follows:
+we can now use ``fit_and_compute`` as follows:
>>> strength_median, strength_intervals = gcm.confidence_intervals(
->>> gcm.bootstrap_training_and_sampling(gcm.arrow_strength,
->>> causal_model,
->>> bootstrap_training_data=data,
->>> target_node='Y'))
+>>> gcm.fit_and_compute(gcm.arrow_strength,
+>>> causal_model,
+>>> bootstrap_training_data=data,
+>>> target_node='Y'))
>>> strength_median, strength_intervals
({('X', 'Y'): 45.90886398636573, ('Z', 'Y'): 15.47129383737619},
{('X', 'Y'): array([42.88319632, 50.43890079]), ('Z', 'Y'): array([13.44202416, 17.74266107])})
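Assembled into one script, the doctest above reads roughly as follows after the rename. This is a sketch: the structural equation for ``Y`` and the ``data`` frame are assumed (those doctest lines are not part of this hunk), and the single ``data`` frame is also used for ``assign_causal_mechanisms``, matching ``bootstrap_training_data=data``. Reported numbers vary from run to run.

import networkx as nx
import numpy as np
import pandas as pd
from dowhy import gcm

Z = np.random.normal(loc=0, scale=1, size=1000)
X = 2 * Z + np.random.normal(loc=0, scale=1, size=1000)
Y = 3 * X + 4 * Z + np.random.normal(loc=0, scale=1, size=1000)  # assumed equation for Y
data = pd.DataFrame(dict(X=X, Y=Y, Z=Z))

causal_model = gcm.StructuralCausalModel(nx.DiGraph([('Z', 'Y'), ('Z', 'X'), ('X', 'Y')]))
gcm.auto.assign_causal_mechanisms(causal_model, data)

# Each bootstrap round re-fits causal_model on a random subset of `data`
# before evaluating arrow_strength, so the intervals reflect fitting noise too.
strength_median, strength_intervals = gcm.confidence_intervals(
    gcm.fit_and_compute(gcm.arrow_strength,
                        causal_model,
                        bootstrap_training_data=data,
                        target_node='Y'))
print(strength_median)      # approximately {('X', 'Y'): ~45, ('Z', 'Y'): ~15}, as in the doctest
print(strength_intervals)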

View file

@@ -15,7 +15,7 @@ from .anomaly_scorers import (
)
from .cms import FunctionalCausalModel, InvertibleStructuralCausalModel, ProbabilisticCausalModel, StructuralCausalModel
from .confidence_intervals import confidence_intervals
-from .confidence_intervals_cms import bootstrap_sampling, bootstrap_training_and_sampling
+from .confidence_intervals_cms import bootstrap_sampling, fit_and_compute
from .density_estimators import GaussianMixtureDensityEstimator, KernelDensityEstimator1D
from .distribution_change import distribution_change, distribution_change_of_graphs
from .fcms import AdditiveNoiseModel, ClassificationModel, ClassifierFCM, PostNonlinearModel, PredictionModel
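For downstream code, the re-export from the public ``dowhy.gcm`` namespace changes in the same way, so imports need a matching one-word update:

# Before this commit:
# from dowhy.gcm import bootstrap_sampling, bootstrap_training_and_sampling
# After this commit:
from dowhy.gcm import bootstrap_sampling, fit_and_compute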

View file

@@ -17,7 +17,7 @@ from dowhy.gcm.fitting_sampling import fit
# results.
# Note that this function does not re-fit the causal model(s) and only executes the provided query as it is. In order
# to re-fit the graphical causal model on random subsets of the data before executing the query, consider using the
-# bootstrap_training_and_sampling function.
+# fit_and_compute function.
#
# **Example usage:**
#
@@ -32,9 +32,9 @@ from dowhy.gcm.fitting_sampling import fit
# lambda : gcm.arrow_strength(causal_model, target_node='Y').
#
# In order to incorporate uncertainties coming from fitting the causal model(s), we can use
-# gcm.bootstrap_training_and_sampling instead:
+# gcm.fit_and_compute instead:
# >>> strength_medians, strength_intervals = gcm.confidence_intervals(
-# >>> gcm.bootstrap_training_and_sampling(gcm.arrow_strength,
+# >>> gcm.fit_and_compute(gcm.arrow_strength,
# >>> causal_model,
# >>> bootstrap_training_data=data,
# >>> target_node='Y'))
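To make the distinction drawn in these module comments concrete, here is a sketch contrasting the two helpers; the toy model and data are stand-ins, not taken from the module's doctests.

import networkx as nx
import numpy as np
import pandas as pd
from dowhy import gcm

X = np.random.normal(size=1000)
Y = 2 * X + np.random.normal(size=1000)
data = pd.DataFrame({'X': X, 'Y': Y})
causal_model = gcm.StructuralCausalModel(nx.DiGraph([('X', 'Y')]))
gcm.auto.assign_causal_mechanisms(causal_model, data)

# bootstrap_sampling is an alias for functools.partial: it only binds the query
# arguments, so the model must be fitted beforehand and is NOT re-trained.
gcm.fit(causal_model, data)
medians_fixed_fit, intervals_fixed_fit = gcm.confidence_intervals(
    gcm.bootstrap_sampling(gcm.arrow_strength, causal_model, target_node='Y'))

# fit_and_compute additionally re-fits the model on a random subset of
# bootstrap_training_data before every evaluation, so the resulting intervals
# also capture the uncertainty from fitting the causal mechanisms.
medians_refit, intervals_refit = gcm.confidence_intervals(
    gcm.fit_and_compute(gcm.arrow_strength,
                        causal_model,
                        bootstrap_training_data=data,
                        target_node='Y'))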
@@ -43,7 +43,7 @@ from dowhy.gcm.fitting_sampling import fit
bootstrap_sampling = partial
-def bootstrap_training_and_sampling(
+def fit_and_compute(
f: Callable[
[Union[ProbabilisticCausalModel, StructuralCausalModel, InvertibleStructuralCausalModel], Any],
Dict[Any, Union[np.ndarray, float]],
@@ -60,10 +60,10 @@ def bootstrap_training_and_sampling(
**Example usage:**
>>> scores_median, scores_intervals = gcm.confidence_intervals(
->>> gcm.bootstrap_training_and_sampling(gcm.arrow_strength,
->>> causal_model,
->>> bootstrap_training_data=data,
->>> target_node='Y'))
+>>> gcm.fit_and_compute(gcm.arrow_strength,
+>>> causal_model,
+>>> bootstrap_training_data=data,
+>>> target_node='Y'))
:param f: The causal query to perform. A causal query is a function taking a graphical causal model as first
parameter and an arbitrary number of remaining parameters. It must return a dictionary with

View file

@@ -9,15 +9,15 @@ from dowhy.gcm import (
EmpiricalDistribution,
ProbabilisticCausalModel,
bootstrap_sampling,
-bootstrap_training_and_sampling,
draw_samples,
+fit_and_compute,
)
from dowhy.gcm.confidence_intervals import confidence_intervals
from dowhy.gcm.ml import create_hist_gradient_boost_regressor
@flaky(max_runs=2)
-def test_given_causal_graph_based_estimation_func_when_confidence_interval_then_can_use_bootstrap_training_and_sampling():
+def test_given_causal_graph_based_estimation_func_when_confidence_interval_then_can_use_fit_and_compute():
def draw_single_sample(causal_graph, variable):
return draw_samples(causal_graph, 1)[variable][0]
@@ -26,7 +26,7 @@ def test_given_causal_graph_based_estimation_func_when_confidence_interval_then_
causal_model.set_causal_mechanism("Y", AdditiveNoiseModel(create_hist_gradient_boost_regressor()))
median, interval = confidence_intervals(
-bootstrap_training_and_sampling(
+fit_and_compute(
draw_single_sample,
causal_model,
bootstrap_training_data=pd.DataFrame(
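The test above is cut off, but a self-contained sketch in the same style shows how the renamed helper is exercised; the synthetic training data and the forwarded ``variable="Y"`` keyword are assumptions for illustration, not the repository's test verbatim.

import networkx as nx
import numpy as np
import pandas as pd
from dowhy.gcm import (
    AdditiveNoiseModel,
    EmpiricalDistribution,
    ProbabilisticCausalModel,
    draw_samples,
    fit_and_compute,
)
from dowhy.gcm.confidence_intervals import confidence_intervals
from dowhy.gcm.ml import create_hist_gradient_boost_regressor


def draw_single_sample(causal_graph, variable):
    return draw_samples(causal_graph, 1)[variable][0]


X = np.random.normal(size=200)
training_data = pd.DataFrame({"X": X, "Y": 2 * X + np.random.normal(size=200)})

causal_model = ProbabilisticCausalModel(nx.DiGraph([("X", "Y")]))
causal_model.set_causal_mechanism("X", EmpiricalDistribution())
causal_model.set_causal_mechanism("Y", AdditiveNoiseModel(create_hist_gradient_boost_regressor()))

# Each bootstrap round re-fits causal_model on a random subset of training_data
# and then draws a single sample of Y from the re-fitted model; variable="Y" is
# forwarded to draw_single_sample (an assumed choice for this sketch).
median, interval = confidence_intervals(
    fit_and_compute(
        draw_single_sample,
        causal_model,
        bootstrap_training_data=training_data,
        variable="Y",
    )
)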