Rename gcm bootstrap training to fit_and_compute
Related to issue https://github.com/py-why/dowhy/issues/689

Signed-off-by: Patrick Bloebaum <bloebp@amazon.com>
Parent: f749ceebb7
Commit: fb5b4d5260
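For orientation, a minimal before/after sketch of the call-site change follows (not part of the commit). It is assembled from the snippets touched in this diff; the data-generating equation for Y and the use of "data" when assigning causal mechanisms are illustrative assumptions.

# Sketch only: migrating a caller from the old name to the new one.
import networkx as nx
import numpy as np
import pandas as pd
from dowhy import gcm

Z = np.random.normal(loc=0, scale=1, size=1000)
X = 2 * Z + np.random.normal(loc=0, scale=1, size=1000)
Y = 3 * X + 2 * Z + np.random.normal(loc=0, scale=1, size=1000)  # assumed form, not taken from the diff
data = pd.DataFrame(dict(Z=Z, X=X, Y=Y))

causal_model = gcm.StructuralCausalModel(nx.DiGraph([('Z', 'Y'), ('Z', 'X'), ('X', 'Y')]))
gcm.auto.assign_causal_mechanisms(causal_model, data)

# Old spelling (removed by this commit):
#     gcm.confidence_intervals(
#         gcm.bootstrap_training_and_sampling(gcm.arrow_strength,
#                                             causal_model,
#                                             bootstrap_training_data=data,
#                                             target_node='Y'))
# New spelling; behavior is unchanged, only the name differs:
strength_median, strength_intervals = gcm.confidence_intervals(
    gcm.fit_and_compute(gcm.arrow_strength,
                        causal_model,
                        bootstrap_training_data=data,
                        target_node='Y'))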
@@ -283,11 +283,11 @@
     "gcm.config.disable_progress_bars() # to disable print statements when computing Shapley values\n",
     "\n",
     "median_attribs, uncertainty_attribs = gcm.confidence_intervals(\n",
-    "    gcm.bootstrap_training_and_sampling(gcm.attribute_anomalies,\n",
-    "                                        causal_model,\n",
-    "                                        normal_data,\n",
-    "                                        target_node='Website',\n",
-    "                                        anomaly_samples=outlier_data),\n",
+    "    gcm.fit_and_compute(gcm.attribute_anomalies,\n",
+    "                        causal_model,\n",
+    "                        normal_data,\n",
+    "                        target_node='Website',\n",
+    "                        anomaly_samples=outlier_data),\n",
     "    num_bootstrap_resamples=10)"
    ]
   },
@@ -479,14 +479,14 @@
    "outputs": [],
    "source": [
     "median_mean_latencies, uncertainty_mean_latencies = gcm.confidence_intervals(\n",
-    "    lambda : gcm.bootstrap_training_and_sampling(gcm.interventional_samples,\n",
-    "                                                 causal_model,\n",
-    "                                                 outlier_data,\n",
-    "                                                 interventions = {\n",
-    "                                                     \"Caching Service\": lambda x: x-1,\n",
-    "                                                     \"Shipping Cost Service\": lambda x: x+2\n",
-    "                                                 },\n",
-    "                                                 observed_data=outlier_data)().mean().to_dict(),\n",
+    "    lambda : gcm.fit_and_compute(gcm.interventional_samples,\n",
+    "                                 causal_model,\n",
+    "                                 outlier_data,\n",
+    "                                 interventions = {\n",
+    "                                     \"Caching Service\": lambda x: x-1,\n",
+    "                                     \"Shipping Cost Service\": lambda x: x+2\n",
+    "                                 },\n",
+    "                                 observed_data=outlier_data)().mean().to_dict(),\n",
     "    num_bootstrap_resamples=10)"
    ]
   },

@@ -97,8 +97,8 @@ second item contains the intervals of the contribution scores for each variable.
 To avoid defining a new function, we can streamline that call by using a lambda:
 
 >>> gcm.confidence_intervals(lambda: gcm.distribution_change(causal_model,
->>>                                                           data_old, data_new,
->>>                                                           target_node='Z'))
+>>>                                                           data_old, data_new,
+>>>                                                           target_node='Z'))
 
 Conveniently bootstrapping graph training on random subsets of training data
 -----------------------------------------------------------------------------
@@ -107,7 +107,7 @@ Many of the causal queries in the GCM package require a trained causal graph as
 compute confidence intervals for these methods, we need to explicitly re-train our causal graph
 multiple times with different random subsets of data and also run our causal query with each newly
 trained graph. To do this conveniently, the GCM package provides a function
-``bootstrap_training_and_sampling``. Assuming that we have ``data`` and a causal graph:
+``fit_and_compute``. Assuming that we have ``data`` and a causal graph:
 
 >>> Z = np.random.normal(loc=0, scale=1, size=1000)
 >>> X = 2*Z + np.random.normal(loc=0, scale=1, size=1000)
@@ -117,13 +117,13 @@ trained graph. To do this conveniently, the GCM package provides a function
 >>> causal_model = gcm.StructuralCausalModel(nx.DiGraph([('Z', 'Y'), ('Z', 'X'), ('X', 'Y')]))
 >>> gcm.auto.assign_causal_mechanisms(causal_model, data_old)
 
-we can now use ``bootstrap_training_and_sampling`` as follows:
+we can now use ``fit_and_compute`` as follows:
 
 >>> strength_median, strength_intervals = gcm.confidence_intervals(
->>>     gcm.bootstrap_training_and_sampling(gcm.arrow_strength,
->>>                                         causal_model,
->>>                                         bootstrap_training_data=data,
->>>                                         target_node='Y'))
+>>>     gcm.fit_and_compute(gcm.arrow_strength,
+>>>                         causal_model,
+>>>                         bootstrap_training_data=data,
+>>>                         target_node='Y'))
 >>> strength_median, strength_intervals
 ({('X', 'Y'): 45.90886398636573, ('Z', 'Y'): 15.47129383737619},
  {('X', 'Y'): array([42.88319632, 50.43890079]), ('Z', 'Y'): array([13.44202416, 17.74266107])})

@@ -15,7 +15,7 @@ from .anomaly_scorers import (
 )
 from .cms import FunctionalCausalModel, InvertibleStructuralCausalModel, ProbabilisticCausalModel, StructuralCausalModel
 from .confidence_intervals import confidence_intervals
-from .confidence_intervals_cms import bootstrap_sampling, bootstrap_training_and_sampling
+from .confidence_intervals_cms import bootstrap_sampling, fit_and_compute
 from .density_estimators import GaussianMixtureDensityEstimator, KernelDensityEstimator1D
 from .distribution_change import distribution_change, distribution_change_of_graphs
 from .fcms import AdditiveNoiseModel, ClassificationModel, ClassifierFCM, PostNonlinearModel, PredictionModel

@@ -17,7 +17,7 @@ from dowhy.gcm.fitting_sampling import fit
 # results.
 # Note that this function does not re-fit the causal model(s) and only executes the provided query as it is. In order
 # to re-refit the graphical causal model on random subsets of the data before executing the query, consider using the
-# bootstrap_training_and_sampling function.
+# fit_and_compute function.
 #
 # **Example usage:**
 #
@@ -32,9 +32,9 @@ from dowhy.gcm.fitting_sampling import fit
 # lambda : gcm.arrow_strength(causal_model, target_node='Y').
 #
 # In order to incorporate uncertainties coming from fitting the causal model(s), we can use
-# gcm.bootstrap_training_and_sampling instead:
+# gcm.fit_and_compute instead:
 # >>> strength_medians, strength_intervals = gcm.confidence_intervals(
-# >>>     gcm.bootstrap_training_and_sampling(gcm.arrow_strength,
+# >>>     gcm.fit_and_compute(gcm.arrow_strength,
 # >>>                         causal_model,
 # >>>                         bootstrap_training_data=data,
 # >>>                         target_node='Y'))
@@ -43,7 +43,7 @@ from dowhy.gcm.fitting_sampling import fit
 bootstrap_sampling = partial
 
 
-def bootstrap_training_and_sampling(
+def fit_and_compute(
     f: Callable[
         [Union[ProbabilisticCausalModel, StructuralCausalModel, InvertibleStructuralCausalModel], Any],
         Dict[Any, Union[np.ndarray, float]],
@@ -60,10 +60,10 @@ def bootstrap_training_and_sampling(
     **Example usage:**
 
         >>> scores_median, scores_intervals = gcm.confidence_intervals(
-        >>>     gcm.bootstrap_training_and_sampling(gcm.arrow_strength,
-        >>>                                         causal_model,
-        >>>                                         bootstrap_training_data=data,
-        >>>                                         target_node='Y'))
+        >>>     gcm.fit_and_compute(gcm.arrow_strength,
+        >>>                         causal_model,
+        >>>                         bootstrap_training_data=data,
+        >>>                         target_node='Y'))
 
     :param f: The causal query to perform. A causal query is a function taking a graphical causal model as first
               parameter and an arbitrary number of remaining parameters. It must return a dictionary with

@@ -9,15 +9,15 @@ from dowhy.gcm import (
     EmpiricalDistribution,
     ProbabilisticCausalModel,
     bootstrap_sampling,
-    bootstrap_training_and_sampling,
     draw_samples,
+    fit_and_compute,
 )
 from dowhy.gcm.confidence_intervals import confidence_intervals
 from dowhy.gcm.ml import create_hist_gradient_boost_regressor
 
 
 @flaky(max_runs=2)
-def test_given_causal_graph_based_estimation_func_when_confidence_interval_then_can_use_bootstrap_training_and_sampling():
+def test_given_causal_graph_based_estimation_func_when_confidence_interval_then_can_use_fit_and_compute():
     def draw_single_sample(causal_graph, variable):
         return draw_samples(causal_graph, 1)[variable][0]
@@ -26,7 +26,7 @@ def test_given_causal_graph_based_estimation_func_when_confidence_interval_then_
     causal_model.set_causal_mechanism("Y", AdditiveNoiseModel(create_hist_gradient_boost_regressor()))
 
     median, interval = confidence_intervals(
-        bootstrap_training_and_sampling(
+        fit_and_compute(
             draw_single_sample,
             causal_model,
             bootstrap_training_data=pd.DataFrame(