From f13ed30f42440552e4a912372abcb7c3023fc9c0 Mon Sep 17 00:00:00 2001
From: Chris Trevino <darthtrevino@users.noreply.github.com>
Date: Fri, 28 Oct 2022 15:46:08 -0700
Subject: [PATCH] Restore some key tests into the regular CI pipeline (#725)

* restore some key notebook tests

Signed-off-by: Chris Trevino <darthtrevino@gmail.com>

* remove advanced marks from unit tests

Signed-off-by: Chris Trevino <darthtrevino@gmail.com>

* remove advanced mark from test_validation.py

Signed-off-by: Chris Trevino <darthtrevino@gmail.com>

* apply formatting

Signed-off-by: Chris Trevino <darthtrevino@gmail.com>

* update test durations, restore some test notebooks

Signed-off-by: Chris Trevino <darthtrevino@gmail.com>

* bump the test splits

Signed-off-by: Chris Trevino <darthtrevino@gmail.com>

* restore main notebook tests, take a more piecemeal approach

Signed-off-by: Chris Trevino <darthtrevino@gmail.com>

Signed-off-by: Chris Trevino <darthtrevino@gmail.com>
---
 .github/workflows/ci.yml                      |   4 +-
 .test_durations                               | 744 ++++++++++--------
 .../test_data_subset_refuter.py               |   2 -
 tests/gcm/test_anomaly_attribution.py         |   3 +-
 tests/gcm/test_feature.py                     |   3 +-
 tests/gcm/test_intrinsic_influence.py         |   6 +-
 tests/gcm/test_stats.py                       |   3 +-
 tests/gcm/test_validation.py                  |   3 -
 tests/test_notebooks.py                       |   3 +-
 9 files changed, 411 insertions(+), 360 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 038589543..380627783 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,7 +15,7 @@ jobs:
       matrix:
         python-version: [3.8, 3.9]
         poetry-version: [1.2.0]
-        test-group: [1, 2, 3, 4]
+        test-group: [1, 2, 3, 4, 5, 6]
 
     steps:
     - uses: actions/checkout@v2
@@ -47,7 +47,7 @@ jobs:
       run: poetry run poe format_check
 
     - name: Test
-      run: poetry run poe test --splits 4 --group ${{ matrix.test-group }}
+      run: poetry run poe test --splits 6 --group ${{ matrix.test-group }}
 
     - uses: fateyan/action-discord-notifier@v1
       if: failure() && github.ref == 'refs/heads/main'
diff --git a/.test_durations b/.test_durations
index 7ea3a390e..f202bd6c7 100644
--- a/.test_durations
+++ b/.test_durations
@@ -1,240 +1,287 @@
 {
-    "tests/causal_estimators/test_causalml_estimator.py::TestCausalmlEstimator::test_causalml_LRSRegressor": 1.8709904220013414,
-    "tests/causal_estimators/test_causalml_estimator.py::TestCausalmlEstimator::test_causalml_MLPTRegressor": 0.3396745189966168,
-    "tests/causal_estimators/test_causalml_estimator.py::TestCausalmlEstimator::test_causalml_RLearner": 2.3262762230042426,
-    "tests/causal_estimators/test_causalml_estimator.py::TestCausalmlEstimator::test_causalml_XGBTRegressor": 0.473878592998517,
-    "tests/causal_estimators/test_causalml_estimator.py::TestCausalmlEstimator::test_causalml_XLearner": 0.9274584140002844,
-    "tests/causal_estimators/test_econml_estimator.py::TestEconMLEstimator::test_backdoor_estimators": 28.61218002500027,
-    "tests/causal_estimators/test_econml_estimator.py::TestEconMLEstimator::test_iv_estimators": 31.404631056000653,
-    "tests/causal_estimators/test_generalized_linear_model_estimator.py::TestGeneralizedLinearModelEstimator::test_average_treatment_effect[0.1-GeneralizedLinearModelEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-treatment_is_binary0-outcome_is_binary0-backdoor]": 8.239595178001764,
-    "tests/causal_estimators/test_instrumental_variable_estimator.py::TestInstrumentalVariableEstimator::test_average_treatment_effect[0.4-InstrumentalVariableEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-treatment_is_binary0-outcome_is_binary0-iv]": 19.130412003003585,
-    "tests/causal_estimators/test_linear_regression_estimator.py::TestLinearRegressionEstimator::test_average_treatment_effect[0.1-LinearRegressionEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-treatment_is_binary0-treatment_is_category0-outcome_is_binary0]": 26.658716234000167,
-    "tests/causal_estimators/test_linear_regression_estimator.py::TestLinearRegressionEstimator::test_average_treatment_effect[0.1-LinearRegressionEstimator-num_common_causes1-num_instruments1-num_effect_modifiers1-num_treatments1-treatment_is_binary1-treatment_is_category1-outcome_is_binary1]": 31.03801075100273,
-    "tests/causal_estimators/test_linear_regression_estimator.py::TestLinearRegressionEstimator::test_average_treatment_effect[0.1-LinearRegressionEstimator-num_common_causes2-num_instruments2-num_effect_modifiers2-num_treatments2-treatment_is_binary2-treatment_is_category2-outcome_is_binary2]": 9.276061235999805,
-    "tests/causal_estimators/test_propensity_score_matching_estimator.py::TestPropensityScoreMatchingEstimator::test_average_treatment_effect[0.3-PropensityScoreMatchingEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-treatment_is_binary0-outcome_is_binary0]": 31.91888126900085,
-    "tests/causal_estimators/test_propensity_score_stratification_estimator.py::TestPropensityScoreStratificationEstimator::test_average_treatment_effect[0.1-PropensityScoreStratificationEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-treatment_is_binary0-outcome_is_binary0]": 138.48159910100003,
-    "tests/causal_estimators/test_propensity_score_weighting_estimator.py::TestPropensityScoreWeightingEstimator::test_average_treatment_effect[0.4-PropensityScoreWeightingEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-treatment_is_binary0-outcome_is_binary0]": 4.26221863000319,
-    "tests/causal_estimators/test_regression_discontinuity_estimator.py::TestRegressionDiscontinuityEstimator::test_average_treatment_effect[0.2-RegressionDiscontinuityEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-treatment_is_binary0-outcome_is_binary0-iv]": 1.4388477979991876,
-    "tests/causal_estimators/test_two_stage_regression_estimator.py::TestTwoStageRegressionEstimator::test_average_treatment_effect[0.1-TwoStageRegressionEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-num_frontdoor_variables0-treatment_is_binary0-outcome_is_binary0]": 0.9643940740024846,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[book_of_why_game2]": 0.0014306690027297009,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[book_of_why_game5]": 0.0012191680034447927,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[book_of_why_game5_C_is_unobserved]": 0.0009748299999046139,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[common_cause_of_mediator1]": 0.0008750499982852489,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[common_cause_of_mediator2]": 0.0008238410009653307,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[iv]": 0.0008677469995745923,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[mbias_with_unobserved]": 0.001158308001322439,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[no_treatment_but_valid_maximal_set]": 0.0009303000006184448,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[pearl_backdoor_example_graph]": 0.0015849739975237753,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[pearl_simpsons_machine_lvl1]": 0.0014502840022032615,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[pearl_simpsons_paradox_1c]": 0.0008510210027452558,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[pearl_simpsons_paradox_1d]": 0.0007988319957803469,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[pearl_simpsons_paradox_2a]": 0.0007327519961108919,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[pearl_simpsons_paradox_2b]": 0.0008152410009643063,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[pearl_simpsons_paradox_2b_L_observed]": 0.0009078209986910224,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[simple_no_confounder_graph]": 0.0008009720004338305,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[simple_selection_bias_graph]": 0.0006262339993554633,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[book_of_why_game2]": 0.0007835610049369279,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[book_of_why_game5]": 0.0013942850018793251,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[book_of_why_game5_C_is_unobserved]": 0.0010989290021825582,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[common_cause_of_mediator1]": 0.0012708910035144072,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[common_cause_of_mediator2]": 0.0008613020036136732,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[iv]": 0.0007903810001153033,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[mbias_with_unobserved]": 0.0007522919986513443,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[no_treatment_but_valid_maximal_set]": 0.0006968919988139533,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[pearl_backdoor_example_graph]": 0.0027513909990375396,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[pearl_simpsons_machine_lvl1]": 0.0007191530021373183,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[pearl_simpsons_paradox_1c]": 0.000729311999748461,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[pearl_simpsons_paradox_1d]": 0.0008954100012488198,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[pearl_simpsons_paradox_2a]": 0.0006999129946052562,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[pearl_simpsons_paradox_2b]": 0.0008020609966479242,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[pearl_simpsons_paradox_2b_L_observed]": 0.0010208290004811715,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[simple_no_confounder_graph]": 0.0006257239947444759,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[simple_selection_bias_graph]": 0.0007188820018200204,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[book_of_why_game2]": 0.0038469469982373994,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[book_of_why_game5]": 0.0031662459987273905,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[book_of_why_game5_C_is_unobserved]": 0.00308589699852746,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[common_cause_of_mediator1]": 0.0011341090030327905,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[common_cause_of_mediator2]": 0.003449792999163037,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[iv]": 0.0015098440017027315,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[mbias_with_unobserved]": 0.001752481002768036,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[no_treatment_but_valid_maximal_set]": 0.0012205960010760464,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[pearl_backdoor_example_graph]": 0.011672910997731378,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[pearl_simpsons_machine_lvl1]": 0.002395045001321705,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[pearl_simpsons_paradox_1c]": 0.0011582170009205583,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[pearl_simpsons_paradox_1d]": 0.001117357998737134,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[pearl_simpsons_paradox_2a]": 0.000866992002556799,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[pearl_simpsons_paradox_2b]": 0.0010696089993871283,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[pearl_simpsons_paradox_2b_L_observed]": 0.0013814560006721877,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[simple_no_confounder_graph]": 0.0010535480032558553,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[simple_selection_bias_graph]": 0.001052767998771742,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[book_of_why_game2]": 0.0026418130000820383,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[book_of_why_game5]": 0.0016909120022319257,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[book_of_why_game5_C_is_unobserved]": 0.0011231769967707805,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[common_cause_of_mediator1]": 0.0008846720011206344,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[common_cause_of_mediator2]": 0.0008662520012876485,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[iv]": 0.00082448000102886,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[mbias_with_unobserved]": 0.0014589849997719284,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[no_treatment_but_valid_maximal_set]": 0.0008935509977163747,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[pearl_backdoor_example_graph]": 0.004645663000701461,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[pearl_simpsons_machine_lvl1]": 0.0015170339975156821,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[pearl_simpsons_paradox_1c]": 0.0009246910012734588,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[pearl_simpsons_paradox_1d]": 0.0008557020009902772,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[pearl_simpsons_paradox_2a]": 0.0006576430023415014,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[pearl_simpsons_paradox_2b]": 0.0007975809967319947,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[pearl_simpsons_paradox_2b_L_observed]": 0.0010628490017552394,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[simple_no_confounder_graph]": 0.0007228830036183354,
-    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[simple_selection_bias_graph]": 0.0006180359996506013,
-    "tests/causal_identifiers/test_efficient_backdoor_identifier.py::test_fail_multivar_outcome_efficient_backdoor_algorithms": 0.0007458220025000628,
-    "tests/causal_identifiers/test_efficient_backdoor_identifier.py::test_fail_multivar_treat_efficient_backdoor_algorithms": 0.0007435719962813891,
-    "tests/causal_identifiers/test_efficient_backdoor_identifier.py::test_fail_negative_costs_efficient_backdoor_algorithms": 0.0009358400056953542,
-    "tests/causal_identifiers/test_efficient_backdoor_identifier.py::test_fail_unobserved_cond_vars_efficient_backdoor_algorithms": 0.0007732110025244765,
-    "tests/causal_identifiers/test_efficient_backdoor_identifier.py::test_identify_efficient_backdoor_algorithms": 0.6694562520024192,
-    "tests/causal_identifiers/test_id_identifier.py::TestIDIdentifier::test_1": 0.0017407909981557168,
-    "tests/causal_identifiers/test_id_identifier.py::TestIDIdentifier::test_2": 0.001019930001348257,
-    "tests/causal_identifiers/test_id_identifier.py::TestIDIdentifier::test_3": 0.0014090740005485713,
-    "tests/causal_identifiers/test_id_identifier.py::TestIDIdentifier::test_4": 0.0013240250009403098,
-    "tests/causal_identifiers/test_id_identifier.py::TestIDIdentifier::test_5": 0.0013728450030612294,
-    "tests/causal_identifiers/test_id_identifier.py::TestIDIdentifier::test_6": 0.0011200879998796154,
-    "tests/causal_identifiers/test_optimize_backdoor.py::TestOptimizeBackdoorIdentifier::test_1": 0.001310937001107959,
-    "tests/causal_identifiers/test_optimize_backdoor.py::TestOptimizeBackdoorIdentifier::test_2": 0.001268016996618826,
-    "tests/causal_identifiers/test_optimize_backdoor.py::TestOptimizeBackdoorIdentifier::test_3": 0.0015109139967535157,
-    "tests/causal_identifiers/test_optimize_backdoor.py::TestOptimizeBackdoorIdentifier::test_4": 0.0012465669969969895,
-    "tests/causal_identifiers/test_optimize_backdoor.py::TestOptimizeBackdoorIdentifier::test_5": 0.00135361599677708,
-    "tests/causal_identifiers/test_optimize_backdoor.py::TestOptimizeBackdoorIdentifier::test_6": 0.0013527060000342317,
+    "tests/causal_estimators/test_causalml_estimator.py::TestCausalmlEstimator::test_causalml_LRSRegressor": 0.3759101269988605,
+    "tests/causal_estimators/test_causalml_estimator.py::TestCausalmlEstimator::test_causalml_MLPTRegressor": 0.39586411700292956,
+    "tests/causal_estimators/test_causalml_estimator.py::TestCausalmlEstimator::test_causalml_RLearner": 3.5449028330003785,
+    "tests/causal_estimators/test_causalml_estimator.py::TestCausalmlEstimator::test_causalml_XGBTRegressor": 16.717251526999462,
+    "tests/causal_estimators/test_causalml_estimator.py::TestCausalmlEstimator::test_causalml_XLearner": 6.498728092999954,
+    "tests/causal_estimators/test_econml_estimator.py::TestEconMLEstimator::test_backdoor_estimators": 34.96089299800042,
+    "tests/causal_estimators/test_econml_estimator.py::TestEconMLEstimator::test_iv_estimators": 34.40518009199877,
+    "tests/causal_estimators/test_generalized_linear_model_estimator.py::TestGeneralizedLinearModelEstimator::test_average_treatment_effect[0.1-GeneralizedLinearModelEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-treatment_is_binary0-outcome_is_binary0-backdoor]": 7.966845259001275,
+    "tests/causal_estimators/test_instrumental_variable_estimator.py::TestInstrumentalVariableEstimator::test_average_treatment_effect[0.4-InstrumentalVariableEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-treatment_is_binary0-outcome_is_binary0-iv]": 19.22158048899837,
+    "tests/causal_estimators/test_linear_regression_estimator.py::TestLinearRegressionEstimator::test_average_treatment_effect[0.1-LinearRegressionEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-treatment_is_binary0-treatment_is_category0-outcome_is_binary0]": 29.023863354997957,
+    "tests/causal_estimators/test_linear_regression_estimator.py::TestLinearRegressionEstimator::test_average_treatment_effect[0.1-LinearRegressionEstimator-num_common_causes1-num_instruments1-num_effect_modifiers1-num_treatments1-treatment_is_binary1-treatment_is_category1-outcome_is_binary1]": 31.650325972997962,
+    "tests/causal_estimators/test_linear_regression_estimator.py::TestLinearRegressionEstimator::test_average_treatment_effect[0.1-LinearRegressionEstimator-num_common_causes2-num_instruments2-num_effect_modifiers2-num_treatments2-treatment_is_binary2-treatment_is_category2-outcome_is_binary2]": 10.845288793998407,
+    "tests/causal_estimators/test_propensity_score_matching_estimator.py::TestPropensityScoreMatchingEstimator::test_average_treatment_effect[0.3-PropensityScoreMatchingEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-treatment_is_binary0-outcome_is_binary0]": 30.789178972998343,
+    "tests/causal_estimators/test_propensity_score_stratification_estimator.py::TestPropensityScoreStratificationEstimator::test_average_treatment_effect[0.1-PropensityScoreStratificationEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-treatment_is_binary0-outcome_is_binary0]": 160.28022117399996,
+    "tests/causal_estimators/test_propensity_score_weighting_estimator.py::TestPropensityScoreWeightingEstimator::test_average_treatment_effect[0.4-PropensityScoreWeightingEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-treatment_is_binary0-outcome_is_binary0]": 4.023867720999988,
+    "tests/causal_estimators/test_regression_discontinuity_estimator.py::TestRegressionDiscontinuityEstimator::test_average_treatment_effect[0.2-RegressionDiscontinuityEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-treatment_is_binary0-outcome_is_binary0-iv]": 1.5116642730008607,
+    "tests/causal_estimators/test_two_stage_regression_estimator.py::TestTwoStageRegressionEstimator::test_average_treatment_effect[0.1-TwoStageRegressionEstimator-num_common_causes0-num_instruments0-num_effect_modifiers0-num_treatments0-num_frontdoor_variables0-treatment_is_binary0-outcome_is_binary0]": 1.1414662810002483,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[book_of_why_game2]": 0.0018126160011888715,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[book_of_why_game5]": 0.0017006359994411469,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[book_of_why_game5_C_is_unobserved]": 0.001571333999891067,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[common_cause_of_mediator1]": 0.0010482779998710612,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[common_cause_of_mediator2]": 0.0012751069989462849,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[iv]": 0.0009463570004299982,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[mbias_with_unobserved]": 0.0017421259999537142,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[mediator-with-conf]": 0.0016782349994173273,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[mediator]": 0.0009926780003297608,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[no_treatment_but_valid_maximal_set]": 0.001140028000008897,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[pearl_backdoor_example_graph]": 0.0017095849980250932,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[pearl_simpsons_machine_lvl1]": 0.0017166450015793089,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[pearl_simpsons_paradox_1c]": 0.001360526001008111,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[pearl_simpsons_paradox_1d]": 0.0009135779982898384,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[pearl_simpsons_paradox_2a]": 0.001015068000924657,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[pearl_simpsons_paradox_2b]": 0.0014208780030458001,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[pearl_simpsons_paradox_2b_L_observed]": 0.001367434999338002,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[simple_no_confounder_graph]": 0.0008193280009436421,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_adjustment[simple_selection_bias_graph]": 0.0006739690015820088,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[book_of_why_game2]": 0.00189562599916826,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[book_of_why_game5]": 0.0019867740011250135,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[book_of_why_game5_C_is_unobserved]": 0.0011870069993165089,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[common_cause_of_mediator1]": 0.001332175001152791,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[common_cause_of_mediator2]": 0.0015398049999930663,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[iv]": 0.001466906001951429,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[mbias_with_unobserved]": 0.0013741579987254227,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[mediator-with-conf]": 0.001223986999320914,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[mediator]": 0.0011637180014076876,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[no_treatment_but_valid_maximal_set]": 0.001180844999908004,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[pearl_backdoor_example_graph]": 0.0019885229994542897,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[pearl_simpsons_machine_lvl1]": 0.0016714459998183884,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[pearl_simpsons_paradox_1c]": 0.0012556760011648294,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[pearl_simpsons_paradox_1d]": 0.0010645379989000503,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[pearl_simpsons_paradox_2a]": 0.001484447000621003,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[pearl_simpsons_paradox_2b]": 0.0011713570002029883,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[pearl_simpsons_paradox_2b_L_observed]": 0.0010907880005106563,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[simple_no_confounder_graph]": 0.0009234479985025246,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_maximal_direct_effect[simple_selection_bias_graph]": 0.000886957999682636,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[book_of_why_game2]": 0.000806748001195956,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[book_of_why_game5]": 0.0011636770013865316,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[book_of_why_game5_C_is_unobserved]": 0.0012951469998370158,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[common_cause_of_mediator1]": 0.0011018169989256421,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[common_cause_of_mediator2]": 0.0011450869988038903,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[iv]": 0.000854836998769315,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[mbias_with_unobserved]": 0.0007795979981892742,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[mediator-with-conf]": 0.0011874659994646208,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[mediator]": 0.001107837002564338,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[no_treatment_but_valid_maximal_set]": 0.0006330279993562726,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[pearl_backdoor_example_graph]": 0.002885192001485848,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[pearl_simpsons_machine_lvl1]": 0.000785227000960731,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[pearl_simpsons_paradox_1c]": 0.0007251780007209163,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[pearl_simpsons_paradox_1d]": 0.0008188480005628662,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[pearl_simpsons_paradox_2a]": 0.0007698690023971722,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[pearl_simpsons_paradox_2b]": 0.000812217002021498,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[pearl_simpsons_paradox_2b_L_observed]": 0.0009112580009968951,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[simple_no_confounder_graph]": 0.0005759289997513406,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_minimal_adjustment[simple_selection_bias_graph]": 0.0006972580013098195,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[book_of_why_game2]": 0.012994315000469214,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[book_of_why_game5]": 0.002812681999785127,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[book_of_why_game5_C_is_unobserved]": 0.0026856629992835224,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[common_cause_of_mediator1]": 0.0015318550013034837,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[common_cause_of_mediator2]": 0.0014273050019255606,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[iv]": 0.0009888179993140511,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[mbias_with_unobserved]": 0.001702436000414309,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[mediator-with-conf]": 0.001329264998275903,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[mediator]": 0.0010871759986912366,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[no_treatment_but_valid_maximal_set]": 0.001577997001732001,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[pearl_backdoor_example_graph]": 0.006493291000879253,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[pearl_simpsons_machine_lvl1]": 0.002696543997444678,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[pearl_simpsons_paradox_1c]": 0.0012809950003429549,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[pearl_simpsons_paradox_1d]": 0.0014350460005516652,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[pearl_simpsons_paradox_2a]": 0.0010957560007227585,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[pearl_simpsons_paradox_2b]": 0.011523588000272866,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[pearl_simpsons_paradox_2b_L_observed]": 0.0022056550023989985,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[simple_no_confounder_graph]": 0.001455075998819666,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_no_biased_sets[simple_selection_bias_graph]": 0.0018298760023753857,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[book_of_why_game2]": 0.002340043998628971,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[book_of_why_game5]": 0.002466014000674477,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[book_of_why_game5_C_is_unobserved]": 0.001431676000720472,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[common_cause_of_mediator1]": 0.0009155080006166827,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[common_cause_of_mediator2]": 0.0009229280003637541,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[iv]": 0.000789707999501843,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[mbias_with_unobserved]": 0.0012646070008486276,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[mediator-with-conf]": 0.0012902469989057863,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[mediator]": 0.000820707999082515,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[no_treatment_but_valid_maximal_set]": 0.001187568001114414,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[pearl_backdoor_example_graph]": 0.004794317999767372,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[pearl_simpsons_machine_lvl1]": 0.001530625000668806,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[pearl_simpsons_paradox_1c]": 0.0008735680003155721,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[pearl_simpsons_paradox_1d]": 0.0008980869988590712,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[pearl_simpsons_paradox_2a]": 0.0006939380018593511,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[pearl_simpsons_paradox_2b]": 0.0008037369989324361,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[pearl_simpsons_paradox_2b_L_observed]": 0.0010643670011631912,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[simple_no_confounder_graph]": 0.0007463379988621455,
+    "tests/causal_identifiers/test_backdoor_identifier.py::TestBackdoorIdentification::test_identify_backdoor_unobserved_not_in_backdoor_set[simple_selection_bias_graph]": 0.0007493590019294061,
+    "tests/causal_identifiers/test_efficient_backdoor_identifier.py::test_fail_multivar_outcome_efficient_backdoor_algorithms": 0.0008672480016684858,
+    "tests/causal_identifiers/test_efficient_backdoor_identifier.py::test_fail_multivar_treat_efficient_backdoor_algorithms": 0.0008551280006940942,
+    "tests/causal_identifiers/test_efficient_backdoor_identifier.py::test_fail_negative_costs_efficient_backdoor_algorithms": 0.0010978469999827212,
+    "tests/causal_identifiers/test_efficient_backdoor_identifier.py::test_fail_unobserved_cond_vars_efficient_backdoor_algorithms": 0.0009948770002665697,
+    "tests/causal_identifiers/test_efficient_backdoor_identifier.py::test_identify_efficient_backdoor_algorithms": 0.6932751590011321,
+    "tests/causal_identifiers/test_id_identifier.py::TestIDIdentifier::test_1": 0.002907263000452076,
+    "tests/causal_identifiers/test_id_identifier.py::TestIDIdentifier::test_2": 0.001736685999276233,
+    "tests/causal_identifiers/test_id_identifier.py::TestIDIdentifier::test_3": 0.002051943001788459,
+    "tests/causal_identifiers/test_id_identifier.py::TestIDIdentifier::test_4": 0.00194300499970268,
+    "tests/causal_identifiers/test_id_identifier.py::TestIDIdentifier::test_5": 0.001570145002915524,
+    "tests/causal_identifiers/test_id_identifier.py::TestIDIdentifier::test_6": 0.0012518270032160217,
+    "tests/causal_identifiers/test_optimize_backdoor.py::TestOptimizeBackdoorIdentifier::test_1": 0.001383537000947399,
+    "tests/causal_identifiers/test_optimize_backdoor.py::TestOptimizeBackdoorIdentifier::test_2": 0.0012244170011399547,
+    "tests/causal_identifiers/test_optimize_backdoor.py::TestOptimizeBackdoorIdentifier::test_3": 0.0015989059993444243,
+    "tests/causal_identifiers/test_optimize_backdoor.py::TestOptimizeBackdoorIdentifier::test_4": 0.0012727270004688762,
+    "tests/causal_identifiers/test_optimize_backdoor.py::TestOptimizeBackdoorIdentifier::test_5": 0.00143652699807717,
+    "tests/causal_identifiers/test_optimize_backdoor.py::TestOptimizeBackdoorIdentifier::test_6": 0.00155684499986819,
+    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_evalue": 0.038604204999501235,
+    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_evalue_linear_regression[backdoor.linear_regression]": 0.08990149099918199,
+    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_evalue_logistic_regression[backdoor.generalized_linear_model]": 0.20605873000022257,
+    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_linear_sensitivity_dataset_without_confounders[backdoor.linear_regression-effect_fraction_on_treatment0-benchmark_common_causes0-linear-partial-R2-0.95]": 3.2043318770010956,
     "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_linear_sensitivity_dataset_without_confounders[backdoor.linear_regression-effect_strength_on_t0-benchmark_common_causes0-linear-partial-R2-0.95]": 2.850189850996685,
+    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_linear_sensitivity_given_strength_of_confounding[backdoor.linear_regression-effect_fraction_on_treatment0-benchmark_common_causes0-linear-partial-R2]": 3.153957377999177,
     "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_linear_sensitivity_given_strength_of_confounding[backdoor.linear_regression-effect_strength_on_t0-benchmark_common_causes0-linear-partial-R2]": 2.7763720100010687,
+    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_linear_sensitivity_with_confounders[backdoor.linear_regression-effect_fraction_on_treatment0-benchmark_common_causes0-linear-partial-R2]": 3.195294589002515,
     "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_linear_sensitivity_with_confounders[backdoor.linear_regression-effect_strength_on_t0-benchmark_common_causes0-linear-partial-R2]": 2.8144416110008024,
-    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_refutation_binary_treatment[0.01-backdoor.propensity_score_matching-0.01-0.02]": 43.11950718300068,
-    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_refutation_continuous_treatment[0.01-iv.instrumental_variable-0.01-0.02]": 0.0784161240044341,
-    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_refutation_continuous_treatment_range_both_treatment_outcome[0.01-iv.instrumental_variable-effect_strength_on_t0-effect_strength_on_y0]": 1.1829802219981502,
-    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_refutation_continuous_treatment_range_outcome[0.01-iv.instrumental_variable-0.01-effect_strength_on_y0]": 0.18137515900525614,
-    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_refutation_continuous_treatment_range_treatment[0.01-iv.instrumental_variable-effect_strength_on_t0-0.02]": 0.17905196800347767,
-    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_binary[0.05-backdoor.propensity_score_matching-1000]": 0.8577377189976687,
-    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_binary_integer_argument[0.1-backdoor.propensity_score_matching-5-3-5000]": 4.554001046999474,
-    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_binary_list_argument[0.1-backdoor.propensity_score_matching-5-required_variables0-5000]": 4.412829904998944,
-    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_binary_list_negative_argument[0.1-backdoor.propensity_score_matching-5-required_variables0-5000]": 4.431730446001893,
-    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_continuous[0.05-iv.instrumental_variable-1000]": 0.3983250860037515,
-    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_continuous_integer_argument[0.05-iv.instrumental_variable-5-3-1000]": 0.3220115190015349,
-    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_continuous_list_argument[0.05-iv.instrumental_variable-5-required_variables0-1000]": 0.3215620599949034,
-    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_continuous_list_negative_argument[0.1-iv.instrumental_variable-5-required_variables0-5000]": 0.5046643770001538,
-    "tests/causal_refuters/test_data_subset_refuter.py::TestDataSubsetRefuter::test_refutation_data_subset_refuter_binary[0.01-backdoor.propensity_score_matching]": 68.55800845900012,
-    "tests/causal_refuters/test_data_subset_refuter.py::TestDataSubsetRefuter::test_refutation_data_subset_refuter_continuous[0.01-iv.instrumental_variable]": 1.498870042996714,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_noise_binary_treatment[0.2-backdoor.linear_regression-transformations0-1000]": 0.07436634000259801,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_noise_continuous_treatment[0.2-iv.instrumental_variable-transformations0]": 0.3916881670047587,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_permute_binary_treatment[0.2-backdoor.propensity_score_matching-transformations0-1000]": 0.49299202900147066,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_permute_continuous_treatment[0.2-backdoor.linear_regression-transformations0]": 0.9126691839992418,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_default_binary_treatment[0.1-backdoor.propensity_score_matching-1000]": 0.4539793549993192,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_default_continuous_treatment[0.03-iv.instrumental_variable]": 2.1723163930000737,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_knn_binary_treatment[0.2-backdoor.propensity_score_matching-transformations0-1000]": 0.46351385800517164,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_knn_continuous_treatment[0.2-iv.instrumental_variable-transformations0]": 0.8979279530030908,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_linear_regression_binary_treatment[0.2-backdoor.propensity_score_matching-transformations0-1000]": 0.47614499700284796,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_linear_regression_continuous_treatment[0.01-iv.instrumental_variable-transformations0]": 0.6039230140013387,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_neural_network_binary_treatment[0.1-backdoor.propensity_score_matching-transformations0-1000]": 0.5101833099979558,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_neural_network_continuous_treatment[0.01-iv.instrumental_variable-transformations0]": 2.9238356690002547,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_random_forest_binary_treatment[0.1-backdoor.propensity_score_matching-transformations0-1000]": 0.7357043550000526,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_random_forest_continuous_treatment[0.01-iv.instrumental_variable-transformations0-10000]": 0.8158579579976504,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_svm_binary_treatment[0.1-backdoor.propensity_score_matching-transformations0-1000]": 0.4558805480010051,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_svm_continuous_treatment[0.01-iv.instrumental_variable-transformations0-10000]": 0.9708759800014377,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_permute_data_binary_treatment[0.1-backdoor.linear_regression-transformations0-1000]": 0.048296427994500846,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_permute_data_continuous_treatment[0.03-iv.instrumental_variable-transformations0]": 0.1175915969943162,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_randomly_generated_binary_treatment[0.05-backdoor.propensity_score_matching-transformations0-1000]": 0.4404820630006725,
-    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_randomly_generated_continuous_treatment[0.05-iv.instrumental_variable-transformations0]": 0.11266090500066639,
-    "tests/causal_refuters/test_placebo_refuter.py::TestPlaceboRefuter::test_refutation_placebo_refuter_binary[0.1-backdoor.propensity_score_matching-5000]": 7.487174318997859,
-    "tests/causal_refuters/test_placebo_refuter.py::TestPlaceboRefuter::test_refutation_placebo_refuter_category[0.1-backdoor.linear_regression-5000]": 1.8801709030012717,
-    "tests/causal_refuters/test_placebo_refuter.py::TestPlaceboRefuter::test_refutation_placebo_refuter_category_non_consecutive_index[0.1-backdoor.linear_regression-5000]": 0.4805744439981936,
-    "tests/causal_refuters/test_placebo_refuter.py::TestPlaceboRefuter::test_refutation_placebo_refuter_continuous[0.03-backdoor.linear_regression-1000]": 1.4750072509996244,
-    "tests/data_transformers/test_pca_reducer.py::TestPCAReducer::test_reduce": 0.002902270993217826,
-    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_continuous_cause_continuous_confounder[10000-0.1]": 3.5036639429999923,
-    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_continuous_cause_discrete_confounder[10000-0.1]": 27.219203676999314,
-    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_discrete_cause_continuous_confounder[10000-0.1]": 0.09374719999686931,
-    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_discrete_cause_discrete_confounder[10000-0.1]": 0.2661919879974448,
-    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_with_dummy_data[1-variable_types0]": 0.011210563003260177,
-    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_with_full_specification_of_type[10000-variable_types0]": 0.026649983003153466,
-    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_with_no_specification_of_type[10000-variable_types0]": 0.023502381998696364,
-    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_with_partial_specification_of_type[10000-variable_types0]": 0.02382615099850227,
-    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_conditionally_dependent_data_when_perform_approx_kernel_based_test_then_reject": 0.29915133899703505,
-    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_conditionally_dependent_data_when_perform_kernel_based_test_then_reject": 1.1452262070015422,
-    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_conditionally_independent_data_when_perform_approx_kernel_based_test_then_not_reject": 0.27699546300209477,
-    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_conditionally_independent_data_when_perform_kernel_based_test_then_not_reject": 1.538889437000762,
-    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_dependent_data_when_perform_approx_kernel_based_test_then_reject": 0.2287549439970462,
-    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_dependent_data_when_perform_kernel_based_test_then_reject": 0.08614612100063823,
-    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_independent_data_when_perform_approx_kernel_based_test_then_not_reject": 0.20994599200275843,
-    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_independent_data_when_perform_kernel_based_test_then_not_reject": 0.08014346700292663,
-    "tests/gcm/independence_test/test_kernel.py::test_given_constant_data_when_perform_approx_kernel_based_test_then_returns_expected_result": 0.08669152299989946,
-    "tests/gcm/independence_test/test_kernel.py::test_given_constant_data_when_perform_kernel_based_test_then_returns_expected_result": 0.002242487000330584,
-    "tests/gcm/independence_test/test_kernel.py::test_given_constant_inputs_when_perform_kernel_based_test_then_returns_non_nan_value": 0.0007114549953257665,
-    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_conditionally_dependent_data_when_perform_approx_kernel_based_test_then_reject": 0.26513264200184494,
-    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_conditionally_dependent_data_when_perform_kernel_based_test_then_reject": 1.8727134019973164,
-    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_conditionally_independent_data_when_perform_approx_kernel_based_test_then_not_reject": 0.2970279729925096,
-    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_conditionally_independent_data_when_perform_kernel_based_test_then_not_reject": 1.007309553995583,
-    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_dependent_data_when_perform_approx_kernel_based_test_then_reject": 0.22697504900133936,
-    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_dependent_data_when_perform_kernel_based_test_then_reject": 0.07577386800039676,
-    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_independent_data_when_perform_approx_kernel_based_test_then_not_reject": 0.21268876300018746,
-    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_independent_data_when_perform_kernel_based_test_then_not_reject": 0.058165056001598714,
-    "tests/gcm/independence_test/test_kernel.py::test_given_random_seed_when_perform_conditional_approx_kernel_based_test_then_return_deterministic_result": 0.1216843800029892,
-    "tests/gcm/independence_test/test_kernel.py::test_given_random_seed_when_perform_conditional_kernel_based_test_then_return_deterministic_result": 0.03439881699887337,
-    "tests/gcm/independence_test/test_kernel.py::test_given_random_seed_when_perform_pairwise_approx_kernel_based_test_then_return_deterministic_result": 0.07384701699629659,
-    "tests/gcm/independence_test/test_kernel.py::test_given_random_seed_when_perform_pairwise_kernel_based_test_then_return_deterministic_result": 0.031348867003544,
+    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_non_parametric_sensitivity_given_strength_of_confounding[backdoor.econml.dml.KernelDML-2-benchmark_common_causes0-non-parametric-partial-R2]": 11.274044722000326,
+    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_partially_linear_sensitivity_given_strength_of_confounding[backdoor.econml.dml.LinearDML-2-benchmark_common_causes0-non-parametric-partial-R2]": 2.1124762050003483,
+    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_refutation_binary_treatment[0.01-backdoor.propensity_score_matching-0.01-0.02]": 44.626613692998944,
+    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_refutation_continuous_treatment[0.01-iv.instrumental_variable-0.01-0.02]": 0.08056703300098889,
+    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_refutation_continuous_treatment_range_both_treatment_outcome[0.01-iv.instrumental_variable-effect_strength_on_t0-effect_strength_on_y0]": 1.3013563799995609,
+    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_refutation_continuous_treatment_range_outcome[0.01-iv.instrumental_variable-0.01-effect_strength_on_y0]": 0.2020682439997472,
+    "tests/causal_refuters/test_add_unobserved_common_cause.py::TestAddUnobservedCommonCauseRefuter::test_refutation_continuous_treatment_range_treatment[0.01-iv.instrumental_variable-effect_strength_on_t0-0.02]": 0.20793321400014975,
+    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_binary[0.05-backdoor.propensity_score_matching-1000]": 0.8476695269982883,
+    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_binary_integer_argument[0.1-backdoor.propensity_score_matching-5-3-5000]": 4.607819500000915,
+    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_binary_list_argument[0.1-backdoor.propensity_score_matching-5-required_variables0-5000]": 4.454197867000403,
+    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_binary_list_negative_argument[0.1-backdoor.propensity_score_matching-5-required_variables0-5000]": 4.471698694002043,
+    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_continuous[0.05-iv.instrumental_variable-1000]": 0.4846738450014527,
+    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_continuous_integer_argument[0.05-iv.instrumental_variable-5-3-1000]": 0.4462375079965568,
+    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_continuous_list_argument[0.05-iv.instrumental_variable-5-required_variables0-1000]": 0.40095900500091375,
+    "tests/causal_refuters/test_bootstrap_refuter.py::TestDataSubsetRefuter::test_refutation_bootstrap_refuter_continuous_list_negative_argument[0.1-iv.instrumental_variable-5-required_variables0-5000]": 0.5441861099989183,
+    "tests/causal_refuters/test_data_subset_refuter.py::TestDataSubsetRefuter::test_refutation_data_subset_refuter_binary[0.01-backdoor.propensity_score_matching]": 68.9505145080002,
+    "tests/causal_refuters/test_data_subset_refuter.py::TestDataSubsetRefuter::test_refutation_data_subset_refuter_continuous[0.01-iv.instrumental_variable]": 1.811442590000297,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_noise_binary_treatment[0.2-backdoor.linear_regression-transformations0-1000]": 0.08030410499850404,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_noise_continuous_treatment[0.2-iv.instrumental_variable-transformations0]": 0.4127799799989589,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_permute_binary_treatment[0.2-backdoor.propensity_score_matching-transformations0-1000]": 0.47684506999758014,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_permute_continuous_treatment[0.2-backdoor.linear_regression-transformations0]": 0.9885844689979422,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_default_binary_treatment[0.1-backdoor.propensity_score_matching-1000]": 0.42835930500041286,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_default_continuous_treatment[0.03-iv.instrumental_variable]": 2.288028838000173,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_knn_binary_treatment[0.2-backdoor.propensity_score_matching-transformations0-1000]": 0.4866959130013129,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_knn_continuous_treatment[0.2-iv.instrumental_variable-transformations0]": 0.9384641270007705,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_linear_regression_binary_treatment[0.2-backdoor.propensity_score_matching-transformations0-1000]": 0.5279801420001604,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_linear_regression_continuous_treatment[0.01-iv.instrumental_variable-transformations0]": 0.7205875620002189,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_neural_network_binary_treatment[0.1-backdoor.propensity_score_matching-transformations0-1000]": 0.5486997209991387,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_neural_network_continuous_treatment[0.01-iv.instrumental_variable-transformations0]": 5.1585782559996005,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_random_forest_binary_treatment[0.1-backdoor.propensity_score_matching-transformations0-1000]": 0.732061638002051,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_random_forest_continuous_treatment[0.01-iv.instrumental_variable-transformations0-10000]": 0.8679392299982283,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_svm_binary_treatment[0.1-backdoor.propensity_score_matching-transformations0-1000]": 0.46700796699769853,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_internal_svm_continuous_treatment[0.01-iv.instrumental_variable-transformations0-10000]": 1.0434644179986208,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_permute_data_binary_treatment[0.1-backdoor.linear_regression-transformations0-1000]": 0.056201067001893534,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_permute_data_continuous_treatment[0.03-iv.instrumental_variable-transformations0]": 0.1261690240007738,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_randomly_generated_binary_treatment[0.05-backdoor.propensity_score_matching-transformations0-1000]": 0.42547475500396104,
+    "tests/causal_refuters/test_dummy_outcome_refuter.py::TestDummyOutcomeRefuter::test_refutation_dummy_outcome_refuter_randomly_generated_continuous_treatment[0.05-iv.instrumental_variable-transformations0]": 0.11543983599767671,
+    "tests/causal_refuters/test_placebo_refuter.py::TestPlaceboRefuter::test_refutation_placebo_refuter_binary[0.1-backdoor.propensity_score_matching-5000]": 7.3666792690019065,
+    "tests/causal_refuters/test_placebo_refuter.py::TestPlaceboRefuter::test_refutation_placebo_refuter_category[0.1-backdoor.linear_regression-5000]": 2.8750521050023963,
+    "tests/causal_refuters/test_placebo_refuter.py::TestPlaceboRefuter::test_refutation_placebo_refuter_category_non_consecutive_index[0.1-backdoor.linear_regression-5000]": 0.47494545400149946,
+    "tests/causal_refuters/test_placebo_refuter.py::TestPlaceboRefuter::test_refutation_placebo_refuter_continuous[0.03-backdoor.linear_regression-1000]": 1.8235527470005763,
+    "tests/data_transformers/test_pca_reducer.py::TestPCAReducer::test_reduce": 0.002960286001325585,
+    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_continuous_cause_continuous_confounder[10000-0.1]": 3.5447984439997526,
+    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_continuous_cause_discrete_confounder[10000-0.1]": 27.0796278050002,
+    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_discrete_cause_continuous_confounder[10000-0.1]": 0.09848521200001414,
+    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_discrete_cause_discrete_confounder[10000-0.1]": 0.5648940689970914,
+    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_with_dummy_data[1-variable_types0]": 0.012208598000142956,
+    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_with_full_specification_of_type[10000-variable_types0]": 0.02463898299902212,
+    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_with_no_specification_of_type[10000-variable_types0]": 0.024161270002878155,
+    "tests/do_sampler/test_pandas_do_api.py::TestPandasDoAPI::test_pandas_api_with_partial_specification_of_type[10000-variable_types0]": 0.024245782999059884,
+    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_conditionally_dependent_data_when_perform_approx_kernel_based_test_then_reject": 0.2696055870019336,
+    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_conditionally_dependent_data_when_perform_kernel_based_test_then_reject": 1.410417672001131,
+    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_conditionally_independent_data_when_perform_approx_kernel_based_test_then_not_reject": 0.25542592800047714,
+    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_conditionally_independent_data_when_perform_kernel_based_test_then_not_reject": 1.27113092700165,
+    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_dependent_data_when_perform_approx_kernel_based_test_then_reject": 0.22055077500044717,
+    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_dependent_data_when_perform_kernel_based_test_then_reject": 0.03890899799989711,
+    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_independent_data_when_perform_approx_kernel_based_test_then_not_reject": 0.20551914299903729,
+    "tests/gcm/independence_test/test_kernel.py::test_given_categorical_independent_data_when_perform_kernel_based_test_then_not_reject": 0.03948311400017701,
+    "tests/gcm/independence_test/test_kernel.py::test_given_constant_data_when_perform_approx_kernel_based_test_then_returns_expected_result": 0.03382713999963016,
+    "tests/gcm/independence_test/test_kernel.py::test_given_constant_data_when_perform_kernel_based_test_then_returns_expected_result": 0.0017116209983214503,
+    "tests/gcm/independence_test/test_kernel.py::test_given_constant_inputs_when_perform_kernel_based_test_then_returns_non_nan_value": 0.0004904510005871998,
+    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_conditionally_dependent_data_when_perform_approx_kernel_based_test_then_reject": 0.269351683997229,
+    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_conditionally_dependent_data_when_perform_kernel_based_test_then_reject": 1.4925917610016768,
+    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_conditionally_independent_data_when_perform_approx_kernel_based_test_then_not_reject": 0.20362845199997537,
+    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_conditionally_independent_data_when_perform_kernel_based_test_then_not_reject": 1.9381592869995075,
+    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_dependent_data_when_perform_approx_kernel_based_test_then_reject": 0.20551107300161675,
+    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_dependent_data_when_perform_kernel_based_test_then_reject": 0.04344868600128393,
+    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_independent_data_when_perform_approx_kernel_based_test_then_not_reject": 0.20429461199819343,
+    "tests/gcm/independence_test/test_kernel.py::test_given_continuous_independent_data_when_perform_kernel_based_test_then_not_reject": 0.06942105699999956,
+    "tests/gcm/independence_test/test_kernel.py::test_given_random_seed_when_perform_conditional_approx_kernel_based_test_then_return_deterministic_result": 0.11785191600029066,
+    "tests/gcm/independence_test/test_kernel.py::test_given_random_seed_when_perform_conditional_kernel_based_test_then_return_deterministic_result": 0.01995912500024133,
+    "tests/gcm/independence_test/test_kernel.py::test_given_random_seed_when_perform_pairwise_approx_kernel_based_test_then_return_deterministic_result": 0.09124762999999803,
+    "tests/gcm/independence_test/test_kernel.py::test_given_random_seed_when_perform_pairwise_kernel_based_test_then_return_deterministic_result": 0.010564567000983516,
     "tests/gcm/independence_test/test_kernel.py::test_given_too_few_samples_when_perform_kernel_based_test_then_raise_error": 0.0005369960053940304,
-    "tests/gcm/independence_test/test_kernel.py::test_given_weak_dependency_when_perform_kernel_based_test_then_returns_expected_result": 6.355862268999772,
+    "tests/gcm/independence_test/test_kernel.py::test_given_weak_dependency_when_perform_kernel_based_test_then_returns_expected_result": 1.3359811050013377,
     "tests/gcm/independence_test/test_kernel.py::test_when_using_fast_centering_then_gives_expected_results": 0.05734733299686923,
-    "tests/gcm/independence_test/test_regression.py::test_regression_based_conditional_independence_parallelization": 0.8040251249985886,
-    "tests/gcm/independence_test/test_regression.py::test_regression_based_conditional_independence_test_categorical_dependent": 0.20517464200020186,
-    "tests/gcm/independence_test/test_regression.py::test_regression_based_conditional_independence_test_categorical_independent": 0.2127767899983155,
-    "tests/gcm/independence_test/test_regression.py::test_regression_based_conditional_independence_test_dependent": 0.3155361110002559,
-    "tests/gcm/independence_test/test_regression.py::test_regression_based_conditional_independence_test_independent": 0.1960459349975281,
-    "tests/gcm/independence_test/test_regression.py::test_regression_based_pairwise_independence_test_categorical_dependent": 0.2196950100005779,
-    "tests/gcm/independence_test/test_regression.py::test_regression_based_pairwise_independence_test_categorical_independent": 0.20847917899664026,
-    "tests/gcm/independence_test/test_regression.py::test_regression_based_pairwise_independence_test_dependent": 0.24657917199874646,
-    "tests/gcm/independence_test/test_regression.py::test_regression_based_pairwise_independence_test_independent": 0.24525637500119046,
-    "tests/gcm/test_anomaly.py::test_given_data_with_change_in_mechanism_when_estimate_distribution_change_score_then_returns_expected_result": 17.271205247998296,
-    "tests/gcm/test_anomaly.py::test_given_data_with_change_in_mechanism_when_estimate_distribution_change_score_using_difference_in_means_then_returns_expected_result": 18.573216941000283,
-    "tests/gcm/test_anomaly.py::test_given_data_with_change_in_mechanism_when_estimate_distribution_change_score_using_difference_in_variance_then_returns_expected_result": 30.12939289699716,
-    "tests/gcm/test_anomaly.py::test_given_data_with_change_in_root_node_when_estimate_distribution_change_score_using_difference_in_means_then_returns_expected_result": 17.02896168499865,
-    "tests/gcm/test_anomaly.py::test_given_graph_with_multiple_parents_when_estimate_distribution_change_scores_then_returns_expected_result": 6.83073560000048,
-    "tests/gcm/test_anomaly.py::test_given_multivariate_inputs_when_estimate_anomaly_scores_then_does_not_raise_error": 0.8354802380017645,
-    "tests/gcm/test_anomaly.py::test_given_outlier_observation_when_estimate_anomaly_scores_using_inverse_density_scorer_then_returns_expected_result": 14.157676802999049,
-    "tests/gcm/test_anomaly.py::test_given_outlier_observation_when_estimate_anomaly_scores_using_mean_deviation_scorer_then_returns_expected_result": 1.2811969299982593,
-    "tests/gcm/test_anomaly.py::test_given_outlier_observation_when_estimate_anomaly_scores_using_median_cdf_quantile_scorer_then_returns_expected_result": 1.3741463899968949,
-    "tests/gcm/test_anomaly.py::test_given_outlier_observation_when_estimate_anomaly_scores_using_median_deviation_scorer_then_returns_expected_result": 1.609633436997683,
-    "tests/gcm/test_anomaly.py::test_given_outlier_observation_when_estimate_anomaly_scores_using_rescaled_median_cdf_quantile_scorer_then_returns_expected_result": 1.4401765190013975,
-    "tests/gcm/test_anomaly.py::test_given_simple_linear_data_when_estimate_conditional_anomaly_scores_then_returns_expected_result": 0.0027797249967989046,
-    "tests/gcm/test_anomaly_attribution.py::test_given_non_trivial_graph_with_nonlinear_relationships_when_attribute_anomaly_scores_with_it_score_then_returns_qualitatively_correct_results": 2.0995102369997767,
-    "tests/gcm/test_anomaly_attribution.py::test_given_simple_causal_chain_with_linear_relationships_when_attribute_anomaly_scores_with_feature_relevance_then_returns_qualitatively_correct_results": 2.178279106003174,
-    "tests/gcm/test_anomaly_attribution.py::test_given_simple_causal_chain_with_linear_relationships_when_attribute_anomaly_scores_with_it_score_then_returns_qualitatively_correct_results": 1.3042352470038168,
-    "tests/gcm/test_anomaly_attribution.py::test_given_simple_gaussian_data_when_attribute_anomaly_scores_with_feature_relevance_then_returns_qualitatively_correct_results": 0.5284207230033644,
-    "tests/gcm/test_anomaly_attribution.py::test_given_simple_gaussian_data_when_attribute_anomaly_scores_with_it_score_then_returns_qualitatively_correct_results": 6.348568976001843,
-    "tests/gcm/test_anomaly_attribution.py::test_relative_frequency": 0.0002936099990620278,
-    "tests/gcm/test_anomaly_scorers.py::test_given_simple_toy_data_when_using_MedianCDFQuantileScorer_then_returns_expected_scores": 0.000498973997309804,
-    "tests/gcm/test_anomaly_scorers.py::test_given_simple_toy_data_when_using_MedianDeviationScorer_then_returns_expected_scores": 0.000577939998038346,
-    "tests/gcm/test_arrow_strength.py::test_given_categorical_target_node_when_estimate_arrow_strength_of_model_classifier_then_returns_expected_result": 1.073614480003016,
-    "tests/gcm/test_arrow_strength.py::test_given_continuous_data_with_default_attribution_func_when_estimate_arrow_strength_then_returns_expected_results": 0.10413893800432561,
-    "tests/gcm/test_arrow_strength.py::test_given_gcm_with_misspecified_mechanism_when_evaluate_arrow_strength_with__observational_data_then_gives_expected_results": 0.059819231002620654,
-    "tests/gcm/test_arrow_strength.py::test_given_kl_divergence_attribution_func_when_estimate_arrow_strength_then_returns_expected_results": 4.1209102970024105,
-    "tests/gcm/test_auto.py::test_given_linear_classification_problem_when_auto_assign_causal_models_with_better_quality_returns_linear_model": 0.5792933610027831,
-    "tests/gcm/test_auto.py::test_given_linear_classification_problem_when_auto_assign_causal_models_with_good_quality_returns_linear_model": 0.2876891730047646,
-    "tests/gcm/test_auto.py::test_given_linear_regression_problem_when_auto_assign_causal_models_with_better_quality_returns_linear_model": 0.8629348760005087,
-    "tests/gcm/test_auto.py::test_given_linear_regression_problem_when_auto_assign_causal_models_with_good_quality_returns_linear_model": 0.31510659000559826,
-    "tests/gcm/test_auto.py::test_given_non_linear_classification_problem_when_auto_assign_causal_models_with_better_quality_returns_non_linear_model": 0.5879168029969151,
-    "tests/gcm/test_auto.py::test_given_non_linear_classification_problem_when_auto_assign_causal_models_with_good_quality_returns_non_linear_model": 0.2784427689985023,
-    "tests/gcm/test_auto.py::test_given_non_linear_regression_problem_when_auto_assign_causal_models_with_better_quality_returns_non_linear_model": 1.1180348099951516,
-    "tests/gcm/test_auto.py::test_given_non_linear_regression_problem_when_auto_assign_causal_models_with_good_quality_returns_non_linear_model": 0.7233570249991317,
-    "tests/gcm/test_auto.py::test_when_auto_called_from_main_namespace_returns_no_attribute_error": 0.000274299003649503,
-    "tests/gcm/test_confidence_intervals.py::test_given_simple_counter_as_estimation_func_when_confidence_interval_then_returns_mean_and_interval_counter": 0.0018954930019390304,
-    "tests/gcm/test_confidence_intervals_cms.py::test_given_causal_graph_based_estimation_func_when_confidence_interval_then_can_use_bootstrap_training_and_sampling": 0.6973980469956587,
-    "tests/gcm/test_confidence_intervals_cms.py::test_given_parameterized_estimation_func_when_confidence_interval_then_can_use_bootstrap_sampling_to_bind_parameters": 0.0017940140023711137,
+    "tests/gcm/independence_test/test_regression.py::test_regression_based_conditional_independence_parallelization": 0.3592468739989272,
+    "tests/gcm/independence_test/test_regression.py::test_regression_based_conditional_independence_test_categorical_dependent": 0.26605106299939507,
+    "tests/gcm/independence_test/test_regression.py::test_regression_based_conditional_independence_test_categorical_independent": 0.31590158400103974,
+    "tests/gcm/independence_test/test_regression.py::test_regression_based_conditional_independence_test_dependent": 0.3428450259980309,
+    "tests/gcm/independence_test/test_regression.py::test_regression_based_conditional_independence_test_independent": 0.16104197300046508,
+    "tests/gcm/independence_test/test_regression.py::test_regression_based_pairwise_independence_test_categorical_dependent": 0.04541199199957191,
+    "tests/gcm/independence_test/test_regression.py::test_regression_based_pairwise_independence_test_categorical_independent": 0.10463475799951993,
+    "tests/gcm/independence_test/test_regression.py::test_regression_based_pairwise_independence_test_dependent": 0.10435450899785792,
+    "tests/gcm/independence_test/test_regression.py::test_regression_based_pairwise_independence_test_independent": 0.14417282299837098,
+    "tests/gcm/ml/test_regression.py::test_given_product_regressor_then_computes_correct_values": 0.001283600999158807,
+    "tests/gcm/test_anomaly.py::test_given_data_with_change_in_mechanism_when_estimate_distribution_change_score_then_returns_expected_result": 29.807445410000582,
+    "tests/gcm/test_anomaly.py::test_given_data_with_change_in_mechanism_when_estimate_distribution_change_score_using_difference_in_means_then_returns_expected_result": 25.35173482800019,
+    "tests/gcm/test_anomaly.py::test_given_data_with_change_in_mechanism_when_estimate_distribution_change_score_using_difference_in_variance_then_returns_expected_result": 34.873429462000786,
+    "tests/gcm/test_anomaly.py::test_given_data_with_change_in_root_node_when_estimate_distribution_change_score_using_difference_in_means_then_returns_expected_result": 24.64987556900269,
+    "tests/gcm/test_anomaly.py::test_given_graph_with_multiple_parents_when_estimate_distribution_change_scores_then_returns_expected_result": 13.599023269998725,
+    "tests/gcm/test_anomaly.py::test_given_multivariate_inputs_when_estimate_anomaly_scores_then_does_not_raise_error": 13.891689223999492,
+    "tests/gcm/test_anomaly.py::test_given_outlier_observation_when_estimate_anomaly_scores_using_inverse_density_scorer_then_returns_expected_result": 30.14598051699977,
+    "tests/gcm/test_anomaly.py::test_given_outlier_observation_when_estimate_anomaly_scores_using_mean_deviation_scorer_then_returns_expected_result": 17.030378150000615,
+    "tests/gcm/test_anomaly.py::test_given_outlier_observation_when_estimate_anomaly_scores_using_median_cdf_quantile_scorer_then_returns_expected_result": 24.73614000700036,
+    "tests/gcm/test_anomaly.py::test_given_outlier_observation_when_estimate_anomaly_scores_using_median_deviation_scorer_then_returns_expected_result": 16.054258019998088,
+    "tests/gcm/test_anomaly.py::test_given_outlier_observation_when_estimate_anomaly_scores_using_rescaled_median_cdf_quantile_scorer_then_returns_expected_result": 18.47946877600043,
+    "tests/gcm/test_anomaly.py::test_given_simple_linear_data_when_estimate_conditional_anomaly_scores_then_returns_expected_result": 0.0029044759994576452,
+    "tests/gcm/test_anomaly_attribution.py::test_given_non_trivial_graph_with_nonlinear_relationships_when_attribute_anomaly_scores_with_it_score_then_returns_qualitatively_correct_results": 2.1123664880014985,
+    "tests/gcm/test_anomaly_attribution.py::test_given_simple_causal_chain_with_linear_relationships_when_attribute_anomaly_scores_with_feature_relevance_then_returns_qualitatively_correct_results": 24.001711004000754,
+    "tests/gcm/test_anomaly_attribution.py::test_given_simple_causal_chain_with_linear_relationships_when_attribute_anomaly_scores_with_it_score_then_returns_qualitatively_correct_results": 29.369829627998115,
+    "tests/gcm/test_anomaly_attribution.py::test_given_simple_gaussian_data_when_attribute_anomaly_scores_with_feature_relevance_then_returns_qualitatively_correct_results": 0.6089531779998651,
+    "tests/gcm/test_anomaly_attribution.py::test_given_simple_gaussian_data_when_attribute_anomaly_scores_with_it_score_then_returns_qualitatively_correct_results": 4.976234596999348,
+    "tests/gcm/test_anomaly_attribution.py::test_relative_frequency": 0.00037885099845880177,
+    "tests/gcm/test_anomaly_scorers.py::test_given_simple_toy_data_when_using_MedianCDFQuantileScorer_then_returns_expected_scores": 0.0005814419982925756,
+    "tests/gcm/test_anomaly_scorers.py::test_given_simple_toy_data_when_using_MedianDeviationScorer_then_returns_expected_scores": 0.0006448339991038665,
+    "tests/gcm/test_arrow_strength.py::test_given_categorical_target_node_when_estimate_arrow_strength_of_model_classifier_then_returns_expected_result": 1.1652822319992993,
+    "tests/gcm/test_arrow_strength.py::test_given_continuous_data_with_default_attribution_func_when_estimate_arrow_strength_then_returns_expected_results": 0.12396619799983455,
+    "tests/gcm/test_arrow_strength.py::test_given_gcm_with_misspecified_mechanism_when_evaluate_arrow_strength_with__observational_data_then_gives_expected_results": 0.07071328300116875,
+    "tests/gcm/test_arrow_strength.py::test_given_kl_divergence_attribution_func_when_estimate_arrow_strength_then_returns_expected_results": 7.431842104999305,
+    "tests/gcm/test_auto.py::test_given_linear_classification_problem_when_auto_assign_causal_models_with_better_quality_returns_linear_model": 0.5956583260012849,
+    "tests/gcm/test_auto.py::test_given_linear_classification_problem_when_auto_assign_causal_models_with_good_quality_returns_linear_model": 5.7380634199980705,
+    "tests/gcm/test_auto.py::test_given_linear_regression_problem_when_auto_assign_causal_models_with_better_quality_returns_linear_model": 0.8294572150007298,
+    "tests/gcm/test_auto.py::test_given_linear_regression_problem_when_auto_assign_causal_models_with_good_quality_returns_linear_model": 7.128187996999259,
+    "tests/gcm/test_auto.py::test_given_non_linear_classification_problem_when_auto_assign_causal_models_with_better_quality_returns_non_linear_model": 0.5962440150015027,
+    "tests/gcm/test_auto.py::test_given_non_linear_classification_problem_when_auto_assign_causal_models_with_good_quality_returns_non_linear_model": 6.589439229999698,
+    "tests/gcm/test_auto.py::test_given_non_linear_regression_problem_when_auto_assign_causal_models_with_better_quality_returns_non_linear_model": 1.2263876689994504,
+    "tests/gcm/test_auto.py::test_given_non_linear_regression_problem_when_auto_assign_causal_models_with_good_quality_returns_non_linear_model": 9.161473943000601,
+    "tests/gcm/test_auto.py::test_when_auto_called_from_main_namespace_returns_no_attribute_error": 0.00029278099827934057,
+    "tests/gcm/test_confidence_intervals.py::test_given_simple_counter_as_estimation_func_when_confidence_interval_then_returns_mean_and_interval_counter": 0.0020715710015792865,
+    "tests/gcm/test_confidence_intervals_cms.py::test_given_causal_graph_based_estimation_func_when_confidence_interval_then_can_use_bootstrap_training_and_sampling": 5.556269120001161,
+    "tests/gcm/test_confidence_intervals_cms.py::test_given_parameterized_estimation_func_when_confidence_interval_then_can_use_bootstrap_sampling_to_bind_parameters": 0.0018258170002809493,
     "tests/gcm/test_density_estimators.py::test_gaussian_mixture_density_estimator": 0.00399742500303546,
     "tests/gcm/test_density_estimators.py::test_kernel_based_density_estimator_1d": 0.0006911870041221846,
+    "tests/gcm/test_density_estimators.py::test_when_fit_and_evaluate_gaussian_mixture_density_estimator_then_behaves_as_expected": 0.00461995500154444,
+    "tests/gcm/test_density_estimators.py::test_when_fit_and_evaluate_kernel_based_density_estimator_1d_then_behaves_as_expected": 0.0018513159975555027,
     "tests/gcm/test_distribution_change.py::test_distribution_change": 17.114309640997817,
     "tests/gcm/test_distribution_change.py::test_distribution_change_of_graphs": 0.04189897299875156,
-    "tests/gcm/test_distribution_change.py::test_when_using_distribution_change_with_return_additional_info_then_returns_additional_info": 14.030528078998032,
-    "tests/gcm/test_distribution_change.py::test_when_using_distribution_change_without_fdrc_then_returns_valid_results": 17.78734955999971,
+    "tests/gcm/test_distribution_change.py::test_given_non_linear_data_when_using_distribution_change_with_mean_difference_then_returns_expected_results": 65.09276458400018,
+    "tests/gcm/test_distribution_change.py::test_given_two_data_sets_with_different_mechanisms_when_evaluate_distribution_change_then_returns_expected_result": 28.554325112998413,
+    "tests/gcm/test_distribution_change.py::test_given_two_graphs_fitted_on_data_sets_with_different_mechanisms_when_evaluate_distribution_change_of_graphs_then_returns_expected_result": 0.04381045099944458,
+    "tests/gcm/test_distribution_change.py::test_when_using_distribution_change_with_return_additional_info_then_returns_additional_info": 31.143261340999743,
+    "tests/gcm/test_distribution_change.py::test_when_using_distribution_change_without_fdrc_then_returns_valid_results": 26.930980414999794,
     "tests/gcm/test_divergence.py::test_auto_estimate_kl_divergence_categorical": 0.0019550790020730346,
     "tests/gcm/test_divergence.py::test_auto_estimate_kl_divergence_continuous": 0.22346158000436844,
     "tests/gcm/test_divergence.py::test_auto_estimate_kl_divergence_probabilities": 0.0004078410020156298,
     "tests/gcm/test_divergence.py::test_estimate_kl_divergence_categorical": 0.0016859440002008341,
     "tests/gcm/test_divergence.py::test_estimate_kl_divergence_continuous": 0.23781509599939454,
     "tests/gcm/test_divergence.py::test_estimate_kl_divergence_of_probabilities": 0.00036038400139659643,
+    "tests/gcm/test_divergence.py::test_given_categorical_data_when_auto_estimate_kl_divergence_then_correctly_selects_categorical_version": 0.0018206149979960173,
+    "tests/gcm/test_divergence.py::test_given_probability_vectors_when_auto_estimate_kl_divergence_then_correctly_selects_probability_version": 0.0005056440004409524,
+    "tests/gcm/test_divergence.py::test_given_probability_vectors_when_estimate_kl_divergence_of_probabilities_then_returns_expected_result": 0.00048442400111525785,
+    "tests/gcm/test_divergence.py::test_given_simple_categorical_data_estimate_kl_divergence_categorical_then_returns_expected_result": 0.0017990949982049642,
+    "tests/gcm/test_divergence.py::test_given_simple_gaussian_data_when_auto_estimate_kl_divergence_then_correctly_selects_continuous_version": 0.22154908500124293,
+    "tests/gcm/test_divergence.py::test_given_simple_gaussian_data_when_estimate_kl_divergence_continuous_then_returns_expected_result": 0.22085172900006,
     "tests/gcm/test_fcms.py::test_classifier_sem_produces_strings": 0.0052253809953981545,
     "tests/gcm/test_fcms.py::test_classifier_sem_throws_error_when_non_string_targets": 0.0012314030027482659,
     "tests/gcm/test_fcms.py::test_classifier_sem_with_categorical_inputs": 0.008931514999858337,
@@ -242,135 +289,150 @@
     "tests/gcm/test_fcms.py::test_fit_causal_graph_using_additive_noise_model": 0.006898495001223637,
     "tests/gcm/test_fcms.py::test_fit_causal_graph_using_post_nonlinear_models": 0.006813561001763446,
     "tests/gcm/test_fcms.py::test_fit_causal_graph_using_post_nonlinear_models_with_categorical_features": 0.007516005000070436,
+    "tests/gcm/test_fcms.py::test_given_categorical_input_data_when_draw_from_fitted_causal_graph_with_linear_anm_then_generates_correct_marginal_distribution": 0.01656258800176147,
+    "tests/gcm/test_fcms.py::test_given_categorical_input_data_when_fit_causal_graph_with_linear_anm_then_learns_correct_coefficients": 0.005331156997272046,
+    "tests/gcm/test_fcms.py::test_given_exponential_data_when_fit_post_non_linear_sem_with_invertible_exponential_function_then_returns_expected_results": 0.003780343999096658,
+    "tests/gcm/test_fcms.py::test_given_linear_data_when_draw_samples_from_fitted_anm_then_generates_correct_marginal_distribution": 0.008520762001353432,
+    "tests/gcm/test_fcms.py::test_given_linear_data_when_fit_causal_graph_with_linear_anm_then_learns_correct_coefficients": 0.002319291001185775,
+    "tests/gcm/test_fcms.py::test_given_logarithmic_data_when_fit_post_non_linear_sem_with_invertible_logarithmic_function_then_returns_expected_results": 0.004063615999257308,
+    "tests/gcm/test_fcms.py::test_given_non_string_data_when_try_to_fit_classifier_fcm_then_throws_error": 0.0017007439982990036,
+    "tests/gcm/test_fcms.py::test_given_simple_linear_data_when_fit_post_non_linear_sem_with_invertible_identity_then_returns_expected_results": 0.004450587997780531,
     "tests/gcm/test_fcms.py::test_post_non_linear_sem_with_invertible_exponential": 0.0038756509966333397,
     "tests/gcm/test_fcms.py::test_post_non_linear_sem_with_invertible_identity": 0.0038736210008210037,
     "tests/gcm/test_fcms.py::test_post_non_linear_sem_with_invertible_logarithmic": 0.0035957540058007,
-    "tests/gcm/test_feature.py::test_feature_relevance_sample_mean_diff": 1.171173066002666,
-    "tests/gcm/test_feature.py::test_feature_relevance_sample_mean_diff_with_certain_batch_size": 0.1368202419980662,
-    "tests/gcm/test_feature.py::test_given_baseline_values_when_estimating_feature_relevance_sample_with_mean_diff_then_returns_expected_result": 0.3077509229951829,
-    "tests/gcm/test_feature.py::test_given_misspecified_graph_when_estimating_parent_relevance_with_observed_data_then_returns_correct_result": 3.776513219003391,
-    "tests/gcm/test_feature.py::test_when_using_feature_relevance_distribution_with_entropy_set_function_then_returns_correct_results": 0.8174488349977764,
-    "tests/gcm/test_feature.py::test_when_using_parent_relevance_with_categorical_data_then_returns_correct_results": 6.637920232002216,
-    "tests/gcm/test_feature.py::test_when_using_parent_relevance_with_confidence_intervals_then_returns_reasonable_bounds": 9.255171256001631,
-    "tests/gcm/test_feature.py::test_when_using_parent_relevance_with_continous_data_then_returns_correct_results": 0.9622846450001816,
-    "tests/gcm/test_graph.py::test_fit_and_draw_samples": 0.006282384998485213,
-    "tests/gcm/test_graph.py::test_is_root_node": 0.00025715499941725284,
-    "tests/gcm/test_graph.py::test_set_causal_model_raises_error": 0.00030884399529895745,
-    "tests/gcm/test_intrinsic_influence.py::test_given_only_categorical_data_when_estimate_icc_then_does_not_fail": 16.3624693510028,
-    "tests/gcm/test_intrinsic_influence.py::test_intrinsic_causal_influence_categorical": 10.063866736003547,
-    "tests/gcm/test_intrinsic_influence.py::test_intrinsic_causal_influence_categorical_2": 4.945753104999312,
-    "tests/gcm/test_intrinsic_influence.py::test_intrinsic_causal_influence_variance_linear": 8.015012635998573,
-    "tests/gcm/test_intrinsic_influence.py::test_when_calling_intrinsic_causal_influence_then_the_shape_of_inputs_in_the_attribution_function_should_be_equal": 1.692093040997861,
-    "tests/gcm/test_ml.py::test_categorical_features": 0.005635629004245857,
-    "tests/gcm/test_ml.py::test_categorical_inputs": 0.006445263999921735,
-    "tests/gcm/test_ml.py::test_when_cloning_sklearn_classification_model_then_returns_a_cloned_object": 0.0003863520032609813,
-    "tests/gcm/test_shapley.py::test_estimate_shapley_values_symmetry_approximation_via_early_stopping": 0.2916524889988068,
-    "tests/gcm/test_shapley.py::test_estimate_shapley_values_symmetry_approximation_via_early_stopping_with_random_seed": 2.206780909000372,
-    "tests/gcm/test_shapley.py::test_estimate_shapley_values_symmetry_approximation_via_permutation_with_random_seed": 3.5830478360003326,
-    "tests/gcm/test_shapley.py::test_estimate_shapley_values_symmetry_approximation_via_subset": 0.7485893240009318,
-    "tests/gcm/test_shapley.py::test_estimate_shapley_values_symmetry_approximation_via_subset_with_random_seed": 3.2098093750028056,
-    "tests/gcm/test_shapley.py::test_estimate_shapley_values_symmetry_exact": 8.105218070999399,
-    "tests/gcm/test_shapley.py::test_estimate_shapley_values_symmetry_exact_fast": 6.525596875999327,
-    "tests/gcm/test_shapley.py::test_evaluate_set_function_via_shapley_symmetry_approximation_via_permutation": 0.8665359029946558,
-    "tests/gcm/test_shapley.py::test_given_few_features_when_estimate_shapley_values_with_auto_approx_then_returns_correct_result": 0.003006461000040872,
-    "tests/gcm/test_shapley.py::test_given_many_features_when_estimate_shapley_values_with_auto_approx_then_returns_correct_result": 0.04127842900561518,
-    "tests/gcm/test_stats.py::test_estimate_geometric_median": 0.003749911000340944,
-    "tests/gcm/test_stats.py::test_given_different_batch_sizes_when_estimating_marginal_expectation_then_returns_expected_result": 0.12773612499950104,
-    "tests/gcm/test_stats.py::test_given_linear_dependent_data_when_estimate_ftest_pvalue_then_returns_expected_result": 0.0013709800005017314,
-    "tests/gcm/test_stats.py::test_given_multivariate_dependent_data_when_estimate_ftest_pvalue_then_returns_expected_result": 0.001504758998635225,
-    "tests/gcm/test_stats.py::test_given_p_values_with_nans_when_using_quantile_based_fwer_then_ignores_the_nan_values": 0.00038718799987691455,
-    "tests/gcm/test_stats.py::test_marginal_expectation_independent_categorical_linear": 0.19481896700017387,
-    "tests/gcm/test_stats.py::test_marginal_expectation_independent_categorical_nonlinear": 3.155934750004235,
-    "tests/gcm/test_stats.py::test_marginal_expectation_independent_continuous_linear": 0.229990669999097,
-    "tests/gcm/test_stats.py::test_marginal_expectation_independent_continuous_nonlinear": 3.869605367999611,
-    "tests/gcm/test_stats.py::test_marginal_expectation_returns_all_results": 0.016676576000463683,
-    "tests/gcm/test_stats.py::test_marginal_expectation_returns_reduced_results": 0.03466918100093608,
-    "tests/gcm/test_stats.py::test_quantile_based_fwer": 0.0008770439999352675,
-    "tests/gcm/test_stats.py::test_quantile_based_fwer_raises_error": 0.00033082900336012244,
-    "tests/gcm/test_stats.py::test_quantile_based_fwer_scaling": 0.0005289260006975383,
-    "tests/gcm/test_stochastic_models.py::test_bayesian_gaussian_mixture_distribution": 0.11869163300070795,
-    "tests/gcm/test_stochastic_models.py::test_bayesian_gaussian_mixture_distribution_runtime_error": 0.0005404119983722921,
-    "tests/gcm/test_stochastic_models.py::test_empirical_distribution": 0.3082659879983112,
-    "tests/gcm/test_stochastic_models.py::test_fitted_parameters_assigned_correctly_using_beta_distribution": 0.09166112499588053,
-    "tests/gcm/test_stochastic_models.py::test_fitted_parameters_assigned_correctly_using_normal_distribution": 0.000768283996876562,
-    "tests/gcm/test_stochastic_models.py::test_scipy_auto_select_continuous_parametric_distribution": 0.005523714000446489,
-    "tests/gcm/test_stochastic_models.py::test_scipy_fittable_parametric_distribution": 0.0005317560026014689,
-    "tests/gcm/test_stochastic_models.py::test_scipy_fixed_parametric_distribution": 0.0003344569995533675,
-    "tests/gcm/test_uncertainty.py::test_estimate_entropy_discrete": 0.0010040130036941264,
-    "tests/gcm/test_uncertainty.py::test_estimate_entropy_kmeans": 0.11172045399507624,
-    "tests/gcm/test_uncertainty.py::test_estimate_entropy_of_probabilities": 0.00047356600043713115,
-    "tests/gcm/test_uncertainty.py::test_estimate_entropy_using_discretization": 0.0017287959999521263,
-    "tests/gcm/test_uncertainty.py::test_estimate_gaussian_entropy": 0.0007188839954324067,
-    "tests/gcm/test_uncertainty.py::test_estimate_variance": 0.1497926180018112,
-    "tests/gcm/test_validation.py::test_given_non_linear_data_and_correct_dag_when_refute_invertible_model_then_not_reject_model": 5.803969453001628,
-    "tests/gcm/test_validation.py::test_given_non_linear_data_and_incorrect_dag_when_refute_invertible_model_then_reject_model": 5.037821076999535,
-    "tests/gcm/test_validation.py::test_given_non_linear_data_and_incorrect_dag_with_collider_when_refute_invertible_model_then_reject_model": 6.595124856998154,
-    "tests/gcm/test_validation.py::test_refute_causal_structure_adjusted_p_values": 0.9631542310016812,
-    "tests/gcm/test_validation.py::test_refute_causal_structure_chain": 0.20382860899917432,
-    "tests/gcm/test_validation.py::test_refute_causal_structure_collider": 0.029991837000125088,
-    "tests/gcm/test_validation.py::test_refute_causal_structure_fork": 0.5995468189976236,
-    "tests/gcm/test_validation.py::test_refute_causal_structure_general": 0.04577041499942425,
-    "tests/gcm/test_validation.py::test_when_using_refute_causal_structure_without_fdrc_then_nans_for_adjusted_p_values_are_returned": 0.33601710599759826,
-    "tests/gcm/test_whatif.py::test_counterfactual_samples_raises_error_all_parameter_none": 0.003792122002778342,
-    "tests/gcm/test_whatif.py::test_counterfactual_samples_raises_error_both_parameter_given": 0.0040026999995461665,
-    "tests/gcm/test_whatif.py::test_counterfactual_samples_with_noise_samples": 0.0053520000001299195,
-    "tests/gcm/test_whatif.py::test_counterfactual_samples_with_observed_samples": 0.007056922004267108,
-    "tests/gcm/test_whatif.py::test_given_binary_target_when_estimate_average_causal_effect_then_return_expected_result": 0.42823138600215316,
-    "tests/gcm/test_whatif.py::test_given_continuous_target_when_estimate_average_causal_effect_then_return_expected_result": 0.812088416001643,
-    "tests/gcm/test_whatif.py::test_interventional_samples_atomic": 0.008358578001207206,
-    "tests/gcm/test_whatif.py::test_interventional_samples_atomic_draw": 0.0109674770028505,
-    "tests/gcm/test_whatif.py::test_interventional_samples_atomic_multiple_interventions": 0.0057828280005196575,
-    "tests/gcm/test_whatif.py::test_interventional_samples_conditional": 0.008653826003865106,
-    "tests/gcm/test_whatif.py::test_interventional_samples_conditional_draw": 0.010875829004362458,
-    "tests/gcm/test_whatif.py::test_interventional_samples_raise_error_all_parameter_none": 0.003524371000821702,
-    "tests/gcm/test_whatif.py::test_interventional_samples_raise_error_both_parameter_given": 0.003893631997925695,
-    "tests/gcm/test_whatif.py::test_interventional_samples_with_categorical_variables_draw": 0.018410912998660933,
-    "tests/gcm/util/test_plotting.py::test_plot_adjacency_matrix": 0.017139122002845397,
-    "tests/gcm/util/test_plotting.py::test_when_plot_does_not_raise_exception": 0.06699552199643222,
-    "tests/gcm/util/test_pygraphviz.py::test_calc_arrow_width": 0.00036200900285621174,
-    "tests/test_causal_estimator.py::test_causal_estimator_placeholder_methods": 0.00047312800234067254,
-    "tests/test_causal_model.py::TestCausalModel::test_external_estimator[10-100-1]": 0.8277756619972934,
-    "tests/test_causal_model.py::TestCausalModel::test_graph_input2[10-1-100-1]": 0.004107875000045169,
-    "tests/test_causal_model.py::TestCausalModel::test_graph_input3[10-1-100-1]": 0.004984627998055657,
-    "tests/test_causal_model.py::TestCausalModel::test_graph_input4[10-1-100-1]": 0.004323232999013271,
-    "tests/test_causal_model.py::TestCausalModel::test_graph_input[10-1-100-1]": 0.00443547199756722,
-    "tests/test_causal_model.py::TestCausalModel::test_graph_refutation2[10-5000]": 2.546336801999132,
-    "tests/test_causal_model.py::TestCausalModel::test_graph_refutation[10-5000]": 0.38944244000231265,
-    "tests/test_causal_refuter.py::test_causal_refuter_bootstrap_test": 0.0005493779972312041,
-    "tests/test_causal_refuter.py::test_causal_refuter_placeholder_method": 0.00032829900010256097,
-    "tests/test_data_transformer.py::test_dimensionality_reducer_placeholder_methods": 0.00022748899937141687,
+    "tests/gcm/test_fcms.py::test_when_clone_additive_noise_models_with_scipy_distribution_then_clone_has_correct_models": 0.0022859580003569135,
+    "tests/gcm/test_fcms.py::test_when_draw_from_classifier_fcm_then_returns_string_samples": 0.00800903900017147,
+    "tests/gcm/test_fcms.py::test_when_fit_classifier_fcm_with_categorical_inputs_then_returns_expected_results": 0.011222037999687018,
+    "tests/gcm/test_feature.py::test_feature_relevance_sample_mean_diff": 0.9340454179982771,
+    "tests/gcm/test_feature.py::test_feature_relevance_sample_mean_diff_with_certain_batch_size": 0.1313322430014523,
+    "tests/gcm/test_feature.py::test_given_baseline_values_when_estimating_feature_relevance_sample_with_mean_diff_then_returns_expected_result": 0.3403174660015793,
+    "tests/gcm/test_feature.py::test_given_misspecified_graph_when_estimating_parent_relevance_with_observed_data_then_returns_correct_result": 22.445173250998778,
+    "tests/gcm/test_feature.py::test_when_using_feature_relevance_distribution_with_entropy_set_function_then_returns_correct_results": 0.7777337800016539,
+    "tests/gcm/test_feature.py::test_when_using_parent_relevance_with_categorical_data_then_returns_correct_results": 19.896863140002097,
+    "tests/gcm/test_feature.py::test_when_using_parent_relevance_with_confidence_intervals_then_returns_reasonable_bounds": 9.796963109998615,
+    "tests/gcm/test_feature.py::test_when_using_parent_relevance_with_continous_data_then_returns_correct_results": 0.9760240809991956,
+    "tests/gcm/test_graph.py::test_fit_and_draw_samples": 0.006999461998930201,
+    "tests/gcm/test_graph.py::test_is_root_node": 0.0003289969990873942,
+    "tests/gcm/test_graph.py::test_set_causal_model_raises_error": 0.00037695599894504994,
+    "tests/gcm/test_intrinsic_influence.py::test_given_only_categorical_data_when_estimate_icc_then_does_not_fail": 57.989755541000704,
+    "tests/gcm/test_intrinsic_influence.py::test_intrinsic_causal_influence_categorical": 53.30633696399855,
+    "tests/gcm/test_intrinsic_influence.py::test_intrinsic_causal_influence_categorical_2": 39.327678016999926,
+    "tests/gcm/test_intrinsic_influence.py::test_intrinsic_causal_influence_variance_linear": 38.00465657899986,
+    "tests/gcm/test_intrinsic_influence.py::test_when_calling_intrinsic_causal_influence_then_the_shape_of_inputs_in_the_attribution_function_should_be_equal": 18.991308744998605,
+    "tests/gcm/test_ml.py::test_categorical_features": 0.006344903002172941,
+    "tests/gcm/test_ml.py::test_categorical_inputs": 0.007466925000699121,
+    "tests/gcm/test_ml.py::test_when_cloning_sklearn_classification_model_then_returns_a_cloned_object": 0.0004838210006710142,
+    "tests/gcm/test_shapley.py::test_estimate_shapley_values_symmetry_approximation_via_early_stopping": 0.675727908999761,
+    "tests/gcm/test_shapley.py::test_estimate_shapley_values_symmetry_approximation_via_early_stopping_with_random_seed": 2.5750011860000086,
+    "tests/gcm/test_shapley.py::test_estimate_shapley_values_symmetry_approximation_via_permutation_with_random_seed": 3.466346587998487,
+    "tests/gcm/test_shapley.py::test_estimate_shapley_values_symmetry_approximation_via_subset": 0.7537785750009789,
+    "tests/gcm/test_shapley.py::test_estimate_shapley_values_symmetry_approximation_via_subset_with_random_seed": 3.07799904499916,
+    "tests/gcm/test_shapley.py::test_estimate_shapley_values_symmetry_exact": 8.23336829300024,
+    "tests/gcm/test_shapley.py::test_estimate_shapley_values_symmetry_exact_fast": 6.592175635998501,
+    "tests/gcm/test_shapley.py::test_evaluate_set_function_via_shapley_symmetry_approximation_via_permutation": 0.8430921929993929,
+    "tests/gcm/test_shapley.py::test_given_few_features_when_estimate_shapley_values_with_auto_approx_then_returns_correct_result": 0.0038017719980416587,
+    "tests/gcm/test_shapley.py::test_given_many_features_when_estimate_shapley_values_with_auto_approx_then_returns_correct_result": 0.10425491199930548,
+    "tests/gcm/test_stats.py::test_estimate_geometric_median": 0.004048476999741979,
+    "tests/gcm/test_stats.py::test_given_different_batch_sizes_when_estimating_marginal_expectation_then_returns_expected_result": 0.12518689300122787,
+    "tests/gcm/test_stats.py::test_given_linear_dependent_data_when_estimate_ftest_pvalue_then_returns_expected_result": 0.0015984810015652329,
+    "tests/gcm/test_stats.py::test_given_multivariate_dependent_data_when_estimate_ftest_pvalue_then_returns_expected_result": 0.0016053010022005765,
+    "tests/gcm/test_stats.py::test_given_p_values_with_nans_when_using_quantile_based_fwer_then_ignores_the_nan_values": 0.00037483099913515616,
+    "tests/gcm/test_stats.py::test_marginal_expectation_independent_categorical_linear": 0.18587543800094863,
+    "tests/gcm/test_stats.py::test_marginal_expectation_independent_categorical_nonlinear": 56.26012143300068,
+    "tests/gcm/test_stats.py::test_marginal_expectation_independent_continuous_linear": 0.2658739599992259,
+    "tests/gcm/test_stats.py::test_marginal_expectation_independent_continuous_nonlinear": 40.81410756399964,
+    "tests/gcm/test_stats.py::test_marginal_expectation_returns_all_results": 0.01936461700097425,
+    "tests/gcm/test_stats.py::test_marginal_expectation_returns_reduced_results": 0.029648808000274585,
+    "tests/gcm/test_stats.py::test_quantile_based_fwer": 0.0008437809992756229,
+    "tests/gcm/test_stats.py::test_quantile_based_fwer_raises_error": 0.0003298399988125311,
+    "tests/gcm/test_stats.py::test_quantile_based_fwer_scaling": 0.000547830999494181,
+    "tests/gcm/test_stochastic_models.py::test_bayesian_gaussian_mixture_distribution": 0.07944374699945911,
+    "tests/gcm/test_stochastic_models.py::test_bayesian_gaussian_mixture_distribution_runtime_error": 0.00045383999713521916,
+    "tests/gcm/test_stochastic_models.py::test_empirical_distribution": 0.30704421600057685,
+    "tests/gcm/test_stochastic_models.py::test_fitted_parameters_assigned_correctly_using_beta_distribution": 0.12220136099858792,
+    "tests/gcm/test_stochastic_models.py::test_fitted_parameters_assigned_correctly_using_normal_distribution": 0.0007490899988624733,
+    "tests/gcm/test_stochastic_models.py::test_scipy_auto_select_continuous_parametric_distribution": 0.32287205799912044,
+    "tests/gcm/test_stochastic_models.py::test_scipy_fittable_parametric_distribution": 0.000533110001924797,
+    "tests/gcm/test_stochastic_models.py::test_scipy_fixed_parametric_distribution": 0.0003421000019443454,
+    "tests/gcm/test_uncertainty.py::test_estimate_entropy_discrete": 0.0011733400006050942,
+    "tests/gcm/test_uncertainty.py::test_estimate_entropy_kmeans": 0.11020228700181178,
+    "tests/gcm/test_uncertainty.py::test_estimate_entropy_of_probabilities": 0.0005798410002171295,
+    "tests/gcm/test_uncertainty.py::test_estimate_entropy_using_discretization": 0.0018264999980601715,
+    "tests/gcm/test_uncertainty.py::test_estimate_gaussian_entropy": 0.0007306910010811407,
+    "tests/gcm/test_uncertainty.py::test_estimate_variance": 0.1536971050009015,
+    "tests/gcm/test_validation.py::test_given_non_linear_data_and_correct_dag_when_refute_invertible_model_then_not_reject_model": 30.215193159998307,
+    "tests/gcm/test_validation.py::test_given_non_linear_data_and_incorrect_dag_when_refute_invertible_model_then_reject_model": 16.495497463998618,
+    "tests/gcm/test_validation.py::test_given_non_linear_data_and_incorrect_dag_with_collider_when_refute_invertible_model_then_reject_model": 14.271690126999601,
+    "tests/gcm/test_validation.py::test_refute_causal_structure_adjusted_p_values": 0.8805916279980011,
+    "tests/gcm/test_validation.py::test_refute_causal_structure_chain": 0.35697386999891023,
+    "tests/gcm/test_validation.py::test_refute_causal_structure_collider": 0.023211495001305593,
+    "tests/gcm/test_validation.py::test_refute_causal_structure_fork": 1.0799866790039232,
+    "tests/gcm/test_validation.py::test_refute_causal_structure_general": 0.04712397700131987,
+    "tests/gcm/test_validation.py::test_when_using_refute_causal_structure_without_fdrc_then_nans_for_adjusted_p_values_are_returned": 0.7557343110020156,
+    "tests/gcm/test_whatif.py::test_counterfactual_samples_raises_error_all_parameter_none": 0.005386669001381961,
+    "tests/gcm/test_whatif.py::test_counterfactual_samples_raises_error_both_parameter_given": 0.0040787509988149395,
+    "tests/gcm/test_whatif.py::test_counterfactual_samples_with_noise_samples": 0.00703999400138855,
+    "tests/gcm/test_whatif.py::test_counterfactual_samples_with_observed_samples": 0.00909313500051212,
+    "tests/gcm/test_whatif.py::test_given_binary_target_when_estimate_average_causal_effect_then_return_expected_result": 5.736080510998363,
+    "tests/gcm/test_whatif.py::test_given_continuous_target_when_estimate_average_causal_effect_then_return_expected_result": 7.400701212001877,
+    "tests/gcm/test_whatif.py::test_interventional_samples_atomic": 0.005556936001084978,
+    "tests/gcm/test_whatif.py::test_interventional_samples_atomic_draw": 0.006262816999878851,
+    "tests/gcm/test_whatif.py::test_interventional_samples_atomic_multiple_interventions": 0.009842175000812858,
+    "tests/gcm/test_whatif.py::test_interventional_samples_conditional": 0.0048610239991830895,
+    "tests/gcm/test_whatif.py::test_interventional_samples_conditional_draw": 0.006809946999055683,
+    "tests/gcm/test_whatif.py::test_interventional_samples_raise_error_all_parameter_none": 0.004740374997709296,
+    "tests/gcm/test_whatif.py::test_interventional_samples_raise_error_both_parameter_given": 0.006484949999503442,
+    "tests/gcm/test_whatif.py::test_interventional_samples_with_categorical_variables_draw": 0.06180697300260363,
+    "tests/gcm/util/test_general.py::test_given_categorical_data_when_evaluating_has_categorical_then_returns_expected_result": 0.0008290509995276807,
+    "tests/gcm/util/test_general.py::test_given_categorical_data_when_evaluating_is_categorical_then_returns_expected_result": 0.0012246000005688984,
+    "tests/gcm/util/test_plotting.py::test_plot_adjacency_matrix": 0.017390742999850772,
+    "tests/gcm/util/test_plotting.py::test_when_plot_does_not_raise_exception": 0.07301370699860854,
+    "tests/gcm/util/test_pygraphviz.py::test_calc_arrow_width": 0.0003234109990444267,
+    "tests/test_causal_estimator.py::test_causal_estimator_placeholder_methods": 0.0004471669981285231,
+    "tests/test_causal_model.py::TestCausalModel::test_external_estimator[10-100-1]": 0.8948007010003494,
+    "tests/test_causal_model.py::TestCausalModel::test_graph_input2[10-1-100-1]": 0.0038136530019983184,
+    "tests/test_causal_model.py::TestCausalModel::test_graph_input3[10-1-100-1]": 0.004221674000291387,
+    "tests/test_causal_model.py::TestCausalModel::test_graph_input4[10-1-100-1]": 0.0038689320026605856,
+    "tests/test_causal_model.py::TestCausalModel::test_graph_input[10-1-100-1]": 0.004279080003470881,
+    "tests/test_causal_model.py::TestCausalModel::test_graph_refutation2[10-5000]": 2.5579186659997504,
+    "tests/test_causal_model.py::TestCausalModel::test_graph_refutation[10-5000]": 0.38690518799921847,
+    "tests/test_causal_refuter.py::test_causal_refuter_bootstrap_test": 0.0005826579981658142,
+    "tests/test_causal_refuter.py::test_causal_refuter_placeholder_method": 0.0003741770015039947,
+    "tests/test_data_transformer.py::test_dimensionality_reducer_placeholder_methods": 0.00023744900136080105,
     "tests/test_notebooks.py::test_notebook[DoWhy-The Causal Story Behind Hotel Booking Cancellations.ipynb]": 269.5888159609967,
     "tests/test_notebooks.py::test_notebook[do_sampler_demo.ipynb]": 3.0448386579955695,
-    "tests/test_notebooks.py::test_notebook[dowhy-conditional-treatment-effects.ipynb]": 333.77691264999885,
-    "tests/test_notebooks.py::test_notebook[dowhy-simple-iv-example.ipynb]": 4.564906564999546,
-    "tests/test_notebooks.py::test_notebook[dowhy_causal_api.ipynb]": 3.4215711319993716,
+    "tests/test_notebooks.py::test_notebook[dowhy-conditional-treatment-effects.ipynb]": 330.1932206170004,
+    "tests/test_notebooks.py::test_notebook[dowhy-simple-iv-example.ipynb]": 5.2607425800015335,
+    "tests/test_notebooks.py::test_notebook[dowhy_causal_api.ipynb]": 3.450208180000118,
     "tests/test_notebooks.py::test_notebook[dowhy_causal_discovery_example.ipynb]": 3.559609330997773,
-    "tests/test_notebooks.py::test_notebook[dowhy_confounder_example.ipynb]": 9.348463070993603,
-    "tests/test_notebooks.py::test_notebook[dowhy_demo_dummy_outcome_refuter.ipynb]": 5.399920256997575,
-    "tests/test_notebooks.py::test_notebook[dowhy_efficient_backdoor_example.ipynb]": 2.960695682999358,
-    "tests/test_notebooks.py::test_notebook[dowhy_estimation_methods.ipynb]": 6.683948532001523,
-    "tests/test_notebooks.py::test_notebook[dowhy_example_effect_of_memberrewards_program.ipynb]": 21.221713907005324,
-    "tests/test_notebooks.py::test_notebook[dowhy_ihdp_data_example.ipynb]": 14.597732170997915,
-    "tests/test_notebooks.py::test_notebook[dowhy_interpreter.ipynb]": 6.791981726000813,
+    "tests/test_notebooks.py::test_notebook[dowhy_confounder_example.ipynb]": 8.938289206002082,
+    "tests/test_notebooks.py::test_notebook[dowhy_demo_dummy_outcome_refuter.ipynb]": 5.1057545250005205,
+    "tests/test_notebooks.py::test_notebook[dowhy_efficient_backdoor_example.ipynb]": 3.6536705840007926,
+    "tests/test_notebooks.py::test_notebook[dowhy_estimation_methods.ipynb]": 6.895480563000092,
+    "tests/test_notebooks.py::test_notebook[dowhy_example_effect_of_memberrewards_program.ipynb]": 20.268174654001996,
+    "tests/test_notebooks.py::test_notebook[dowhy_functional_api.ipynb]": 39.91400131500268,
+    "tests/test_notebooks.py::test_notebook[dowhy_ihdp_data_example.ipynb]": 16.125613112000792,
+    "tests/test_notebooks.py::test_notebook[dowhy_interpreter.ipynb]": 6.770195254999635,
     "tests/test_notebooks.py::test_notebook[dowhy_lalonde_example.ipynb]": 3.583001503997366,
-    "tests/test_notebooks.py::test_notebook[dowhy_mediation_analysis.ipynb]": 3.145783426003618,
-    "tests/test_notebooks.py::test_notebook[dowhy_multiple_treatments.ipynb]": 2.924541534997843,
+    "tests/test_notebooks.py::test_notebook[dowhy_mediation_analysis.ipynb]": 3.3798217410003417,
+    "tests/test_notebooks.py::test_notebook[dowhy_multiple_treatments.ipynb]": 4.499033778000012,
     "tests/test_notebooks.py::test_notebook[dowhy_optimize_backdoor_example.ipynb]": 2.357267290004529,
     "tests/test_notebooks.py::test_notebook[dowhy_ranking_methods.ipynb]": 0.6213478449972172,
     "tests/test_notebooks.py::test_notebook[dowhy_refutation_testing.ipynb]": 60.58798760599893,
-    "tests/test_notebooks.py::test_notebook[dowhy_refuter_notebook.ipynb]": 410.8002465810023,
-    "tests/test_notebooks.py::test_notebook[dowhy_simple_example.ipynb]": 107.64207525600068,
-    "tests/test_notebooks.py::test_notebook[dowhy_twins_example.ipynb]": 514.2033587900005,
-    "tests/test_notebooks.py::test_notebook[gcm_401k_analysis.ipynb]": 38.251963464001165,
-    "tests/test_notebooks.py::test_notebook[gcm_basic_example.ipynb]": 3.863724312999693,
-    "tests/test_notebooks.py::test_notebook[gcm_counterfactual_medical_dry_eyes.ipynb]": 3.8575890449974395,
-    "tests/test_notebooks.py::test_notebook[gcm_draw_samples.ipynb]": 3.8037730070027465,
-    "tests/test_notebooks.py::test_notebook[gcm_rca_microservice_architecture.ipynb]": 153.4997832049994,
-    "tests/test_notebooks.py::test_notebook[gcm_supply_chain_dist_change.ipynb]": 178.38158313500026,
-    "tests/test_notebooks.py::test_notebook[graph_conditional_independence_refuter.ipynb]": 4.069151714997133,
-    "tests/test_notebooks.py::test_notebook[identifying_effects_using_id_algorithm.ipynb]": 2.658960433000175,
+    "tests/test_notebooks.py::test_notebook[dowhy_refuter_notebook.ipynb]": 386.7324736619976,
+    "tests/test_notebooks.py::test_notebook[dowhy_simple_example.ipynb]": 108.74308741299865,
+    "tests/test_notebooks.py::test_notebook[dowhy_twins_example.ipynb]": 497.4862356700014,
+    "tests/test_notebooks.py::test_notebook[gcm_401k_analysis.ipynb]": 39.396144726000784,
+    "tests/test_notebooks.py::test_notebook[gcm_basic_example.ipynb]": 8.232354712001325,
+    "tests/test_notebooks.py::test_notebook[gcm_counterfactual_medical_dry_eyes.ipynb]": 7.381902852001076,
+    "tests/test_notebooks.py::test_notebook[gcm_draw_samples.ipynb]": 8.667402456001582,
+    "tests/test_notebooks.py::test_notebook[gcm_rca_microservice_architecture.ipynb]": 100.56638710100015,
+    "tests/test_notebooks.py::test_notebook[gcm_supply_chain_dist_change.ipynb]": 197.49963043499883,
+    "tests/test_notebooks.py::test_notebook[graph_conditional_independence_refuter.ipynb]": 4.700006441998994,
+    "tests/test_notebooks.py::test_notebook[identifying_effects_using_id_algorithm.ipynb]": 3.2308513859989034,
     "tests/test_notebooks.py::test_notebook[lalonde_pandas_api.ipynb]": 3.8229689540021354,
-    "tests/test_notebooks.py::test_notebook[load_graph_example.ipynb]": 2.4960804089969315,
-    "tests/test_notebooks.py::test_notebook[sensitivity_analysis_testing.ipynb]": 14.854635226998653,
-    "tests/test_notebooks.py::test_notebook[tutorial-causalinference-machinelearning-using-dowhy-econml.ipynb]": 196.1131719360019
+    "tests/test_notebooks.py::test_notebook[load_graph_example.ipynb]": 3.1312964820008347,
+    "tests/test_notebooks.py::test_notebook[sensitivity_analysis_nonparametric_estimators.ipynb]": 28.16567199399833,
+    "tests/test_notebooks.py::test_notebook[sensitivity_analysis_testing.ipynb]": 15.355559879000793,
+    "tests/test_notebooks.py::test_notebook[tutorial-causalinference-machinelearning-using-dowhy-econml.ipynb]": 195.6636093420002
 }
\ No newline at end of file
diff --git a/tests/causal_refuters/test_data_subset_refuter.py b/tests/causal_refuters/test_data_subset_refuter.py
index fb1f34d47..a7bc7a1ff 100644
--- a/tests/causal_refuters/test_data_subset_refuter.py
+++ b/tests/causal_refuters/test_data_subset_refuter.py
@@ -1,5 +1,4 @@
 import numpy as np
-import pytest
 from pytest import mark
 
 from .base import TestRefuter
@@ -12,7 +11,6 @@ class TestDataSubsetRefuter(object):
         refuter_tester = TestRefuter(error_tolerance, estimator_method, "data_subset_refuter")
         refuter_tester.continuous_treatment_testsuite()  # Run both
 
-    @mark.advanced
     @mark.parametrize(["error_tolerance", "estimator_method"], [(0.01, "backdoor.propensity_score_matching")])
     def test_refutation_data_subset_refuter_binary(self, error_tolerance, estimator_method):
         refuter_tester = TestRefuter(error_tolerance, estimator_method, "data_subset_refuter")
diff --git a/tests/gcm/test_anomaly_attribution.py b/tests/gcm/test_anomaly_attribution.py
index 81840c66d..c7c99bf90 100644
--- a/tests/gcm/test_anomaly_attribution.py
+++ b/tests/gcm/test_anomaly_attribution.py
@@ -2,7 +2,7 @@ import networkx as nx
 import numpy as np
 import pandas as pd
 from flaky import flaky
-from pytest import approx, mark
+from pytest import approx
 
 from dowhy.gcm import (
     AdditiveNoiseModel,
@@ -120,7 +120,6 @@ def test_given_simple_gaussian_data_when_attribute_anomaly_scores_with_feature_r
     )
 
 
-@mark.advanced
 @flaky(max_runs=3)
 def test_given_simple_causal_chain_with_linear_relationships_when_attribute_anomaly_scores_with_it_score_then_returns_qualitatively_correct_results():
     num_training_samples = 5000
diff --git a/tests/gcm/test_feature.py b/tests/gcm/test_feature.py
index 2dca440cc..56dd09331 100644
--- a/tests/gcm/test_feature.py
+++ b/tests/gcm/test_feature.py
@@ -2,7 +2,7 @@ import networkx as nx
 import numpy as np
 import pandas as pd
 from flaky import flaky
-from pytest import approx, mark
+from pytest import approx
 from scipy import stats
 
 from dowhy.gcm import (
@@ -44,7 +44,6 @@ def test_when_using_parent_relevance_with_continous_data_then_returns_correct_re
     assert noise == approx(0, abs=0.5)
 
 
-@mark.advanced
 @flaky(max_runs=5)
 def test_when_using_parent_relevance_with_categorical_data_then_returns_correct_results():
     causal_model = StructuralCausalModel(nx.DiGraph([("X0", "Y"), ("X1", "Y"), ("X2", "Y"), ("X3", "Y"), ("X4", "Y")]))
diff --git a/tests/gcm/test_intrinsic_influence.py b/tests/gcm/test_intrinsic_influence.py
index 1d31a8639..689d83994 100644
--- a/tests/gcm/test_intrinsic_influence.py
+++ b/tests/gcm/test_intrinsic_influence.py
@@ -2,7 +2,7 @@ import networkx as nx
 import numpy as np
 import pandas as pd
 from flaky import flaky
-from pytest import approx, mark
+from pytest import approx
 from sklearn.linear_model import LogisticRegression
 
 from dowhy.gcm import StructuralCausalModel, auto, fit, intrinsic_causal_influence
@@ -13,7 +13,6 @@ from dowhy.gcm.uncertainty import estimate_entropy_of_probabilities, estimate_va
 from dowhy.gcm.util.general import apply_one_hot_encoding, fit_one_hot_encoders
 
 
-@mark.advanced
 @flaky(max_runs=3)
 def test_intrinsic_causal_influence_variance_linear():
     causal_model = StructuralCausalModel(nx.DiGraph([("X0", "X1"), ("X1", "X2"), ("X2", "X3")]))
@@ -42,7 +41,6 @@ def test_intrinsic_causal_influence_variance_linear():
     assert np.sum([iccs[key] for key in iccs]) == approx(estimate_variance(X3), abs=0.5)
 
 
-@mark.advanced
 @flaky(max_runs=3)
 def test_intrinsic_causal_influence_categorical():
     causal_model = StructuralCausalModel(nx.DiGraph([("X0", "X1"), ("X1", "X2"), ("X2", "X3")]))
@@ -86,7 +84,6 @@ def test_intrinsic_causal_influence_categorical():
     assert np.sum([iccs[key] for key in iccs]) == approx(-expected_output_empty_subset, abs=0.05)
 
 
-@mark.advanced
 @flaky(max_runs=3)
 def test_intrinsic_causal_influence_categorical_2():
     causal_model = StructuralCausalModel(nx.DiGraph([("X0", "X1"), ("X1", "X2"), ("X2", "X3")]))
@@ -142,7 +139,6 @@ def test_intrinsic_causal_influence_categorical_2():
     assert np.sum([iccs[key] for key in iccs]) == approx(-expected_output_empty_subset, abs=0.05)
 
 
-@mark.advanced
 @flaky(max_runs=3)
 def test_given_only_categorical_data_when_estimate_icc_then_does_not_fail():
     causal_model = StructuralCausalModel(nx.DiGraph([("X0", "X1"), ("X1", "X2"), ("X2", "X3")]))
diff --git a/tests/gcm/test_stats.py b/tests/gcm/test_stats.py
index 7293c9179..e86e42233 100644
--- a/tests/gcm/test_stats.py
+++ b/tests/gcm/test_stats.py
@@ -2,7 +2,7 @@ import numpy as np
 import pytest
 from flaky import flaky
 from numpy.matlib import repmat
-from pytest import approx, mark
+from pytest import approx
 
 from dowhy.gcm.ml import (
     create_hist_gradient_boost_classifier,
@@ -97,7 +97,6 @@ def test_marginal_expectation_returns_reduced_results():
     assert results.shape[1] == 1
 
 
-@mark.advanced
 @flaky(max_runs=5)
 def test_marginal_expectation_independent_continuous_linear():
     X = np.random.normal(0, 1, (1000, 3))
diff --git a/tests/gcm/test_validation.py b/tests/gcm/test_validation.py
index 26ce3cd98..5d9162ad3 100644
--- a/tests/gcm/test_validation.py
+++ b/tests/gcm/test_validation.py
@@ -2,7 +2,6 @@ import networkx as nx
 import numpy as np
 import pandas as pd
 from flaky import flaky
-from pytest import mark
 
 from dowhy.gcm import (
     InvertibleStructuralCausalModel,
@@ -174,7 +173,6 @@ def test_given_non_linear_data_and_correct_dag_when_refute_invertible_model_then
     )
 
 
-@mark.advanced
 @flaky(max_runs=2)
 def test_given_non_linear_data_and_incorrect_dag_when_refute_invertible_model_then_reject_model():
     data = _generate_simple_non_linear_data()
@@ -201,7 +199,6 @@ def test_given_non_linear_data_and_incorrect_dag_when_refute_invertible_model_th
     )
 
 
-@mark.advanced
 @flaky(max_runs=3)
 def test_given_non_linear_data_and_incorrect_dag_with_collider_when_refute_invertible_model_then_reject_model():
     data = _generate_simple_non_linear_data()
diff --git a/tests/test_notebooks.py b/tests/test_notebooks.py
index 9a39b7cf3..024293e7a 100644
--- a/tests/test_notebooks.py
+++ b/tests/test_notebooks.py
@@ -19,13 +19,14 @@ advanced_notebooks = [
     "dowhy_optimize_backdoor_example.ipynb",
     # applied notebook, not necessary to test each time
     "dowhy_ranking_methods.ipynb",
+    # needs xgboost too
+    "DoWhy-The Causal Story Behind Hotel Booking Cancellations.ipynb",
     #
     # Slow Notebooks
     #
     "tutorial-causalinference-machinelearning-using-dowhy-econml.ipynb",
     "dowhy-conditional-treatment-effects.ipynb",
     "dowhy_refuter_notebook.ipynb",
-    "DoWhy-The Causal Story Behind Hotel Booking Cancellations.ipynb",  # needs xgboost too
     "dowhy_twins_example.ipynb",
     "gcm_rca_microservice_architecture.ipynb",
     "gcm_supply_chain_dist_change.ipynb",