Fixed dimensionality bug in bootstrap testing for dummy outcome refuter and updated tests (#203)
* Fixed the statistical significance test used by the dummy outcome refuter * Updated the error tolerances for the dummy outcome tests
Parent: 04676b3ba8
Commit: 58a8924e85
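For context, the core of the fix is the `np.ravel` call in the significance test below: the refuter collects one estimate per simulation, and (judging from the fix) that collection can end up as a 2-D array, while the bootstrap-style significance test expects a flat 1-D vector of simulated estimates. A minimal sketch of the shape issue with hypothetical values (not the actual DoWhy internals):

```python
import numpy as np

# Hypothetical simulation output: one estimate per run stored as a column,
# i.e. shape (num_simulations, 1) instead of the flat shape (num_simulations,).
simulation_results = np.array([[0.01], [-0.02], [0.005], [0.03]])
dummy_estimate = 0.0

# Flattening mirrors the fix: np.ravel turns (4, 1) into (4,) before the
# bootstrap-style comparison against the dummy estimate.
flat_results = np.ravel(simulation_results)
p_value = np.mean(np.abs(flat_results) >= abs(dummy_estimate))
print(flat_results.shape, p_value)
```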
@@ -326,7 +326,7 @@ class DummyOutcomeRefuter(CausalRefuter):
            )

            refute.add_significance_test_results(
-               self.test_significance(dummy_estimate, simulation_results)
+               self.test_significance(dummy_estimate, np.ravel(simulation_results))
            )

            refute.add_refuter(self)
@@ -354,7 +354,6 @@ class DummyOutcomeRefuter(CausalRefuter):

            refute.add_refuter(self)
            refute_list.append(refute)

        return refute_list

    def process_data(self, X_train, outcome_train, X_validation, outcome_validation, transformation_list):
@@ -4,7 +4,7 @@ import logging

class TestRefuter(object):
    def __init__(self, error_tolerance, estimator_method, refuter_method,
-                transformations=None, params = None, confounders_effect_on_t=None,
+                transformations=None, params = None, confounders_effect_on_t=None,
                 confounders_effect_on_y=None, effect_strength_on_t=None,
                 effect_strength_on_y=None, **kwargs):
        self._error_tolerance = error_tolerance
@@ -17,7 +17,7 @@ class TestRefuter(object):
        self.confounders_effect_on_y = confounders_effect_on_y
        self.effect_strength_on_t = effect_strength_on_t
        self.effect_strength_on_y = effect_strength_on_y
-
+
        if 'logging_level' in kwargs:
            logging.basicConfig(level=kwargs['logging_level'])
        else:
@@ -28,7 +28,7 @@ class TestRefuter(object):

    def null_refutation_test(self, data=None, dataset="linear", beta=10,
            num_common_causes=1, num_instruments=1, num_samples=100000,
-           treatment_is_binary=True):
+           treatment_is_binary=True, num_dummyoutcome_simulations=None):
        # Supports user-provided dataset object
        if data is None:
            data = dowhy.datasets.linear_dataset(beta=beta,
@@ -76,7 +76,7 @@ class TestRefuter(object):
                effect_strength_on_treatment = 0,
                effect_strength_on_outcome = 0)
            error = abs(refute.new_effect - ate_estimate.value)
-
+
            print("Error in refuted estimate = {0} with tolerance {1}%. Estimated={2},After Refutation={3}".format(
                error, self._error_tolerance * 100, ate_estimate.value, refute.new_effect)
            )
@@ -85,44 +85,44 @@ class TestRefuter(object):

        elif self.refuter_method == "placebo_treatment_refuter":
            if treatment_is_binary is True:
-               ref = model.refute_estimate(target_estimand,
+               ref = model.refute_estimate(target_estimand,
                    ate_estimate,
                    method_name=self.refuter_method,
                    num_simulations=10
                    )
            else:
-               ref = model.refute_estimate(target_estimand,
+               ref = model.refute_estimate(target_estimand,
                    ate_estimate,
                    method_name=self.refuter_method
                    )
            # This value is hardcoded to be zero as we are runnning this on a linear dataset.
            # Ordinarily, we should expect this value to be zero.
            EXPECTED_PLACEBO_VALUE = 0
-
+
            error = abs(ref.new_effect - EXPECTED_PLACEBO_VALUE)

-           print("Error in the refuted estimate = {0} with tolerence {1}%. Expected Value={2}, After Refutation={3}".format(
-               error, self._error_tolerance * 100, EXPECTED_PLACEBO_VALUE, ref.new_effect)
+           print("Error in the refuted estimate = {0} with tolerence {1}. Expected Value={2}, After Refutation={3}".format(
+               error, self._error_tolerance, EXPECTED_PLACEBO_VALUE, ref.new_effect)
            )

            print(ref)

            res = True if (error < self._error_tolerance) else False
            assert res

-
+
        elif self.refuter_method == "data_subset_refuter":
            if treatment_is_binary is True:
-               ref = model.refute_estimate(target_estimand,
+               ref = model.refute_estimate(target_estimand,
                    ate_estimate,
                    method_name=self.refuter_method,
                    num_simulations=5
                    )
            else:
-               ref = model.refute_estimate(target_estimand,
+               ref = model.refute_estimate(target_estimand,
                    ate_estimate,
                    method_name=self.refuter_method
                    )
-
+
            error = abs(ref.new_effect - ate_estimate.value)

            print("Error in the refuted estimate = {0} with tolerence {1}%. Estimated={2}, After Refutation={3}".format(
@@ -133,20 +133,20 @@ class TestRefuter(object):

            res = True if (error < abs(ate_estimate.value)*self._error_tolerance) else False
            assert res
-
+
        elif self.refuter_method == "bootstrap_refuter":
            if treatment_is_binary is True:
-               ref = model.refute_estimate(target_estimand,
+               ref = model.refute_estimate(target_estimand,
                    ate_estimate,
                    method_name=self.refuter_method,
                    num_simulations=5
                    )
            else:
-               ref = model.refute_estimate(target_estimand,
+               ref = model.refute_estimate(target_estimand,
                    ate_estimate,
                    method_name=self.refuter_method
                    )
-
+
            error = abs(ref.new_effect - ate_estimate.value)

            print("Error in the refuted estimate = {0} with tolerence {1}%. Estimated={2}, After Refutation={3}".format(
@@ -163,7 +163,7 @@ class TestRefuter(object):
                ref_list = model.refute_estimate(target_estimand,
                    ate_estimate,
                    method_name=self.refuter_method,
-                   num_simulations = 2
+                   num_simulations = num_dummyoutcome_simulations
                    )
            else:
                ref_list = model.refute_estimate(target_estimand,
@@ -171,7 +171,7 @@ class TestRefuter(object):
                    method_name=self.refuter_method,
                    transformation_list = self.transformations,
                    params = self.params,
-                   num_simulations = 2
+                   num_simulations = num_dummyoutcome_simulations
                    )

            INDEX = 0
@@ -183,27 +183,36 @@ class TestRefuter(object):

            error = abs( ref.new_effect - EXPECTED_DUMMY_OUTCOME_VALUE)

-           print("Error in the refuted estimate = {0} with tolerence {1}%. Expected Value={2}, After Refutation={3}".format(
-               error, self._error_tolerance * 100, EXPECTED_DUMMY_OUTCOME_VALUE, ref.new_effect)
+           print("Error in the refuted estimate = {0} with tolerence {1}. Expected Value={2}, After Refutation={3}".format(
+               error, self._error_tolerance, EXPECTED_DUMMY_OUTCOME_VALUE, ref.new_effect)
            )

            print(ref)

-           res = True if (error < abs(ate_estimate.value)*self._error_tolerance) else False
-           assert res
-
-   def binary_treatment_testsuite(self, num_samples=100000,num_common_causes=1,tests_to_run="all"):
-       self.null_refutation_test(num_common_causes=num_common_causes,num_samples=num_samples)
-       if tests_to_run != "atleast-one-common-cause":
-           self.null_refutation_test(num_common_causes=0,num_samples=num_samples)
-
-   def continuous_treatment_testsuite(self, num_samples=100000,num_common_causes=1,tests_to_run="all"):
+           res = True if (error < self._error_tolerance) else False
+           assert res
+
+   def binary_treatment_testsuite(self, num_samples=100000,num_common_causes=1,tests_to_run="all",
+           num_dummyoutcome_simulations=2):
+       self.null_refutation_test(num_common_causes=num_common_causes,
+           num_samples=num_samples,
+           num_dummyoutcome_simulations=num_dummyoutcome_simulations)
+       if tests_to_run != "atleast-one-common-cause":
+           self.null_refutation_test(num_common_causes=0,
+               num_samples=num_samples,
+               num_dummyoutcome_simulations=num_dummyoutcome_simulations)
+
+   def continuous_treatment_testsuite(self, num_samples=100000,
+           num_common_causes=1,tests_to_run="all",
+           num_dummyoutcome_simulations=2):
        self.null_refutation_test(
            num_common_causes=num_common_causes,num_samples=num_samples,
-           treatment_is_binary=False)
+           treatment_is_binary=False,
+           num_dummyoutcome_simulations=num_dummyoutcome_simulations)
        if tests_to_run != "atleast-one-common-cause":
            self.null_refutation_test(num_common_causes=0, num_samples=num_samples,
-               treatment_is_binary=False)
+               treatment_is_binary=False,
+               num_dummyoutcome_simulations=num_dummyoutcome_simulations)
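The new num_dummyoutcome_simulations knob is forwarded to refute_estimate as num_simulations, so the tests can trade runtime against the stability of the bootstrap test. A usage sketch of the flow these tests exercise (a standard DoWhy workflow; the dataset sizes and the choice of 2 simulations are arbitrary, and the dummy outcome refuter returns a list of refutation results):

```python
import dowhy.datasets
from dowhy import CausalModel

# Sketch of the workflow the test helper drives (values are illustrative).
data = dowhy.datasets.linear_dataset(beta=10, num_common_causes=1,
                                     num_instruments=1, num_samples=10000,
                                     treatment_is_binary=False)
model = CausalModel(data=data["df"],
                    treatment=data["treatment_name"],
                    outcome=data["outcome_name"],
                    graph=data["gml_graph"])
target_estimand = model.identify_effect(proceed_when_unidentifiable=True)
ate_estimate = model.estimate_effect(target_estimand,
                                     method_name="iv.instrumental_variable")

# The test helper forwards num_dummyoutcome_simulations as num_simulations.
ref_list = model.refute_estimate(target_estimand, ate_estimate,
                                 method_name="dummy_outcome_refuter",
                                 num_simulations=2)
print(ref_list[0])
```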
@@ -1,194 +1,194 @@
-import pytest
-import numpy as np
-import pdb
-from .base import TestRefuter
-
-def simple_linear_outcome_model(X_train, output_train):
-    # The outcome is a linear function of the confounder
-    # The slope is 1,2 and the intercept is 3
-    return lambda X_train: X_train[:,0] + 2*X_train[:,1] + 3
-
-
-@pytest.mark.usefixtures("fixed_seed")
-class TestDummyOutcomeRefuter(object):
-    @pytest.mark.parametrize(["error_tolerence","estimator_method"],
-        [(0.03, "iv.instrumental_variable")])
-    def test_refutation_dummy_outcome_refuter_default_continuous_treatment(self, error_tolerence, estimator_method):
-        refuter_tester = TestRefuter(error_tolerence, estimator_method, "dummy_outcome_refuter")
-        refuter_tester.continuous_treatment_testsuite()
-
-    @pytest.mark.parametrize(["error_tolerence","estimator_method","num_samples"],
-        [(0.03, "backdoor.propensity_score_matching",1000)])
-    def test_refutation_dummy_outcome_refuter_default_binary_treatment(self, error_tolerence, estimator_method, num_samples):
-        refuter_tester = TestRefuter(error_tolerence, estimator_method, "dummy_outcome_refuter")
-        refuter_tester.binary_treatment_testsuite(tests_to_run="atleast-one-common-cause", num_samples=num_samples)
-
-    @pytest.mark.parametrize(["error_tolerence","estimator_method", "transformations"],
-        [(0.03, "iv.instrumental_variable", [("zero",""),("noise", {'std_dev': 1} )] )] )
-    def test_refutation_dummy_outcome_refuter_randomly_generated_continuous_treatment(self, error_tolerence, estimator_method, transformations):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-
-        refuter_tester.continuous_treatment_testsuite()
-
-    @pytest.mark.parametrize(["error_tolerence","estimator_method", "transformations","num_samples"],
-        [(0.03, "backdoor.propensity_score_matching", [("zero",""),("noise", {'std_dev': 1} )], 1000 )] )
-    def test_refutation_dummy_outcome_refuter_randomly_generated_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-
-        refuter_tester.binary_treatment_testsuite(tests_to_run="atleast-one-common-cause", num_samples=num_samples)
-
-    @pytest.mark.parametrize(["error_tolerence", "estimator_method", "transformations"],
-        [(0.03, "iv.instrumental_variable", [("permute", {'permute_fraction' :1} )] )] )
-    def test_refutation_dummy_outcome_refuter_permute_data_continuous_treatment(self, error_tolerence, estimator_method, transformations):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-
-        refuter_tester.continuous_treatment_testsuite()
-
-    @pytest.mark.parametrize(["error_tolerence", "estimator_method", "transformations","num_samples"],
-        [(0.05, "backdoor.propensity_score_matching", [("permute", {'permute_fraction' :1} )], 1000 )] )
-    def test_refutation_dummy_outcome_refuter_permute_data_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-
-        refuter_tester.binary_treatment_testsuite(tests_to_run="atleast-one-common-cause", num_samples=num_samples)
-
-    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations"],
-        [(0.03, "iv.instrumental_variable",[(simple_linear_outcome_model, {}), ("noise", {'std_dev': 1} )] )])
-    def test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_noise_continuous_treatment(self, error_tolerence, estimator_method, transformations):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-        refuter_tester.continuous_treatment_testsuite(tests_to_run="atleast-one-common-cause")
-
-    @pytest.mark.xfail
-    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations","num_samples"],
-        [(0.03, "backdoor.propensity_score_matching",[(simple_linear_outcome_model, {}), ("noise", {'std_dev': 1} )], 1000 )])
-    def test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_noise_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-        refuter_tester.binary_treatment_testsuite(tests_to_run="atleast-one-common-cause", num_samples=num_samples)
-
-    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations"],
-        [(0.03, "iv.instrumental_variable",[("permute", {'permute_fraction':0.5}),(simple_linear_outcome_model, {}), ("noise", {'std_dev': 1} )] )])
-    def test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_permute_continuous_treatment(self, error_tolerence, estimator_method, transformations):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-        refuter_tester.continuous_treatment_testsuite(tests_to_run="atleast-one-common-cause")
-
-    @pytest.mark.xfail
-    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations","num_samples"],
-        [(0.03, "backdoor.propensity_score_matching",[("permute", {'permute_fraction':0.5}),(simple_linear_outcome_model, {}), ("noise", {'std_dev': 1} )], 1000 )])
-    def test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_permute_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-        refuter_tester.binary_treatment_testsuite(tests_to_run="atleast-one-common-cause", num_samples=num_samples)
-
-
-    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations"],
-        [(0.01, "iv.instrumental_variable",[("linear_regression",{}) , ("zero",""), ("noise", {'std_dev': 1} )] )])
-    def test_refutation_dummy_outcome_refuter_internal_linear_regression_continuous_treatment(self, error_tolerence, estimator_method, transformations):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-        refuter_tester.continuous_treatment_testsuite(tests_to_run="atleast-one-common-cause")
-
-    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations","num_samples"],
-        [(0.01, "backdoor.propensity_score_matching",[("linear_regression",{}) , ("zero",""), ("noise", {'std_dev': 1} )], 1000 )])
-    def test_refutation_dummy_outcome_refuter_internal_linear_regression_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-        refuter_tester.binary_treatment_testsuite(tests_to_run="atleast-one-common-cause", num_samples=num_samples)
-
-    @pytest.mark.parametrize(["error_tolerence","estimator_method", "transformations"],
-        [(0.01, "iv.instrumental_variable",[("knn",{'n_neighbors':5}), ("zero",""), ("noise", {'std_dev': 1} )] )])
-    def test_refutation_dummy_outcome_refuter_internal_knn_continuous_treatment(self, error_tolerence, estimator_method, transformations):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-        refuter_tester.continuous_treatment_testsuite(tests_to_run="atleast-one-common-cause")
-
-    @pytest.mark.parametrize(["error_tolerence","estimator_method", "transformations","num_samples"],
-        [(0.01, "backdoor.propensity_score_matching",[("knn",{'n_neighbors':5}), ("zero",""), ("noise", {'std_dev': 1} )], 1000 )])
-    def test_refutation_dummy_outcome_refuter_internal_knn_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-        refuter_tester.binary_treatment_testsuite(tests_to_run="atleast-one-common-cause", num_samples=num_samples)
-
-    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations", "num_samples"],
-        [(0.01, "iv.instrumental_variable",[("svm",{'C':1,'epsilon':0.2}), ("zero",""), ("noise", {'std_dev': 1} )], 10000 )])
-    def test_refutation_dummy_outcome_refuter_internal_svm_continuous_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-        refuter_tester.continuous_treatment_testsuite(num_samples=num_samples, tests_to_run="atleast-one-common-cause")
-
-    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations", "num_samples"],
-        [(0.01, "backdoor.propensity_score_matching",[("svm",{'C':1,'epsilon':0.2}), ("zero",""), ("noise", {'std_dev': 1} )], 1000 )])
-    def test_refutation_dummy_outcome_refuter_internal_svm_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-        refuter_tester.binary_treatment_testsuite(num_samples=num_samples, tests_to_run="atleast-one-common-cause")
-
-    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations","num_samples"],
-        [(0.01, "iv.instrumental_variable",[("random_forest",{'max_depth':20}), ("zero",""), ("noise", {'std_dev': 1} )], 10000)])
-    def test_refutation_dummy_outcome_refuter_internal_random_forest_continuous_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-        refuter_tester.continuous_treatment_testsuite(num_samples,tests_to_run="atleast-one-common-cause")
-
-    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations","num_samples"],
-        [(0.01, "backdoor.propensity_score_matching",[("random_forest",{'max_depth':20}), ("zero",""), ("noise", {'std_dev': 1} )], 1000)])
-    def test_refutation_dummy_outcome_refuter_internal_random_forest_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-        refuter_tester.binary_treatment_testsuite(num_samples,tests_to_run="atleast-one-common-cause")
-
-    # As we run with only one common cause and one instrument variable we run with (?, 2)
-    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations"],
-        [(0.01, "iv.instrumental_variable",[("neural_network",{'solver':'lbfgs', 'alpha':1e-5, 'hidden_layer_sizes':(5,2)}), ("zero",""), ("noise", {'std_dev': 1} )] )])
-    def test_refutation_dummy_outcome_refuter_internal_neural_network_continuous_treatment(self, error_tolerence, estimator_method, transformations):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-        refuter_tester.continuous_treatment_testsuite(tests_to_run="atleast-one-common-cause")
-
-    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations","num_samples"],
-        [(0.01, "backdoor.propensity_score_matching",[("neural_network",{'solver':'lbfgs', 'alpha':1e-5, 'hidden_layer_sizes':(5,2)}), ("zero",""), ("noise", {'std_dev': 1} )], 1000 )])
-    def test_refutation_dummy_outcome_refuter_internal_neural_network_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
-        refuter_tester = TestRefuter(error_tolerence,
-            estimator_method,
-            "dummy_outcome_refuter",
-            transformations=transformations)
-        refuter_tester.binary_treatment_testsuite(num_samples=num_samples, tests_to_run="atleast-one-common-cause")
+import pytest
+import numpy as np
+import pdb
+from .base import TestRefuter
+
+def simple_linear_outcome_model(X_train, output_train):
+    # The outcome is a linear function of the confounder
+    # The slope is 1,2 and the intercept is 3
+    return lambda X_train: X_train[:,0] + 2*X_train[:,1] + 3
+
+
+@pytest.mark.usefixtures("fixed_seed")
+class TestDummyOutcomeRefuter(object):
+    @pytest.mark.parametrize(["error_tolerence","estimator_method"],
+        [(0.03, "iv.instrumental_variable")])
+    def test_refutation_dummy_outcome_refuter_default_continuous_treatment(self, error_tolerence, estimator_method):
+        refuter_tester = TestRefuter(error_tolerence, estimator_method, "dummy_outcome_refuter")
+        refuter_tester.continuous_treatment_testsuite(num_dummyoutcome_simulations=100)
+
+    @pytest.mark.parametrize(["error_tolerence","estimator_method","num_samples"],
+        [(0.1, "backdoor.propensity_score_matching",1000)])
+    def test_refutation_dummy_outcome_refuter_default_binary_treatment(self, error_tolerence, estimator_method, num_samples):
+        refuter_tester = TestRefuter(error_tolerence, estimator_method, "dummy_outcome_refuter")
+        refuter_tester.binary_treatment_testsuite(tests_to_run="atleast-one-common-cause", num_samples=num_samples)
+
+    @pytest.mark.parametrize(["error_tolerence","estimator_method", "transformations"],
+        [(0.05, "iv.instrumental_variable", [("zero",""),("noise", {'std_dev': 1} )] )] )
+    def test_refutation_dummy_outcome_refuter_randomly_generated_continuous_treatment(self, error_tolerence, estimator_method, transformations):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+
+        refuter_tester.continuous_treatment_testsuite()
+
+    @pytest.mark.parametrize(["error_tolerence","estimator_method", "transformations","num_samples"],
+        [(0.05, "backdoor.propensity_score_matching", [("zero",""),("noise", {'std_dev': 1} )], 1000 )] )
+    def test_refutation_dummy_outcome_refuter_randomly_generated_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+
+        refuter_tester.binary_treatment_testsuite(tests_to_run="atleast-one-common-cause", num_samples=num_samples)
+
+    @pytest.mark.parametrize(["error_tolerence", "estimator_method", "transformations"],
+        [(0.03, "iv.instrumental_variable", [("permute", {'permute_fraction' :1} )] )] )
+    def test_refutation_dummy_outcome_refuter_permute_data_continuous_treatment(self, error_tolerence, estimator_method, transformations):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+
+        refuter_tester.continuous_treatment_testsuite()
+
+    @pytest.mark.parametrize(["error_tolerence", "estimator_method", "transformations","num_samples"],
+        [(0.1, "backdoor.linear_regression", [("permute", {'permute_fraction' :1} )], 1000 )] )
+    def test_refutation_dummy_outcome_refuter_permute_data_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+
+        refuter_tester.binary_treatment_testsuite(tests_to_run="atleast-one-common-cause", num_samples=num_samples)
+
+    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations"],
+        [(0.2, "iv.instrumental_variable",[(simple_linear_outcome_model, {}), ("noise", {'std_dev': 1} )] )])
+    def test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_noise_continuous_treatment(self, error_tolerence, estimator_method, transformations):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+        refuter_tester.continuous_treatment_testsuite(tests_to_run="atleast-one-common-cause")
+
+    @pytest.mark.xfail
+    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations","num_samples"],
+        [(0.2, "backdoor.linear_regression",[(simple_linear_outcome_model, {}), ("noise", {'std_dev': 1} )], 1000 )])
+    def test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_noise_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+        refuter_tester.binary_treatment_testsuite(tests_to_run="atleast-one-common-cause", num_samples=num_samples)
+
+    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations"],
+        [(0.2, "backdoor.linear_regression",[("permute", {'permute_fraction':0.5}),(simple_linear_outcome_model, {}), ("noise", {'std_dev': 1} )] )])
+    def test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_permute_continuous_treatment(self, error_tolerence, estimator_method, transformations):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+        refuter_tester.continuous_treatment_testsuite(tests_to_run="atleast-one-common-cause")
+
+    @pytest.mark.xfail
+    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations","num_samples"],
+        [(0.2, "backdoor.propensity_score_matching",[("permute", {'permute_fraction':0.5}),(simple_linear_outcome_model, {}), ("noise", {'std_dev': 1} )], 1000 )])
+    def test_refutation_dummy_outcome_refuter_custom_function_linear_regression_with_permute_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+        refuter_tester.binary_treatment_testsuite(tests_to_run="atleast-one-common-cause", num_samples=num_samples)
+
+
+    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations"],
+        [(0.01, "iv.instrumental_variable",[("linear_regression",{}) , ("zero",""), ("noise", {'std_dev': 1} )] )])
+    def test_refutation_dummy_outcome_refuter_internal_linear_regression_continuous_treatment(self, error_tolerence, estimator_method, transformations):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+        refuter_tester.continuous_treatment_testsuite(tests_to_run="atleast-one-common-cause")
+
+    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations","num_samples"],
+        [(0.2, "backdoor.propensity_score_matching",[("linear_regression",{}) , ("zero",""), ("noise", {'std_dev': 1} )], 1000 )])
+    def test_refutation_dummy_outcome_refuter_internal_linear_regression_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+        refuter_tester.binary_treatment_testsuite(tests_to_run="atleast-one-common-cause", num_samples=num_samples)
+
+    @pytest.mark.parametrize(["error_tolerence","estimator_method", "transformations"],
+        [(0.2, "iv.instrumental_variable",[("knn",{'n_neighbors':5}), ("zero",""), ("noise", {'std_dev': 1} )] )])
+    def test_refutation_dummy_outcome_refuter_internal_knn_continuous_treatment(self, error_tolerence, estimator_method, transformations):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+        refuter_tester.continuous_treatment_testsuite(tests_to_run="atleast-one-common-cause")
+
+    @pytest.mark.parametrize(["error_tolerence","estimator_method", "transformations","num_samples"],
+        [(0.2, "backdoor.propensity_score_matching",[("knn",{'n_neighbors':5}), ("zero",""), ("noise", {'std_dev': 1} )], 1000 )])
+    def test_refutation_dummy_outcome_refuter_internal_knn_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+        refuter_tester.binary_treatment_testsuite(tests_to_run="atleast-one-common-cause", num_samples=num_samples)
+
+    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations", "num_samples"],
+        [(0.01, "iv.instrumental_variable",[("svm",{'C':1,'epsilon':0.2}), ("zero",""), ("noise", {'std_dev': 1} )], 10000 )])
+    def test_refutation_dummy_outcome_refuter_internal_svm_continuous_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+        refuter_tester.continuous_treatment_testsuite(num_samples=num_samples, tests_to_run="atleast-one-common-cause")
+
+    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations", "num_samples"],
+        [(0.1, "backdoor.propensity_score_matching",[("svm",{'C':1,'epsilon':0.2}), ("zero",""), ("noise", {'std_dev': 1} )], 1000 )])
+    def test_refutation_dummy_outcome_refuter_internal_svm_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+        refuter_tester.binary_treatment_testsuite(num_samples=num_samples, tests_to_run="atleast-one-common-cause")
+
+    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations","num_samples"],
+        [(0.01, "iv.instrumental_variable",[("random_forest",{'max_depth':20}), ("zero",""), ("noise", {'std_dev': 1} )], 10000)])
+    def test_refutation_dummy_outcome_refuter_internal_random_forest_continuous_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+        refuter_tester.continuous_treatment_testsuite(num_samples,tests_to_run="atleast-one-common-cause")
+
+    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations","num_samples"],
+        [(0.1, "backdoor.propensity_score_matching",[("random_forest",{'max_depth':20}), ("zero",""), ("noise", {'std_dev': 1} )], 1000)])
+    def test_refutation_dummy_outcome_refuter_internal_random_forest_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+        refuter_tester.binary_treatment_testsuite(num_samples,tests_to_run="atleast-one-common-cause")
+
+    # As we run with only one common cause and one instrument variable we run with (?, 2)
+    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations"],
+        [(0.01, "iv.instrumental_variable",[("neural_network",{'solver':'lbfgs', 'alpha':1e-5, 'hidden_layer_sizes':(5,2)}), ("zero",""), ("noise", {'std_dev': 1} )] )])
+    def test_refutation_dummy_outcome_refuter_internal_neural_network_continuous_treatment(self, error_tolerence, estimator_method, transformations):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+        refuter_tester.continuous_treatment_testsuite(tests_to_run="atleast-one-common-cause")
+
+    @pytest.mark.parametrize(["error_tolerence","estimator_method","transformations","num_samples"],
+        [(0.1, "backdoor.propensity_score_matching",[("neural_network",{'solver':'lbfgs', 'alpha':1e-5, 'hidden_layer_sizes':(5,2)}), ("zero",""), ("noise", {'std_dev': 1} )], 1000 )])
+    def test_refutation_dummy_outcome_refuter_internal_neural_network_binary_treatment(self, error_tolerence, estimator_method, transformations, num_samples):
+        refuter_tester = TestRefuter(error_tolerence,
+            estimator_method,
+            "dummy_outcome_refuter",
+            transformations=transformations)
+        refuter_tester.binary_treatment_testsuite(num_samples=num_samples, tests_to_run="atleast-one-common-cause")