Optimize backdoor criterion identification for large graphs (#342)

* Used the d-separation algorithm from networkx * Updated backdoor identification to be faster * Refactored code for the new function * Changed the logic for unobserved confounding to simply remove the unobserved confounders when returning a backdoor set, which simplifies the logic.
2022-01-03 09:10:16 +05:30 · 2022-01-03 09:10:16 +05:30 · 8452f3948c
--- a/docs/source/example_notebooks/dowhy_ranking_methods.ipynb
+++ b/docs/source/example_notebooks/dowhy_ranking_methods.ipynb
--- a/dowhy/causal_graph.py
+++ b/dowhy/causal_graph.py
@ -159,8 +159,11 @@ class CausalGraph:
                if node_name not in common_cause_names:
                    for outcome in self.outcome_name:
                        self._graph.add_node(node_name, observed="yes")
-                        self._graph.add_edge(node_name, outcome, style = "dotted", headport="s", tailport="n")
-                        self._graph.add_edge(outcome, node_name, style = "dotted", headport="n", tailport="s") # TODO make the ports more general so that they apply not just to top-bottom node configurations
+                        # Assuming the simple form of effect modifier
+                        # that directly causes the outcome.
+                        self._graph.add_edge(node_name, outcome)
+                        #self._graph.add_edge(node_name, outcome, style = "dotted", headport="s", tailport="n")
+                        #self._graph.add_edge(outcome, node_name, style = "dotted", headport="n", tailport="s") # TODO make the ports more general so that they apply not just to top-bottom node configurations
        if mediator_names is not None:
            for node_name in mediator_names:
                for treatment, outcome in itertools.product(self.treatment_name, self.outcome_name):
@ -209,6 +212,7 @@ class CausalGraph:

    def do_surgery(self, node_names, remove_outgoing_edges=False,
                   remove_incoming_edges=False):
+        node_names = parse_state(node_names)
        new_graph = self._graph.copy()
        for node_name in node_names:
            if remove_outgoing_edges:
@ -236,15 +240,39 @@ class CausalGraph:
            causes = causes.union(self.get_ancestors(v, new_graph=new_graph))
        return causes

-    def check_valid_backdoor_set(self, nodes1, nodes2, nodes3, backdoor_paths=None):
+    def check_dseparation(self, nodes1, nodes2, nodes3, new_graph=None,
+            dseparation_algo="default"):
+        if dseparation_algo == "default":
+            if new_graph is None:
+                new_graph = self._graph
+            dseparated = nx.algorithms.d_separated(new_graph,
+                    set(nodes1), set(nodes2), set(nodes3))
+        else:
+            raise ValueError(f"{dseparation_algo} method for d-separation not supported.")
+        return dseparated
+
+    def check_valid_backdoor_set(self, nodes1, nodes2, nodes3,
+            backdoor_paths=None, new_graph=None, dseparation_algo="default"):
+        """ Assume that the first parameter (nodes1) is the treatment,
+        the second is the outcome, and the third is the candidate backdoor set
+        """
        # also return the number of backdoor paths blocked by observed nodes
-        if backdoor_paths is None:
-            backdoor_paths = self.get_backdoor_paths(nodes1, nodes2)
-        d_separated = all([self.is_blocked(path, nodes3) for path in backdoor_paths])
-        observed_nodes3 = self.filter_unobserved_variables(nodes3)
-        num_paths_blocked = sum([self.is_blocked(path, observed_nodes3) for path in backdoor_paths])
-        return {'is_dseparated': d_separated,
-                'num_paths_blocked_by_observed_nodes': num_paths_blocked}
+        if dseparation_algo == "default":
+            if new_graph is None:
+                # Assume that nodes1 is the treatment
+                new_graph = self.do_surgery(nodes1,
+                    remove_outgoing_edges=True)
+            dseparated = nx.algorithms.d_separated(new_graph,
+                    set(nodes1), set(nodes2), set(nodes3))
+        elif dseparation_algo == "naive":
+            # ignores new_graph parameter, always uses self._graph
+            if backdoor_paths is None:
+                backdoor_paths = self.get_backdoor_paths(nodes1, nodes2)
+            dseparated = all([self.is_blocked(path, nodes3) for path in backdoor_paths])
+        else:
+            raise ValueError(f"{dseparation_algo} method for d-separation not supported.")
+        return {'is_dseparated': dseparated}
+

    def get_backdoor_paths(self, nodes1, nodes2):
        paths = []
@ -406,8 +434,9 @@ class CausalGraph:

        Currently only supports singleton sets.
        """
-        dpaths = self.get_all_directed_paths(nodes1, nodes2)
-        return len(dpaths) > 0
+        #dpaths = self.get_all_directed_paths(nodes1, nodes2)
+        #return len(dpaths) > 0
+        return nx.has_path(self._graph, nodes1[0], nodes2[0])

    def get_adjacency_matrix(self, *args, **kwargs):
        '''
@ -416,14 +445,25 @@ class CausalGraph:
        '''
        return nx.convert_matrix.to_numpy_matrix(self._graph, *args, **kwargs)

-    def check_valid_frontdoor_set(self, nodes1, nodes2, candidate_nodes, frontdoor_paths=None):
+    def check_valid_frontdoor_set(self, nodes1, nodes2, candidate_nodes,
+            frontdoor_paths=None, new_graph = None,
+            dseparation_algo="default"):
        """Check if valid the frontdoor variables for set of treatments, nodes1 to set of outcomes, nodes2.
        """
-        if frontdoor_paths is None:
-            frontdoor_paths = self.get_all_directed_paths(nodes1, nodes2)
+        # Condition 1: node 1 ---> node 2 is intercepted by candidate_nodes
+        if dseparation_algo == "default":
+            if new_graph is None:
+                new_graph = self._graph
+            dseparated = nx.algorithms.d_separated(new_graph,
+                    set(nodes1), set(nodes2), set(candidate_nodes))
+        elif dseparation_algo == "naive":
+            if frontdoor_paths is None:
+                frontdoor_paths = self.get_all_directed_paths(nodes1, nodes2)

-        d_separated = all([self.is_blocked(path, candidate_nodes) for path in frontdoor_paths])
-        return d_separated
+            dseparated = all([self.is_blocked(path, candidate_nodes) for path in frontdoor_paths])
+        else:
+            raise ValueError(f"{dseparation_algo} method for d-separation not supported.")
+        return dseparated

    def check_valid_mediation_set(self, nodes1, nodes2, candidate_nodes, mediation_paths=None):
        """Check if candidate nodes are valid mediators for set of treatments, nodes1 to set of outcomes, nodes2.
--- a/dowhy/causal_identifier.py
+++ b/dowhy/causal_identifier.py
@ -30,7 +30,7 @@ class CausalIdentifier:
    BACKDOOR_MIN="minimal-adjustment"
    BACKDOOR_MAX="maximal-adjustment"
    METHOD_NAMES = {BACKDOOR_DEFAULT, BACKDOOR_EXHAUSTIVE, BACKDOOR_MIN, BACKDOOR_MAX}
-    DEFAULT_BACKDOOR_METHOD = BACKDOOR_MAX
+    DEFAULT_BACKDOOR_METHOD = BACKDOOR_DEFAULT

    def __init__(self, graph, estimand_type,
            method_name = "default",
@ -249,19 +249,28 @@ class CausalIdentifier:
        )
        return estimand

-    def identify_backdoor(self, treatment_name, outcome_name, include_unobserved=True):
+    def identify_backdoor(self, treatment_name, outcome_name,
+            include_unobserved=True, dseparation_algo="default"):
        backdoor_sets = []
-        backdoor_paths = self._graph.get_backdoor_paths(treatment_name, outcome_name)
+        backdoor_paths = None
+        bdoor_graph = None
+        if dseparation_algo == "naive":
+            backdoor_paths = self._graph.get_backdoor_paths(treatment_name, outcome_name)
+        elif dseparation_algo == "default":
+            bdoor_graph = self._graph.do_surgery(treatment_name,
+                    remove_outgoing_edges=True)
+        else:
+            raise ValueError(f"d-separation algorithm {dseparation_algo} is not supported")
        method_name = self.method_name if self.method_name != CausalIdentifier.BACKDOOR_DEFAULT else CausalIdentifier.DEFAULT_BACKDOOR_METHOD

        # First, checking if empty set is a valid backdoor set
        empty_set = set()
-        check = self._graph.check_valid_backdoor_set(treatment_name, outcome_name, empty_set,
-                backdoor_paths=backdoor_paths)
+        check = self._graph.check_valid_backdoor_set(treatment_name,
+                outcome_name, empty_set,
+                backdoor_paths=backdoor_paths, new_graph=bdoor_graph,
+                dseparation_algo=dseparation_algo)
        if check["is_dseparated"]:
-            backdoor_sets.append({
-                'backdoor_set':empty_set,
-                'num_paths_blocked_by_observed_nodes': check["num_paths_blocked_by_observed_nodes"]})
+            backdoor_sets.append({'backdoor_set':empty_set})
            # If the method is `minimal-adjustment`, return the empty set right away.
            if method_name == CausalIdentifier.BACKDOOR_MIN:
                return backdoor_sets
@ -271,33 +280,75 @@ class CausalIdentifier:
            - set(treatment_name) \
            - set(outcome_name)
        eligible_variables -= self._graph.get_descendants(treatment_name)
-
-        num_iterations = 0
-        found_valid_adjustment_set = False
+        # If var is d-separated from both the treatment and the outcome,
+        # it cannot be a part of the backdoor set
+        filt_eligible_variables = set()
+        for var in eligible_variables:
+            dsep_treat_var = self._graph.check_dseparation(
+                    treatment_name, parse_state(var),
+                    set())
+            dsep_outcome_var = self._graph.check_dseparation(
+                    outcome_name, parse_state(var), set())
+            if not dsep_outcome_var or not dsep_treat_var:
+                filt_eligible_variables.add(var)
        if method_name in CausalIdentifier.METHOD_NAMES:
-            # If `minimal-adjustment` method is specified, start the search from the set with minimum size. Otherwise, start from the largest.
-            set_sizes = range(1, len(eligible_variables) + 1, 1) if method_name == CausalIdentifier.BACKDOOR_MIN else range(len(eligible_variables), 0, -1)
-            for size_candidate_set in set_sizes:
-                for candidate_set in itertools.combinations(eligible_variables, size_candidate_set):
-                    check = self._graph.check_valid_backdoor_set(treatment_name,
-                            outcome_name, candidate_set, backdoor_paths=backdoor_paths)
-                    self.logger.debug("Candidate backdoor set: {0}, is_dseparated: {1}, No. of paths blocked by observed_nodes: {2}".format(candidate_set, check["is_dseparated"], check["num_paths_blocked_by_observed_nodes"]))
-                    if check["is_dseparated"]:
-                        backdoor_sets.append({
-                            'backdoor_set': candidate_set,
-                            'num_paths_blocked_by_observed_nodes': check["num_paths_blocked_by_observed_nodes"]})
-                        found_valid_adjustment_set = True
-                    num_iterations += 1
-                    if method_name == CausalIdentifier.BACKDOOR_EXHAUSTIVE and num_iterations > CausalIdentifier.MAX_BACKDOOR_ITERATIONS:
-                        break
-                # If the backdoor method is `maximal-adjustment` or `minimal-adjustment`, return the first found adjustment set.
-                if method_name in {CausalIdentifier.BACKDOOR_MAX, CausalIdentifier.BACKDOOR_MIN} and found_valid_adjustment_set:
-                    break
+            backdoor_sets, found_valid_adjustment_set = self.find_valid_adjustment_sets(
+                    treatment_name, outcome_name,
+                    backdoor_paths, bdoor_graph,
+                    dseparation_algo,
+                    backdoor_sets, filt_eligible_variables,
+                    method_name=method_name,
+                    max_iterations= CausalIdentifier.MAX_BACKDOOR_ITERATIONS)
+            if method_name == CausalIdentifier.BACKDOOR_DEFAULT and found_valid_adjustment_set:
+                # repeat the above search with BACKDOOR_MIN
+                backdoor_sets, _ = self.find_valid_adjustment_sets(
+                        treatment_name, outcome_name,
+                        backdoor_paths, bdoor_graph,
+                        dseparation_algo,
+                        backdoor_sets, filt_eligible_variables,
+                        method_name=CausalIdentifier.BACKDOOR_MIN,
+                        max_iterations= CausalIdentifier.MAX_BACKDOOR_ITERATIONS)
        else:
            raise ValueError(f"Identifier method {method_name} not supported. Try one of the following: {CausalIdentifier.METHOD_NAMES}")

        return backdoor_sets

+    def find_valid_adjustment_sets(self, treatment_name, outcome_name,
+            backdoor_paths, bdoor_graph, dseparation_algo,
+            backdoor_sets, filt_eligible_variables,
+            method_name, max_iterations):
+        num_iterations = 0
+        found_valid_adjustment_set = False
+        # If `minimal-adjustment` method is specified, start the search from the set with minimum size. Otherwise, start from the largest.
+        set_sizes = range(1, len(filt_eligible_variables) + 1, 1) if method_name == CausalIdentifier.BACKDOOR_MIN else range(len(filt_eligible_variables), 0, -1)
+        for size_candidate_set in set_sizes:
+            for candidate_set in itertools.combinations(filt_eligible_variables, size_candidate_set):
+                check = self._graph.check_valid_backdoor_set(treatment_name,
+                        outcome_name, candidate_set,
+                        backdoor_paths=backdoor_paths,
+                        new_graph = bdoor_graph,
+                        dseparation_algo = dseparation_algo)
+                self.logger.debug("Candidate backdoor set: {0}, is_dseparated: {1}".format(candidate_set, check["is_dseparated"]))
+                if check["is_dseparated"]:
+                    backdoor_sets.append({'backdoor_set': candidate_set})
+                    found_valid_adjustment_set = True
+                num_iterations += 1
+                if method_name == CausalIdentifier.BACKDOOR_EXHAUSTIVE and num_iterations > max_iterations:
+                    self.logger.warning(f"Max number of iterations {max_iterations} reached.")
+                    break
+            # If the backdoor method is the default method, `maximal-adjustment` or `minimal-adjustment`, stop after the first set size at which a valid adjustment set is found.
+            if method_name in {CausalIdentifier.BACKDOOR_DEFAULT, CausalIdentifier.BACKDOOR_MAX, CausalIdentifier.BACKDOOR_MIN} and found_valid_adjustment_set:
+                break
+            # If all variables are observed, and the biggest eligible set
+            # does not satisfy backdoor, then none of its subsets will.
+            if method_name in {CausalIdentifier.BACKDOOR_DEFAULT, CausalIdentifier.BACKDOOR_MAX} and self._graph.all_observed(filt_eligible_variables):
+                break
+            if num_iterations > max_iterations:
+                self.logger.warning(f"Max number of iterations {max_iterations} reached. Could not find a valid backdoor set.")
+                break
+        return backdoor_sets, found_valid_adjustment_set
+
+
    def get_default_backdoor_set_id(self, backdoor_sets_dict):
        # Adding a None estimand if no backdoor set found
        if len(backdoor_sets_dict) == 0:
@ -310,12 +361,12 @@ class CausalIdentifier:
        min_iv_keys = {key for key, iv_count in iv_count_dict.items() if iv_count == min_iv_count}
        min_iv_backdoor_sets_dict = {key: backdoor_sets_dict[key] for key in min_iv_keys}

-        # Default set is the one with the most number of adjustment variables (optimizing for minimum (unknown) bias not for efficiency)
-        max_set_length = -1
+        # Default set is the one with the least number of adjustment variables (optimizing for efficiency)
+        min_set_length = 1000000
        default_key = None
        for key, bdoor_set in min_iv_backdoor_sets_dict.items():
-            if len(bdoor_set) > max_set_length:
-                max_set_length = len(bdoor_set)
+            if len(bdoor_set) < min_set_length:
+                min_set_length = len(bdoor_set)
                default_key = key
        return default_key

@ -328,11 +379,12 @@ class CausalIdentifier:
            proceed_when_unidentifiable = self._proceed_when_unidentifiable
        is_identified = [ self._graph.all_observed(bset["backdoor_set"]) for bset in backdoor_sets ]

-        if all(is_identified):
-            self.logger.info("All common causes are observed. Causal effect can be identified.")
+        if any(is_identified):
+            self.logger.info("Causal effect can be identified.")
            backdoor_sets_arr = [list(
                bset["backdoor_set"])
-                for bset in backdoor_sets]
+                for bset in backdoor_sets
+                if self._graph.all_observed(bset["backdoor_set"]) ]
        else: # there is unobserved confounding
            self.logger.warning("If this is observed data (not from a randomized experiment), there might always be missing confounders. Causal effect cannot be identified perfectly.")
            response = False # user response
@ -349,11 +401,11 @@ class CausalIdentifier:
                    self.logger.warn("Identification failed due to unobserved variables.")
                    backdoor_sets_arr = []
            if proceed_when_unidentifiable or response is True:
-                max_paths_blocked = max( bset['num_paths_blocked_by_observed_nodes'] for bset in backdoor_sets)
-                backdoor_sets_arr = [list(
-                    self._graph.filter_unobserved_variables(bset["backdoor_set"]))
-                    for bset in backdoor_sets
-                    if bset["num_paths_blocked_by_observed_nodes"]==max_paths_blocked]
+                # Just removing the unobserved variable
+                backdoor_sets_arr = []
+                for bset in backdoor_sets:
+                    curr_set = list(self._graph.filter_unobserved_variables(bset["backdoor_set"]))
+                    backdoor_sets_arr.append(curr_set)

        for i in range(len(backdoor_sets_arr)):
            backdoor_estimand_expr = self.construct_backdoor_estimand(
@ -364,20 +416,59 @@ class CausalIdentifier:
            backdoor_variables_dict["backdoor"+str(i+1)] = backdoor_sets_arr[i]
        return estimands_dict, backdoor_variables_dict

-    def identify_frontdoor(self):
+    def identify_frontdoor(self, dseparation_algo="default"):
        """ Find a valid frontdoor variable if it exists.

        Currently only supports a single variable frontdoor set.
        """
        frontdoor_var = None
-        frontdoor_paths = self._graph.get_all_directed_paths(self.treatment_name, self.outcome_name)
+        frontdoor_paths = None
+        fdoor_graph = None
+        if dseparation_algo == "default":
+            cond1_graph = self._graph.do_surgery(self.treatment_name,
+                    remove_incoming_edges=True)
+            bdoor_graph1 = self._graph.do_surgery(self.treatment_name,
+                    remove_outgoing_edges=True)
+        elif dseparation_algo == "naive":
+            frontdoor_paths = self._graph.get_all_directed_paths(self.treatment_name, self.outcome_name)
+        else:
+            raise ValueError(f"d-separation algorithm {dseparation_algo} is not supported")
+
+
        eligible_variables = self._graph.get_descendants(self.treatment_name) \
-            - set(self.outcome_name)
+            - set(self.outcome_name) \
+            - set(self._graph.get_descendants(self.outcome_name))
        # For simplicity, assuming a one-variable frontdoor set
        for candidate_var in eligible_variables:
-            is_valid_frontdoor = self._graph.check_valid_frontdoor_set(self.treatment_name,
-                    self.outcome_name, parse_state(candidate_var), frontdoor_paths=frontdoor_paths)
-            self.logger.debug("Candidate frontdoor set: {0}, is_dseparated: {1}".format(candidate_var, is_valid_frontdoor))
+            # Cond 1: All directed paths intercepted by candidate_var
+            cond1 = self._graph.check_valid_frontdoor_set(
+                self.treatment_name, self.outcome_name,
+                parse_state(candidate_var),
+                frontdoor_paths=frontdoor_paths,
+                new_graph=cond1_graph,
+                dseparation_algo=dseparation_algo)
+            self.logger.debug("Candidate frontdoor set: {0}, is_dseparated: {1}".format(candidate_var, cond1))
+            if not cond1:
+                continue
+            # Cond 2: No confounding between treatment and candidate var
+            cond2 = self._graph.check_valid_backdoor_set(
+                self.treatment_name, parse_state(candidate_var),
+                set(),
+                backdoor_paths=None,
+                new_graph= bdoor_graph1,
+                dseparation_algo=dseparation_algo)
+            if not cond2:
+                continue
+            # Cond 3: treatment blocks all confounding between candidate_var and outcome
+            bdoor_graph2 = self._graph.do_surgery(candidate_var,
+                    remove_outgoing_edges=True)
+            cond3 = self._graph.check_valid_backdoor_set(
+                parse_state(candidate_var), self.outcome_name,
+                self.treatment_name,
+                backdoor_paths=None,
+                new_graph= bdoor_graph2,
+                dseparation_algo=dseparation_algo)
+            is_valid_frontdoor = cond1 and cond2 and cond3
            if is_valid_frontdoor:
                frontdoor_var = candidate_var
                break
--- a/tests/causal_identifiers/base.py
+++ b/tests/causal_identifiers/base.py
@ -8,6 +8,7 @@ class IdentificationTestGraphSolution(object):

    def __init__(self, graph_str, observed_variables, biased_sets, minimal_adjustment_sets, maximal_adjustment_sets):
        self.graph = CausalGraph("X", "Y", graph_str, observed_node_names=observed_variables)
+        self.graph_str = graph_str
        self.observed_variables = observed_variables
        self.biased_sets = biased_sets
        self.minimal_adjustment_sets = minimal_adjustment_sets
--- a/tests/causal_identifiers/example_graphs.py
+++ b/tests/causal_identifiers/example_graphs.py
@ -6,7 +6,7 @@ Each example graph is contained of the following values:
    * graph_str - The graph string in GML format.
    * observed_variables - A list of observed variables in the graph. This will be used to test no unobserved variables are offered in the solution.
    * biased_sets - The sets that we shouldn't get in the output as they incur biased estimates of the causal effect.
-    * minimal_adjustment_sets - Sets of observed variables that should be returned when 'minimal-adjustment' is specified as the backdoor method. 
+    * minimal_adjustment_sets - Sets of observed variables that should be returned when 'minimal-adjustment' is specified as the backdoor method.
        If no adjustment is necessary given the graph, minimal adjustment set should be the empty set.
    * maximal_adjustment_sets - Sets of observed variables that should be returned when 'maximal-adjustment' is specified as the backdoor method.
 """
@ -14,14 +14,14 @@ Each example graph is contained of the following values:
 TEST_GRAPH_SOLUTIONS = {
    # Example is selected from Pearl J. "Causality" 2nd Edition, from chapter 3.3.1 on backoor criterion.
    "pearl_backdoor_example_graph": dict(
-        graph_str = """graph[directed 1 node[id "Z1" label "Z1"]  
+        graph_str = """graph[directed 1 node[id "Z1" label "Z1"]
                        node[id "Z2" label "Z2"]
                        node[id "Z3" label "Z3"]
                        node[id "Z4" label "Z4"]
                        node[id "Z5" label "Z5"]
                        node[id "Z6" label "Z6"]
                        node[id "X" label "X"]
-                        node[id "Y" label "Y"]      
+                        node[id "Y" label "Y"]
                        edge[source "Z1" target "Z3"]
                        edge[source "Z1" target "Z4"]
                        edge[source "Z2" target "Z4"]
@ -31,7 +31,7 @@ TEST_GRAPH_SOLUTIONS = {
                        edge[source "Z4" target "Y"]
                        edge[source "Z5" target "Y"]
                        edge[source "Z6" target "Y"]
-                        edge[source "X" target "Z6"]]    
+                        edge[source "X" target "Z6"]]
                    """,
        observed_variables = ["Z1", "Z2", "Z3", "Z4", "Z5", "Z6", "X", "Y"],
        biased_sets = [{"Z4"}, {"Z6"}, {"Z5"}, {"Z2"}, {"Z1"}, {"Z3"}, {"Z1", "Z3"}, {"Z2", "Z5"}, {"Z1", "Z2"}],
@ -39,22 +39,22 @@ TEST_GRAPH_SOLUTIONS = {
        maximal_adjustment_sets = [{"Z1", "Z2", "Z3", "Z4", "Z5"}]
    ),
    "simple_selection_bias_graph": dict(
-        graph_str = """graph[directed 1 node[id "Z1" label "Z1"]  
+        graph_str = """graph[directed 1 node[id "Z1" label "Z1"]
                    node[id "X" label "X"]
-                    node[id "Y" label "Y"]      
+                    node[id "Y" label "Y"]
                    edge[source "X" target "Y"]
                    edge[source "X" target "Z1"]
                    edge[source "Y" target "Z1"]]
                    """,
        observed_variables = ["Z1", "X", "Y"],
-        biased_sets = [{"Z1",}], 
+        biased_sets = [{"Z1",}],
        minimal_adjustment_sets = [{}],
        maximal_adjustment_sets = [{}]
    ),
    "simple_no_confounder_graph": dict(
-        graph_str = """graph[directed 1 node[id "Z1" label "Z1"]  
+        graph_str = """graph[directed 1 node[id "Z1" label "Z1"]
                node[id "X" label "X"]
-                node[id "Y" label "Y"]      
+                node[id "Y" label "Y"]
                edge[source "X" target "Y"]
                edge[source "Z1" target "X"]]
                """,
@ -65,11 +65,11 @@ TEST_GRAPH_SOLUTIONS = {
    ),
    # The following simpsons paradox examples are taken from Pearl, J {2013}. "Understanding Simpson’s Paradox" - http://ftp.cs.ucla.edu/pub/stat_ser/r414.pdf
    "pearl_simpsons_paradox_1c": dict(
-        graph_str = """graph[directed 1 node[id "Z" label "Z"]  
+        graph_str = """graph[directed 1 node[id "Z" label "Z"]
                node[id "X" label "X"]
                node[id "Y" label "Y"]
                node[id "L1" label "L1"]
-                node[id "L2" label "L2"]      
+                node[id "L2" label "L2"]
                edge[source "X" target "Y"]
                edge[source "L1" target "X"]
                edge[source "L1" target "Z"]
@ -82,7 +82,7 @@ TEST_GRAPH_SOLUTIONS = {
        maximal_adjustment_sets = [{}]
    ),
    "pearl_simpsons_paradox_1d": dict(
-        graph_str = """graph[directed 1 node[id "Z" label "Z"]  
+        graph_str = """graph[directed 1 node[id "Z" label "Z"]
                node[id "X" label "X"]
                node[id "Y" label "Y"]
                node[id "L1" label "L1"]
@ -97,10 +97,10 @@ TEST_GRAPH_SOLUTIONS = {
        maximal_adjustment_sets = [{"Z",}]
    ),
    "pearl_simpsons_paradox_2a": dict(
-        graph_str = """graph[directed 1 node[id "Z" label "Z"]  
+        graph_str = """graph[directed 1 node[id "Z" label "Z"]
                node[id "X" label "X"]
                node[id "Y" label "Y"]
-                node[id "L" label "L"]      
+                node[id "L" label "L"]
                edge[source "X" target "Y"]
                edge[source "X" target "Z"]
                edge[source "L" target "Z"]
@ -112,30 +112,30 @@ TEST_GRAPH_SOLUTIONS = {
        maximal_adjustment_sets = [{}]
    ),
    "pearl_simpsons_paradox_2b": dict(
-        graph_str = """graph[directed 1 node[id "Z" label "Z"]  
+        graph_str = """graph[directed 1 node[id "Z" label "Z"]
                node[id "X" label "X"]
                node[id "Y" label "Y"]
-                node[id "L" label "L"]      
+                node[id "L" label "L"]
                edge[source "X" target "Y"]
                edge[source "Z" target "X"]
                edge[source "L" target "X"]
                edge[source "L" target "Y"]]""",
        observed_variables = ["Z", "X", "Y"],
-        biased_sets = [], 
+        biased_sets = [],
        minimal_adjustment_sets = [],
        maximal_adjustment_sets = [] # Should this be {"Z"}?
    ),
    "pearl_simpsons_paradox_2b_L_observed": dict(
-        graph_str = """graph[directed 1 node[id "Z" label "Z"]  
+        graph_str = """graph[directed 1 node[id "Z" label "Z"]
                node[id "X" label "X"]
                node[id "Y" label "Y"]
-                node[id "L" label "L"]      
+                node[id "L" label "L"]
                edge[source "X" target "Y"]
                edge[source "Z" target "X"]
                edge[source "L" target "X"]
                edge[source "L" target "Y"]]""",
        observed_variables = ["Z", "X", "Y", "L"],
-        biased_sets = [], 
+        biased_sets = [],
        minimal_adjustment_sets = [{"L"}],
        maximal_adjustment_sets = [{"L", "Z"}]
    ),
@ -143,7 +143,7 @@ TEST_GRAPH_SOLUTIONS = {
        graph_str = """graph[directed 1 node[id "Z1" label "Z1"]
                node[id "Z2" label "Z2"]
                node[id "Z3" label "Z3"]
-                node[id "L" label "L"]  
+                node[id "L" label "L"]
                node[id "X" label "X"]
                node[id "Y" label "Y"]
                edge[source "X" target "Y"]
@ -163,7 +163,7 @@ TEST_GRAPH_SOLUTIONS = {
        graph_str = """graph[directed 1 node[id "A" label "A"]
                node[id "B" label "B"]
                node[id "C" label "C"]
-                node[id "D" label "D"]  
+                node[id "D" label "D"]
                node[id "E" label "E"]
                node[id "X" label "X"]
                node[id "Y" label "Y"]
@ -220,7 +220,7 @@ TEST_GRAPH_SOLUTIONS = {
        graph_str = """graph[directed 1 node[id "X" label "X"]
                node[id "Y" label "Y"]
                node[id "Z1" label "Z1"]
-                node[id "Z2" label "Z2"]      
+                node[id "Z2" label "Z2"]
                edge[source "X" target "Y"]
                edge[source "X" target "Z1"]
                edge[source "Z1" target "Y"]
--- a/tests/causal_identifiers/test_backdoor_identifier.py
+++ b/tests/causal_identifiers/test_backdoor_identifier.py
@ -11,19 +11,19 @@ class TestBackdoorIdentification(object):
        graph = example_graph_solution.graph
        biased_sets = example_graph_solution.biased_sets
        identifier = CausalIdentifier(graph, "nonparametric-ate", method_name="exhaustive-search")
-        
+
        backdoor_results = identifier.identify_backdoor("X", "Y", include_unobserved=False)
        backdoor_sets = [
-            set(backdoor_result_dict["backdoor_set"]) 
+            set(backdoor_result_dict["backdoor_set"])
            for backdoor_result_dict in backdoor_results
            if len(backdoor_result_dict["backdoor_set"]) > 0
        ]

        assert (
            (len(backdoor_sets) == 0 and len(biased_sets) == 0) # No biased sets exist and that's expected.
-            or  
+            or
            all([
-                set(biased_backdoor_set) not in backdoor_sets 
+                set(biased_backdoor_set) not in backdoor_sets
                for biased_backdoor_set in biased_sets
            ]) # No sets that would induce biased results are present in the solution.
        )
@ -35,7 +35,7 @@ class TestBackdoorIdentification(object):

        backdoor_results = identifier.identify_backdoor("X", "Y", include_unobserved=False)
        backdoor_sets = [
-            set(backdoor_result_dict["backdoor_set"]) 
+            set(backdoor_result_dict["backdoor_set"])
            for backdoor_result_dict in backdoor_results
            if len(backdoor_result_dict["backdoor_set"]) > 0
        ]
@ -46,18 +46,18 @@ class TestBackdoorIdentification(object):
        graph = example_graph_solution.graph
        expected_sets = example_graph_solution.minimal_adjustment_sets
        identifier = CausalIdentifier(graph, "nonparametric-ate", method_name="minimal-adjustment", proceed_when_unidentifiable=False)
-        
+
        backdoor_results = identifier.identify_backdoor("X", "Y", include_unobserved=False)
        backdoor_sets = [
-            set(backdoor_result_dict["backdoor_set"]) 
+            set(backdoor_result_dict["backdoor_set"])
            for backdoor_result_dict in backdoor_results
        ]
-        
+
        assert (
            ((len(backdoor_sets) == 0) and (len(expected_sets) == 0)) # No adjustments exist and that's expected.
            or
            all([
-                set(expected_set) in backdoor_sets 
+                set(expected_set) in backdoor_sets
                for expected_set in expected_sets
            ])
        )
@ -66,19 +66,19 @@ class TestBackdoorIdentification(object):
        graph = example_graph_solution.graph
        expected_sets = example_graph_solution.maximal_adjustment_sets
        identifier = CausalIdentifier(graph, "nonparametric-ate", method_name="maximal-adjustment", proceed_when_unidentifiable=False)
-        
+
        backdoor_results = identifier.identify_backdoor("X", "Y", include_unobserved=False)
-        
+
        backdoor_sets = [
-            set(backdoor_result_dict["backdoor_set"]) 
+            set(backdoor_result_dict["backdoor_set"])
            for backdoor_result_dict in backdoor_results
        ]
-        
+        print(backdoor_sets, expected_sets, example_graph_solution.graph_str)
        assert (
            ((len(backdoor_sets) == 0) and (len(expected_sets) == 0)) # No adjustments exist and that's expected.
            or
            all([
-                set(expected_set) in backdoor_sets 
+                set(expected_set) in backdoor_sets
                for expected_set in expected_sets
            ])
        )
--- a/tests/causal_identifiers/test_optimize_backdoor.py
+++ b/tests/causal_identifiers/test_optimize_backdoor.py
@ -11,7 +11,7 @@ class TestOptimizeBackdoorIdentifier(object):
        outcome = "Y"
        variables = ["X1", "X2"]
        causal_graph = "digraph{X1->T;X2->T;X1->X2;X2->Y;T->Y}"
-        
+
        vars = list(treatment) + list(outcome) + list(variables)
        df = pd.DataFrame(columns=vars)

@ -30,12 +30,15 @@ class TestOptimizeBackdoorIdentifier(object):
        # Obtain backdoor sets
        path = Backdoor(identifier._graph._graph, treatment_name, outcome_name)
        backdoor_sets = path.get_backdoor_vars()
-
+        print(backdoor_sets)
        # Check if backdoor sets are valid i.e. if they block all paths between the treatment and the outcome
        backdoor_paths = identifier._graph.get_backdoor_paths(treatment_name, outcome_name)
        check_set = set(backdoor_sets[0]['backdoor_set'])
-        check = identifier._graph.check_valid_backdoor_set(treatment_name, outcome_name, check_set, backdoor_paths=backdoor_paths)
-        
+        check = identifier._graph.check_valid_backdoor_set(
+                        treatment_name, outcome_name, check_set,
+                        backdoor_paths=backdoor_paths,
+                        dseparation_algo="naive")
+        print(check)
        assert check["is_dseparated"]

    def test_2(self):
@ -94,7 +97,7 @@ class TestOptimizeBackdoorIdentifier(object):
        backdoor_paths = identifier._graph.get_backdoor_paths(treatment_name, outcome_name)
        check_set = set(backdoor_sets[0]['backdoor_set'])
        check = identifier._graph.check_valid_backdoor_set(treatment_name, outcome_name, check_set, backdoor_paths=backdoor_paths)
-        
+
        assert check["is_dseparated"]

    def test_4(self):
@ -102,7 +105,7 @@ class TestOptimizeBackdoorIdentifier(object):
        outcome = "Y"
        variables = ["X1", "X2"]
        causal_graph = "digraph{T->Y;X1->T;X1->Y;X2->T;}"
-        
+
        vars = list(treatment) + list(outcome) + list(variables)
        df = pd.DataFrame(columns=vars)

@ -126,7 +129,7 @@ class TestOptimizeBackdoorIdentifier(object):
        backdoor_paths = identifier._graph.get_backdoor_paths(treatment_name, outcome_name)
        check_set = set(backdoor_sets[0]['backdoor_set'])
        check = identifier._graph.check_valid_backdoor_set(treatment_name, outcome_name, check_set, backdoor_paths=backdoor_paths)
-        
+
        assert check["is_dseparated"]

    def test_5(self):
@ -158,7 +161,7 @@ class TestOptimizeBackdoorIdentifier(object):
        backdoor_paths = identifier._graph.get_backdoor_paths(treatment_name, outcome_name)
        check_set = set(backdoor_sets[0]['backdoor_set'])
        check = identifier._graph.check_valid_backdoor_set(treatment_name, outcome_name, check_set, backdoor_paths=backdoor_paths)
-        
+
        assert check["is_dseparated"]

    def test_6(self):
@ -166,7 +169,7 @@ class TestOptimizeBackdoorIdentifier(object):
        outcome = "Y"
        variables = ["X1", "X2", "X3", "X4"]
        causal_graph = "digraph{X1->T;X1->X2;Y->X2;X3->T;X3->X4;X4->Y;T->Y}"
-        
+
        vars = list(treatment) + list(outcome) + list(variables)
        df = pd.DataFrame(columns=vars)

@ -190,5 +193,5 @@ class TestOptimizeBackdoorIdentifier(object):
        backdoor_paths = identifier._graph.get_backdoor_paths(treatment_name, outcome_name)
        check_set = set(backdoor_sets[0]['backdoor_set'])
        check = identifier._graph.check_valid_backdoor_set(treatment_name, outcome_name, check_set, backdoor_paths=backdoor_paths)
-        
+
        assert check["is_dseparated"]