Source code for pgmpy.identification.adjustment

import itertools

import networkx as nx

from pgmpy.base import ADMG, DAG, MAG, PDAG
from pgmpy.identification import _BaseIdentification
from pgmpy.utils.sets import _powerset


[docs] class Adjustment(_BaseIdentification): """ Given a causal graph, finds the adjustment set. This class implements a few variants for computing adjustment sets for identifying the total causal effect of the variables in the `exposures` role on the variables in the `outcomes` role. Additionally, it provides methods to check if the current set of variables with role `adjustment` satisfy the backdoor criterion and to compute the backdoor adjustment formula. Parameters ---------- variant: str The variant of backdoor identification to use. Default is 'minimal'. - 'all': Returns all adjustment sets that satisfy the backdoor criterion. - 'minimal': Returns the smallest adjustment set. - 'minimal_variance': Returns the adjustment set for which estimators achieve minimal variance. Examples -------- >>> from pgmpy.base import DAG >>> dag = DAG( ... ebunch=[ ... ("x1", "y1"), ... ("x1", "z1"), ... ("z1", "z2"), ... ("z2", "x2"), ... ("y2", "z2"), ... ], ... roles={"exposures": "x1", "outcomes": "y1"}, ... ) >>> dag_with_adj, success = Adjustment(variant="minimal").identify(dag) >>> roles = dag_with_adj.get_role_dict() >>> roles["exposures"] ['x1'] >>> roles["outcomes"] ['y1'] >>> Adjustment(variant="minimal").validate(dag_with_adj) True References ---------- [1] Perkovi, Emilija, et al. "Complete graphical characterization and construction of adjustment sets in Markov equivalence classes of ancestral graphs." Journal of Machine Learning Research. [2] Witte, Janine, et al. "On efficient adjustment in causal graphs." Journal of Machine Learning Research. """ def __init__(self, variant="minimal"): self.variant = variant if self.variant in ("minimal", "all"): self.supported_graph_types = (DAG, PDAG, ADMG, MAG) elif self.variant == "minimal_variance": self.supported_graph_types = (DAG, PDAG) def _get_proper_backdoor_graph(self, causal_graph, inplace=False): """ Returns a proper backdoor graph of the `causal_graph`. For a `causal_graph` with variable roles `exposures` and `outcomes` defined, returns it's proper backdoor graph. A proper backdoor graph is a graph which removes the first edge of every proper causal path from `exposures` to `outcomes`. Parameters ---------- causal_graph: pgmpy.base.DAG, pgmpy.base.PDAG, pgmpy.base.ADMG, or pgmpy.base.MAG The causal graph for which the proper backdoor graph is to be computed. inplace: boolean If inplace is True, modifies the object itself. Otherwise returns a modified copy of self. Examples -------- >>> from pgmpy.base import DAG >>> from pgmpy.identification import Adjustment >>> dag = DAG( ... ebunch=[ ... ("x1", "y1"), ... ("x1", "z1"), ... ("z1", "z2"), ... ("z2", "x2"), ... ("y2", "z2"), ... ], ... roles={"exposures": "x1", "outcomes": "y1"}, ... ) >>> dag_proper = Adjustment()._get_proper_backdoor_graph(dag, inplace=False) >>> list(dag_proper.edges()) [('x1', 'z1'), ('z1', 'z2'), ('z2', 'x2'), ('y2', 'z2')] References ---------- [1] Perkovic, Emilija, et al. "Complete graphical characterization and construction of adjustment sets in Markov equivalence classes of ancestral graphs." The Journal of Machine Learning Research. """ # TODO: Make this work for all graph types. model = causal_graph if inplace else causal_graph.copy() edges_to_remove = [] for source in causal_graph.get_role("exposures"): paths = nx.all_simple_edge_paths(causal_graph, source, causal_graph.get_role("outcomes")) for path in paths: edges_to_remove.append(path[0]) model.remove_edges_from(edges_to_remove) return model def _identify(self, causal_graph): """ Identify adjustment sets using the backdoor criterion. Parameters ---------- causal_graph: DAG | PDAG | ADMG | MAG | PAG The causal graph for which the adjustment sets are to be identified. Returns ------- causal_graph: DAG | PDAG | ADMG | MAG | PAG The causal graph with the identified adjustment set added as role `adjustment`. success: bool True if the identification was successful, False otherwise. """ # Step 1: If variant = "minimal", use the algorithm from [1]. Get the # proper backdoor graph and compute the adjustment set. if self.variant == "minimal": if len(causal_graph.get_role("exposures")) != 1: raise NotImplementedError("Backdoor identification is only implemented for single exposure variable.") if len(causal_graph.get_role("outcomes")) != 1: raise NotImplementedError("Backdoor identification is only implemented for single outcome variable.") exposure = causal_graph.get_role("exposures")[0] outcome = causal_graph.get_role("outcomes")[0] backdoor_graph = self._get_proper_backdoor_graph(causal_graph, inplace=False) adjustment_set = backdoor_graph.minimal_dseparator(exposure, outcome) if adjustment_set is None: return causal_graph, False else: return ( causal_graph.with_role("adjustment", adjustment_set, inplace=False), True, ) # Step 2: If variant = "minimal_variance", use the algorithm from [2]. # O(X, Y, G) = pa(cn(X, Y, G), G) \ forb(X, Y, G) elif self.variant == "minimal_variance": raise NotImplementedError("Backdoor identification with minimal variance is not implemented yet.") # Step 3: If variant = "all", iterate over all possible sets of adjustment # variables, and return all that are valid. elif self.variant == "all": exposure = causal_graph.get_role("exposures")[0] outcome = causal_graph.get_role("outcomes")[0] ancestors = causal_graph.get_ancestors([exposure, outcome]) # Remove any variables on the path from exposure to outcome (these cannot be in the adjustment set) ancestors -= set(itertools.chain(*nx.all_simple_paths(causal_graph, exposure, outcome))) ancestors -= {exposure, outcome} ancestors -= set(causal_graph.latents) valid_adj_graphs = [] for s in _powerset(ancestors): adj_causal_graph = causal_graph.with_role("adjustment", s, inplace=False) if self.validate(causal_graph=adj_causal_graph): valid_adj_graphs.append(adj_causal_graph) return valid_adj_graphs, len(valid_adj_graphs) > 0 def _validate(self, causal_graph): """ Validate the causal graph for backdoor identification. Given a `causal_graph` with variable roles `exposures`, `outcomes`, and `adjustment` defined, this method checks if the given `adjustment` set is valid. Parameters ---------- causal_graph: DAG | PDAG | ADMG | MAG | PAG The causal graph to validate. Returns ------- bool: True if the `adjustment` set is valid, False otherwise. """ exposure = causal_graph.get_role("exposures") outcome = causal_graph.get_role("outcomes") adjustment_vars = causal_graph.get_role("adjustment") conditional_vars = exposure + adjustment_vars predecessors = set() for exposure_var in exposure: predecessors.update(causal_graph.predecessors(exposure_var)) parents_d_sep = [] for pred_var in predecessors: outcome_d_seps = [] for outcome_var in outcome: outcome_d_seps.append(causal_graph.is_dconnected(pred_var, outcome_var, observed=conditional_vars)) parents_d_sep.append(not any(outcome_d_seps)) return all(parents_d_sep)