# Source code for pgmpy.identification.adjustment

import itertools

import networkx as nx

from pgmpy.base import ADMG, DAG, MAG, PDAG
from pgmpy.identification import _BaseIdentification
from pgmpy.utils.sets import _powerset



# [docs]
class Adjustment(_BaseIdentification):
    """
    Given a causal graph, finds the adjustment set.

    This class implements a few variants for computing adjustment sets for
    identifying the total causal effect of the variables in the `exposures`
    role on the variables in the `outcomes` role. Additionally, it provides
    methods to check if the current set of variables with role `adjustment`
    satisfy the backdoor criterion.

    Parameters
    ----------
    variant: str
        The variant of backdoor identification to use. Default is 'minimal'.

        - 'all': Returns all adjustment sets that satisfy the backdoor criterion.
        - 'minimal': Returns the smallest adjustment set.
        - 'minimal_variance': Returns the adjustment set for which estimators
          achieve minimal variance. Not implemented yet; identification raises
          ``NotImplementedError``.

    Raises
    ------
    ValueError
        If `variant` is not one of 'minimal', 'all', or 'minimal_variance'.

    Examples
    --------
    >>> from pgmpy.base import DAG
    >>> dag = DAG(
    ...     ebunch=[
    ...         ("x1", "y1"),
    ...         ("x1", "z1"),
    ...         ("z1", "z2"),
    ...         ("z2", "x2"),
    ...         ("y2", "z2"),
    ...     ],
    ...     roles={"exposures": "x1", "outcomes": "y1"},
    ... )
    >>> dag_with_adj, success = Adjustment(variant="minimal").identify(dag)
    >>> roles = dag_with_adj.get_role_dict()
    >>> roles["exposures"]
    ['x1']
    >>> roles["outcomes"]
    ['y1']
    >>> Adjustment(variant="minimal").validate(dag_with_adj)
    True

    References
    ----------
    [1] Perković, Emilija, et al. "Complete graphical characterization and
        construction of adjustment sets in Markov equivalence classes of ancestral
        graphs." Journal of Machine Learning Research.
    [2] Witte, Janine, et al. "On efficient adjustment in causal graphs."
        Journal of Machine Learning Research.
    """

    def __init__(self, variant="minimal"):
        # Fail fast on an unknown variant: otherwise `supported_graph_types`
        # would never be set and `_identify` would silently return None.
        if variant in ("minimal", "all"):
            supported_graph_types = (DAG, PDAG, ADMG, MAG)
        elif variant == "minimal_variance":
            supported_graph_types = (DAG, PDAG)
        else:
            raise ValueError(
                f"Unknown variant {variant!r}; expected one of 'minimal', 'all', or 'minimal_variance'."
            )

        self.variant = variant
        self.supported_graph_types = supported_graph_types

    def _get_proper_backdoor_graph(self, causal_graph, inplace=False):
        """
        Returns a proper backdoor graph of the `causal_graph`.

        For a `causal_graph` with variable roles `exposures` and `outcomes`
        defined, returns its proper backdoor graph. A proper backdoor graph is
        a graph which removes the first edge of every proper causal path from
        `exposures` to `outcomes`. A causal path is proper when only its first
        node is an exposure.

        Parameters
        ----------
        causal_graph: pgmpy.base.DAG, pgmpy.base.PDAG, pgmpy.base.ADMG, or pgmpy.base.MAG
            The causal graph for which the proper backdoor graph is to be computed.

        inplace: boolean
            If inplace is True, modifies `causal_graph` itself. Otherwise
            returns a modified copy of `causal_graph`.

        Returns
        -------
        The graph (either `causal_graph` itself or a copy of it, depending on
        `inplace`) with the first edge of every proper causal path removed.

        Examples
        --------
        >>> from pgmpy.base import DAG
        >>> from pgmpy.identification import Adjustment
        >>> dag = DAG(
        ...     ebunch=[
        ...         ("x1", "y1"),
        ...         ("x1", "z1"),
        ...         ("z1", "z2"),
        ...         ("z2", "x2"),
        ...         ("y2", "z2"),
        ...     ],
        ...     roles={"exposures": "x1", "outcomes": "y1"},
        ... )
        >>> dag_proper = Adjustment()._get_proper_backdoor_graph(dag, inplace=False)
        >>> list(dag_proper.edges())
        [('x1', 'z1'), ('z1', 'z2'), ('z2', 'x2'), ('y2', 'z2')]

        References
        ----------
        [1] Perkovic, Emilija, et al. "Complete graphical characterization and
            construction of adjustment sets in Markov equivalence classes of
            ancestral graphs." The Journal of Machine Learning Research.
        """
        # TODO: Make this work for all graph types.
        model = causal_graph if inplace else causal_graph.copy()

        # Role lookups do not change while enumerating paths; fetch them once.
        exposures = causal_graph.get_role("exposures")
        outcomes = causal_graph.get_role("outcomes")
        exposure_nodes = set(exposures)

        edges_to_remove = []
        for source in exposures:
            for path in nx.all_simple_edge_paths(causal_graph, source, outcomes):
                if not path:
                    # A zero-length path has no first edge to remove.
                    continue
                # A path that re-enters the exposure set is not proper, so its
                # first edge must be kept. Every edge tuple stores its head
                # node at index 1. With a single exposure this check never
                # triggers because simple paths do not revisit their source.
                if any(edge[1] in exposure_nodes for edge in path):
                    continue
                edges_to_remove.append(path[0])

        # All edges are collected before removal so that `inplace=True` does
        # not mutate the graph while paths are still being enumerated.
        model.remove_edges_from(edges_to_remove)
        return model

    def _identify(self, causal_graph):
        """
        Identify adjustment sets using the backdoor criterion.

        Parameters
        ----------
        causal_graph: DAG | PDAG | ADMG | MAG | PAG
            The causal graph for which the adjustment sets are to be identified.

        Returns
        -------
        causal_graph: DAG | PDAG | ADMG | MAG | PAG, or list of such graphs
            For variant 'minimal': a copy of the causal graph with the
            identified adjustment set added as role `adjustment`, or the
            unmodified input graph if no adjustment set was found.
            For variant 'all': a list with one graph copy per valid adjustment
            set, each carrying that set as role `adjustment`.

        success: bool
            True if the identification was successful, False otherwise.

        Raises
        ------
        NotImplementedError
            If variant is 'minimal' and the graph does not have exactly one
            exposure and one outcome, or if variant is 'minimal_variance'.
        ValueError
            If variant is 'all' and the graph has no exposure or no outcome
            variable, or if `self.variant` is not a known variant.
        """
        # Step 1: If variant = "minimal", use the algorithm from [1]. Get the
        #         proper backdoor graph and compute the adjustment set.
        if self.variant == "minimal":
            exposures = causal_graph.get_role("exposures")
            outcomes = causal_graph.get_role("outcomes")
            if len(exposures) != 1:
                raise NotImplementedError("Backdoor identification is only implemented for single exposure variable.")
            if len(outcomes) != 1:
                raise NotImplementedError("Backdoor identification is only implemented for single outcome variable.")

            exposure = exposures[0]
            outcome = outcomes[0]

            backdoor_graph = self._get_proper_backdoor_graph(causal_graph, inplace=False)
            adjustment_set = backdoor_graph.minimal_dseparator(exposure, outcome)

            if adjustment_set is None:
                return causal_graph, False
            return (
                causal_graph.with_role("adjustment", adjustment_set, inplace=False),
                True,
            )

        # Step 2: If variant = "minimal_variance", use the algorithm from [2].
        #         O(X, Y, G) = pa(cn(X, Y, G), G) \ forb(X, Y, G)
        if self.variant == "minimal_variance":
            raise NotImplementedError("Backdoor identification with minimal variance is not implemented yet.")

        # Step 3: If variant = "all", iterate over all possible sets of adjustment
        #         variables, and return all that are valid.
        if self.variant == "all":
            exposures = list(causal_graph.get_role("exposures"))
            outcomes = list(causal_graph.get_role("outcomes"))
            if not exposures or not outcomes:
                raise ValueError(
                    "Backdoor identification requires at least one exposure and one outcome variable."
                )

            # Copy the result so the subtractions below cannot mutate a set
            # owned by the graph object.
            candidates = set(causal_graph.get_ancestors(exposures + outcomes))

            # Remove any variables on a path from an exposure to an outcome
            # (these cannot be in the adjustment set).
            for exposure, outcome in itertools.product(exposures, outcomes):
                candidates -= set(
                    itertools.chain.from_iterable(nx.all_simple_paths(causal_graph, exposure, outcome))
                )

            # Exposures, outcomes and latent variables are never adjusted for.
            candidates -= set(exposures)
            candidates -= set(outcomes)
            candidates -= set(causal_graph.latents)

            valid_adj_graphs = []
            for subset in _powerset(candidates):
                adj_causal_graph = causal_graph.with_role("adjustment", subset, inplace=False)
                if self.validate(causal_graph=adj_causal_graph):
                    valid_adj_graphs.append(adj_causal_graph)

            return valid_adj_graphs, len(valid_adj_graphs) > 0

        # Only reachable if `self.variant` was reassigned after construction.
        raise ValueError(
            f"Unknown variant {self.variant!r}; expected one of 'minimal', 'all', or 'minimal_variance'."
        )

    def _validate(self, causal_graph):
        """
        Validate the causal graph for backdoor identification.

        Given a `causal_graph` with variable roles `exposures`, `outcomes`, and
        `adjustment` defined, this method checks if the given `adjustment` set
        is valid. The set is accepted when no predecessor of any exposure is
        d-connected to any outcome once the exposures and the adjustment
        variables are observed.

        Parameters
        ----------
        causal_graph: DAG | PDAG | ADMG | MAG | PAG
            The causal graph to validate.

        Returns
        -------
        bool:
            True if the `adjustment` set is valid, False otherwise. Also True
            when the exposures have no predecessors at all.
        """
        exposures = causal_graph.get_role("exposures")
        outcomes = causal_graph.get_role("outcomes")

        # Conditioning set: the exposures themselves plus the adjustment variables.
        conditioning_set = exposures + causal_graph.get_role("adjustment")

        exposure_parents = set()
        for exposure_var in exposures:
            exposure_parents.update(causal_graph.predecessors(exposure_var))

        # Valid only if every (parent, outcome) pair is d-separated; `all`
        # stops at the first d-connected pair.
        return all(
            not causal_graph.is_dconnected(parent, outcome_var, observed=conditioning_set)
            for parent in exposure_parents
            for outcome_var in outcomes
        )