Source code for pgmpy.base.MAG

from collections.abc import Hashable, Iterable

import networkx as nx

from pgmpy.base import AncestralBase



[docs]
class MAG(AncestralBase):
    """
    Class for representing Maximal Ancestral Graphs (MAGs).

    A MAG is a type of graph used in causal inference to represent conditional
    independence relations when some variables are latent (unobserved). Unlike
    simple directed acyclic graphs (DAGs), MAGs allow for special edge types
    (directed and bidirected) that capture the presence of latent confounding
    and selection bias. Every pair of nodes in a MAG is connected in such a way
    that the graph is "maximal," meaning no additional edges can be added
    without changing the set of implied conditional independence relations.


    Parameters
    ----------
    ebunch : iterable of tuples, optional
        A list or iterable of edges to add at initialization.

    latents : set, default=set()
        Set of latent (unobserved) variables.

    exposures : set, default=set()
        Set of exposure variables in the graph. These are the variables
        that represent the treatment or intervention being studied in a
        causal analysis. Default is an empty set.

    outcomes : set, default=set()
        Set of outcome variables in the graph. These are the variables
        that represent the response or dependent variables being studied
        in a causal analysis. Default is an empty set.

    roles : dict, optional (default: None)
        A dictionary mapping roles to node names.
        The keys are roles, and the values are role names (strings or iterables of str).
        If provided, this will automatically assign roles to the nodes in the graph.
        Passing a key-value pair via ``roles`` is equivalent to calling
        ``with_role(role, variables)`` for each key-value pair in the dictionary.

    Examples
    --------
    >>> from pgmpy.base import MAG
    >>> mag = MAG(ebunch=[("L", "A", "-", ">"), ("B", "C", "-", ">")], latents={"L"})
    >>> sorted(mag.nodes())
    ['A', 'B', 'C', 'L']

    Roles can be assigned to nodes in the graph at construction or using methods.

    At construction:

    >>> mag = MAG(
    ...     ebunch=[("L", "A", "-", ">"), ("B", "C", "-", ">")],
    ...     latents={"L"},
    ...     exposures={"A"},
    ...     outcomes={"B"},
    ... )

    Roles can also be assigned after creation using ``with_role`` method.

    >>> mag = mag.with_role("adjustment", {"L", "C"})

    Vertices of a specific role can be retrieved using ``get_role`` method.

    >>> mag.get_role("exposures")
    ['A']
    >>> mag.get_role("adjustment")
    ['L', 'C']

    References
    ----------
    .. [1] Zhang, J. (2008). Causal Reasoning with Ancestral Graphs. Journal of Machine Learning Research, 9(7).
    """

    def __init__(
        self,
        ebunch: Iterable[tuple[Hashable, Hashable]] | None = None,
        latents: set[Hashable] = set(),
        exposures: set[Hashable] = set(),
        outcomes: set[Hashable] = set(),
        roles=None,
    ):
        if ebunch:
            for _, _, u_mark, v_mark in ebunch:
                if (u_mark, v_mark) not in {
                    ("-", ">"),
                    (">", "-"),
                    (">", ">"),
                    ("-", "-"),
                }:
                    raise ValueError(
                        f"Invalid edge type ({u_mark}, {v_mark}). "
                        "MAGs only allow directed ('-', '>'), reverse directed ('>', '-'), "
                        "bidirected ('>', '>'), and undirected ('-', '-') edges."
                    )
        super().__init__(
            ebunch=ebunch,
            latents=latents,
            exposures=exposures,
            outcomes=outcomes,
            roles=roles,
        )

    def _is_collider(self, u, c, v):
        """
        Check if a node is a collider in a path u - c - v.

        A collider is a node with incoming arrowheads on both sides:
        u -> c <- v.

        Parameters
        ----------
        u : Hashable
            The first endpoint in the triplet (u, c, v).

        c : Hashable
            The middle node, candidate collider.

        v : Hashable
            The second endpoint in the triplet.

        Returns
        -------
        bool
            True if `c` is a collider on the path, False otherwise.

        Examples
        --------
        >>> from pgmpy.base import MAG
        >>> mag = MAG()
        >>> mag.add_edge("X", "Z", "-", ">")
        >>> mag.add_edge("Y", "Z", "-", ">")
        >>> mag._is_collider("X", "Z", "Y")
        True
        """
        if not (self.has_edge(u, c) and self.has_edge(c, v)):
            return False

        mark_uc_at_c = self.edges[u, c]["marks"][c]
        mark_cv_at_c = self.edges[c, v]["marks"][c]

        return mark_uc_at_c == ">" and mark_cv_at_c == ">"


[docs]
    def has_inducing_path(self, u, v, W):
        """
        Check if there exists an inducing path between two nodes relative to W.

        An inducing path between u and v is a path such that:
        - The path has length > 2 (at least one intermediate node),
        - Every intermediate node is a collider on the path,
        - Every intermediate node is either:
            * in W, or
            * an ancestor of u or v.

        Parameters
        ----------
        u : Hashable
            Source node.

        v : Hashable
            Target node.

        W : set
            Subset of nodes to check inducing paths through (often latents).

        Returns
        -------
        bool
            True if there exists an inducing path, False otherwise.

        Examples
        --------
        >>> from pgmpy.base import MAG
        >>> mag = MAG()
        >>> mag.add_edge("X", "L", "-", ">")
        >>> mag.add_edge("Y", "L", "-", ">")
        >>> mag.latents = {"L"}
        >>> mag.has_inducing_path("X", "Y", mag.latents)
        True
        """

        is_inducing = True
        for path in nx.all_simple_paths(self, source=u, target=v):
            if len(path) <= 2:
                continue

            for i in range(1, len(path) - 1):
                prev_node, curr_node, next_node = path[i - 1], path[i], path[i + 1]

                if not self._is_collider(prev_node, curr_node, next_node):
                    is_inducing = False
                    break

                ancestors_uv_vu = self.get_ancestors(u).union(self.get_ancestors(v))
                if curr_node not in W and curr_node not in ancestors_uv_vu:
                    is_inducing = False
                    break

        return is_inducing



[docs]
    def is_visible_edge(self, u, v) -> bool:
        """
        Check if a directed edge u -> v is visible in the MAG.

        A directed edge A → B in a MAG is considered visible if there exists a vertex C
        not adjacent to B such that either:
            1. C → A exists, or
            2. There is a collider path from C to A that is into A, and every vertex
            on that path is a parent of B.

        Parameters
        ----------
        u : Hashable
            Source node (tail of the edge).

        v : Hashable
            Target node (head of the edge).

        Returns
        -------
        bool
            True if the edge u -> v is visible, False otherwise.

        Examples
        --------
        >>> edges = [
        ...     ("A", "D", "-", ">"),
        ...     ("B", "C", "-", ">"),
        ...     ("X", "A", "-", ">"),
        ... ]
        >>> mag = MAG(ebunch=edges)
        >>> mag.is_visible_edge("A", "D")
        True
        >>> mag.is_visible_edge("B", "C")
        False
        """
        if not self.has_edge(u, v):
            return False
        marks = self.edges[u, v]["marks"]
        if marks.get(u) != "-" or marks.get(v) != ">":
            return False

        neighbors_v = set(self.neighbors(v))

        for c in self.nodes:
            if c in {u, v} or c in neighbors_v:
                continue

            if self.has_edge(c, u):
                cm = self.edges[c, u]["marks"]
                if cm.get(u) == ">":
                    return True

            for path in nx.all_simple_paths(self, source=c, target=u):
                if len(path) < 3:
                    continue

                last = path[-2]
                if not self.has_edge(last, u):
                    continue
                if self.edges[last, u]["marks"][u] != ">":
                    continue

                valid = True
                for i in range(1, len(path) - 1):
                    prev_node, curr_node, next_node = path[i - 1], path[i], path[i + 1]

                    if not self._is_collider(prev_node, curr_node, next_node):
                        valid = False
                        break

                    if not (
                        self.has_edge(curr_node, v)
                        and self.edges[curr_node, v]["marks"].get(curr_node) == "-"
                        and self.edges[curr_node, v]["marks"].get(v) == ">"
                    ):
                        valid = False
                        break

                if valid:
                    return True

        return False



[docs]
    def lower_manipulation(self, X, inplace=False):
        """
        Performs lower manipulation.

        Removes all edges that are visible and originate from nodes in X.
        For edges from X that are invisible, adds bidirected edges from the other
        endpoint to its neighbors outside X to preserve conditional independencies.
        All other edges remain unchanged.

        Parameters
        ----------
        X : set
            Set of nodes to perform manipulation on.

        inplace : bool, optional
            If True, modifies the current graph in place. Defaults to False.

        Returns
        -------
        MAG
            A new MAG with outgoing edges from X removed.

        Examples
        --------
        >>> from pgmpy.base import MAG
        >>> mag = MAG()
        >>> mag.add_edge("A", "B", "-", ">")
        >>> mag.add_edge("A", "C", "-", ">")
        >>> mag.add_edge("C", "B", "-", ">")
        >>> mag.add_edge("B", "C", ">", ">")
        >>> new_mag = mag.lower_manipulation({"A"})
        >>> edges = list(new_mag.edges(data=True))
        >>> len(edges)
        1
        >>> edges[0][0], edges[0][1]
        ('B', 'C')
        >>> edges[0][2]['marks']['B'], edges[0][2]['marks']['C']
        ('>', '>')
        """
        if not inplace:
            new_mag = self.copy()
        else:
            new_mag = self

        edges_to_remove = []
        edges_to_change = []

        for u in X:
            neighbors = self.get_neighbors(u, u_type="-", v_type=">")
            for v in neighbors:
                if self.is_visible_edge(u, v):
                    edges_to_remove.append((u, v))
                else:
                    edges_to_change.append((u, v))

        new_mag.remove_edges_from(edges_to_change + edges_to_remove)
        for u, v in edges_to_change:
            other = v if u in X else u
            for neighbor in self.neighbors(v):
                if neighbor != other and neighbor not in X:
                    new_mag.add_edge(other, neighbor, ">", ">")

        return new_mag



[docs]
    def upper_manipulation(self, X, inplace=False):
        """
        Performs upper manipulation.

        Deletes all edges (directed or bidirected) that have an arrowhead
        pointing to any variable in X. The rest of the graph remains unchanged.

        Parameters
        ----------
        X : set
            Set of nodes to perform manipulation on.

        inplace : bool, optional
            If True, modifies the current graph in place. Defaults to False.

        Returns
        -------
        MAG
            A new MAG with incoming edges to X removed.

        Examples
        --------
        >>> from pgmpy.base import MAG
        >>> mag = MAG()
        >>> mag.add_edge("X", "Y", ">", "-")
        >>> mag.add_edge("Z", "X", ">", "-")
        >>> mag.add_edge("A", "X", "-", ">")
        >>> new_mag = mag.upper_manipulation({"X"})
        >>> new_mag.has_edge("Z", "X")
        True
        >>> new_mag.has_edge("A", "X")
        False
        >>> new_mag.has_edge("X", "Y")
        False
        """
        if not inplace:
            new_mag = self.copy()
        else:
            new_mag = self
        edges_to_remove = []

        for u in X:
            neighbors = self.get_neighbors(u, u_type=">", v_type=None)
            for v in neighbors:
                edges_to_remove.append((u, v))

        new_mag.remove_edges_from(edges_to_remove)
        return new_mag