Source code for pgmpy.models.ClusterGraph

#!/usr/bin/env python3

from collections import defaultdict

import numpy as np

from pgmpy.base import UndirectedGraph
from pgmpy.factors import FactorDict, factor_product
from pgmpy.utils import compat_fns



[docs]
class ClusterGraph(UndirectedGraph):
    r"""
    Base class for representing Cluster Graph.

    Cluster graph is an undirected graph which is associated with a subset of variables. The graph contains undirected
    edges that connects clusters whose scopes have a non-empty intersection.

    Formally, a cluster graph is  :math:`\mathcal{U}` for a set of factors :math:`\Phi` over :math:`\mathcal{X}` is an
    undirected graph, each of whose nodes :math:`i` is associated with a subset :math:`C_i \subseteq X`. A cluster
    graph must be family-preserving - each factor :math:`\phi \in \Phi` must be associated with a cluster C, denoted
    :math:`\alpha(\phi)`, such that :math:`Scope[\phi] \subseteq C_i`. Each edge between a pair of clusters :math:`C_i`
    and :math:`C_j` is associated with a sepset :math:`S_{i,j} \subseteq C_i \cap C_j`.

    Parameters
    ----------
    data: input graph
        Data to initialize graph. If data=None (default) an empty graph is created. The data is an edge list

    Examples
    --------
    Create an empty ClusterGraph with no nodes and no edges

    >>> from pgmpy.models import ClusterGraph
    >>> G = ClusterGraph()

    G can be grown by adding clique nodes.

    **Nodes:**

    Add a tuple (or list or set) of nodes as single clique node.

    >>> G.add_node(("a", "b", "c"))
    >>> G.add_nodes_from([("a", "b"), ("a", "b", "c")])

    **Edges:**

    G can also be grown by adding edges.

    >>> G.add_edge(("a", "b", "c"), ("a", "b"))

    or a list of edges

    >>> G.add_edges_from([(("a", "b", "c"), ("a", "b")), (("a", "b", "c"), ("a", "c"))])
    """

    def __init__(self, ebunch=None):
        super(ClusterGraph, self).__init__()
        if ebunch:
            self.add_edges_from(ebunch)
        self.factors = []


[docs]
    def add_node(self, node, **kwargs):
        """
        Add a single node to the cluster graph.

        Parameters
        ----------
        node: node
            A node should be a collection of nodes forming a clique. It can be
            a list, set or tuple of nodes

        Examples
        --------
        >>> from pgmpy.models import ClusterGraph
        >>> G = ClusterGraph()
        >>> G.add_node(("a", "b", "c"))
        """
        if not isinstance(node, (list, set, tuple)):
            raise TypeError(
                "Node can only be a list, set or tuple of nodes forming a clique"
            )

        node = tuple(node)
        super(ClusterGraph, self).add_node(node, **kwargs)



[docs]
    def add_nodes_from(self, nodes, **kwargs):
        """
        Add multiple nodes to the cluster graph.

        Parameters
        ----------
        nodes: iterable container
            A container of nodes (list, dict, set, etc.).

        Examples
        --------
        >>> from pgmpy.models import ClusterGraph
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([("a", "b"), ("a", "b", "c")])
        """
        for node in nodes:
            self.add_node(node, **kwargs)



[docs]
    def add_edge(self, u, v, **kwargs):
        """
        Add an edge between two clique nodes.

        Parameters
        ----------
        u, v: nodes
            Nodes can be any list or set or tuple of nodes forming a clique.

        Examples
        --------
        >>> from pgmpy.models import ClusterGraph
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([("a", "b", "c"), ("a", "b"), ("a", "c")])
        >>> G.add_edges_from(
        ...     [(("a", "b", "c"), ("a", "b")), (("a", "b", "c"), ("a", "c"))]
        ... )
        """
        set_u = set(u)
        set_v = set(v)
        if set_u.isdisjoint(set_v):
            raise ValueError("No sepset found between these two edges.")

        super(ClusterGraph, self).add_edge(u, v)



[docs]
    def add_factors(self, *factors):
        """
        Associate a factor to the graph.
        See factors class for the order of potential values

        Parameters
        ----------
        *factor: pgmpy.factors.factors object
            A factor object on any subset of the variables of the model which
            is to be associated with the model.

        Returns
        -------
        None

        Examples
        --------
        >>> from pgmpy.models import ClusterGraph
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> student = ClusterGraph()
        >>> student.add_node(("Alice", "Bob"))
        >>> factor = DiscreteFactor(
        ...     ["Alice", "Bob"], cardinality=[3, 2], values=np.random.rand(6)
        ... )
        >>> student.add_factors(factor)
        """
        for factor in factors:
            factor_scope = set(factor.scope())
            nodes = [set(node) for node in self.nodes()]
            if factor_scope not in nodes:
                raise ValueError(
                    "Factors defined on clusters of variable not" "present in model"
                )

            self.factors.append(factor)



[docs]
    def get_factors(self, node=None):
        """
        Return the factors that have been added till now to the graph.

        If node is not None, it would return the factor corresponding to the
        given node.

        Examples
        --------
        >>> from pgmpy.models import ClusterGraph
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([("a", "b", "c"), ("a", "b"), ("a", "c")])
        >>> G.add_edges_from(
        ...     [(("a", "b", "c"), ("a", "b")), (("a", "b", "c"), ("a", "c"))]
        ... )
        >>> phi1 = DiscreteFactor(["a", "b", "c"], [2, 2, 2], np.random.rand(8))
        >>> phi2 = DiscreteFactor(["a", "b"], [2, 2], np.random.rand(4))
        >>> phi3 = DiscreteFactor(["a", "c"], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2, phi3)
        >>> G.get_factors()
        >>> G.get_factors(node=("a", "b", "c"))
        """
        if node is None:
            return self.factors
        else:
            nodes = [set(n) for n in self.nodes()]

            if set(node) not in nodes:
                raise ValueError("Node not present in Cluster Graph")

            factors = filter(lambda x: set(x.scope()) == set(node), self.factors)
            return next(factors)



[docs]
    def remove_factors(self, *factors):
        """
        Removes the given factors from the added factors.

        Examples
        --------
        >>> from pgmpy.models import ClusterGraph
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> student = ClusterGraph()
        >>> factor = DiscreteFactor(
        ...     ["Alice", "Bob"], cardinality=[2, 2], values=np.random.rand(4)
        ... )
        >>> student.add_node(("Alice", "Bob"))
        >>> student.add_factors(factor)
        >>> student.remove_factors(factor)
        """
        for factor in factors:
            self.factors.remove(factor)


    @property
    def clique_beliefs(self) -> FactorDict:
        """
        Return a mapping from the cliques to their factor representations.

        Returns
        -------
        FactorDict: mapping from cliques to factors

        Examples
        --------
        >>> from pgmpy.models import ClusterGraph
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([("a", "b", "c"), ("a", "b"), ("a", "c")])
        >>> G.add_edges_from(
        ...     [(("a", "b", "c"), ("a", "b")), (("a", "b", "c"), ("a", "c"))]
        ... )
        >>> phi1 = DiscreteFactor(["a", "b", "c"], [2, 2, 2], np.random.rand(8))
        >>> phi2 = DiscreteFactor(["a", "b"], [2, 2], np.random.rand(4))
        >>> phi3 = DiscreteFactor(["a", "c"], [2, 2], np.random.rand(4))
        >>> G.clique_beliefs
        """
        return FactorDict({clique: self.get_factors(clique) for clique in self.nodes()})

    @clique_beliefs.setter
    def clique_beliefs(self, clique_beliefs: FactorDict) -> None:
        self.remove_factors(*self.get_factors())
        self.add_factors(*clique_beliefs.values())


[docs]
    def get_cardinality(self, node=None):
        """
        Returns the cardinality of the node

        Parameters
        ----------
        node: any hashable python object (optional)
            The node whose cardinality we want. If node is not specified returns a
            dictionary with the given variable as keys and their respective cardinality
            as values.

        Returns
        -------
        int or dict : If node is specified returns the cardinality of the node.
                      If node is not specified returns a dictionary with the given
                      variable as keys and their respective cardinality as values.


        Examples
        --------
        >>> from pgmpy.models import ClusterGraph
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> student = ClusterGraph()
        >>> factor = DiscreteFactor(
        ...     ["Alice", "Bob"], cardinality=[2, 2], values=np.random.rand(4)
        ... )
        >>> student.add_node(("Alice", "Bob"))
        >>> student.add_factors(factor)
        >>> student.get_cardinality()
        defaultdict(<class 'int'>, {'Alice': 2, 'Bob': 2})

        >>> student.get_cardinality(node="Alice")
        2
        """
        if node:
            for factor in self.factors:
                for variable, cardinality in zip(factor.scope(), factor.cardinality):
                    if node == variable:
                        return cardinality

        else:
            cardinalities = defaultdict(int)
            for factor in self.factors:
                for variable, cardinality in zip(factor.scope(), factor.cardinality):
                    cardinalities[variable] = cardinality
            return cardinalities



[docs]
    def get_partition_function(self):
        r"""
        Returns the partition function for a given undirected graph.

        A partition function is defined as

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        where m is the number of factors present in the graph
        and X are all the random variables present.

        Examples
        --------
        >>> from pgmpy.models import ClusterGraph
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([("a", "b", "c"), ("a", "b"), ("a", "c")])
        >>> G.add_edges_from(
        ...     [(("a", "b", "c"), ("a", "b")), (("a", "b", "c"), ("a", "c"))]
        ... )
        >>> phi1 = DiscreteFactor(["a", "b", "c"], [2, 2, 2], np.random.rand(8))
        >>> phi2 = DiscreteFactor(["a", "b"], [2, 2], np.random.rand(4))
        >>> phi3 = DiscreteFactor(["a", "c"], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2, phi3)
        >>> G.get_partition_function()
        """
        if self.check_model():
            factor = self.factors[0]
            factor = factor_product(
                factor, *[self.factors[i] for i in range(1, len(self.factors))]
            )
            return compat_fns.sum(factor.values)



[docs]
    def check_model(self):
        """
        Check the model for various errors. This method checks for the following
        errors.

        * Checks if factors are defined for all the cliques or not.
        * Check for running intersection property is not done explicitly over
          here as it done in the add_edges method.
        * Checks if cardinality information for all the variables is available or not. If
          not it raises an error.
        * Check if cardinality of random variable remains same across all the
          factors.

        Returns
        -------
        check: boolean
            True if all the checks are passed
        """
        for clique in self.nodes():
            factors = filter(lambda x: set(x.scope()) == set(clique), self.factors)
            if not any(factors):
                raise ValueError("Factors for all the cliques or clusters not defined.")

        cardinalities = self.get_cardinality()
        if len(set((x for clique in self.nodes() for x in clique))) != len(
            cardinalities
        ):
            raise ValueError("Factors for all the variables not defined.")

        for factor in self.factors:
            for variable, cardinality in zip(factor.scope(), factor.cardinality):
                if cardinalities[variable] != cardinality:
                    raise ValueError(
                        f"Cardinality of variable {variable} not matching among factors"
                    )

        return True



[docs]
    def copy(self):
        """
        Returns a copy of ClusterGraph.

        Returns
        -------
        ClusterGraph: copy of ClusterGraph

        Examples
        --------
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([("a", "b"), ("b", "c")])
        >>> G.add_edge(("a", "b"), ("b", "c"))
        >>> phi1 = DiscreteFactor(["a", "b"], [2, 2], np.random.rand(4))
        >>> phi2 = DiscreteFactor(["b", "c"], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2)
        >>> graph_copy = G.copy()
        >>> graph_copy.factors
        [<DiscreteFactor representing phi(a:2, b:2) at 0xb71b19cc>,
         <DiscreteFactor representing phi(b:2, c:2) at 0xb4eaf3ac>]
        >>> graph_copy.edges()
        [(('a', 'b'), ('b', 'c'))]
        >>> graph_copy.nodes()
        [('a', 'b'), ('b', 'c')]
        """
        copy = ClusterGraph(self.edges())
        if self.factors:
            factors_copy = [factor.copy() for factor in self.factors]
            copy.add_factors(*factors_copy)
        return copy
Source code for pgmpy.models.ClusterGraph

Navigation

Related Topics