# Source code for pgmpy.models.ClusterGraph

#!/usr/bin/env python3

from collections import defaultdict

from pgmpy.base import UndirectedGraph
from pgmpy.factors import FactorDict, factor_product
from pgmpy.utils import compat_fns


class ClusterGraph(UndirectedGraph):
    r"""
    Base class for representing Cluster Graph.

    A cluster graph is an undirected graph in which every node is associated
    with a subset of variables. The graph contains undirected edges that
    connect clusters whose scopes have a non-empty intersection.

    Formally, a cluster graph :math:`\mathcal{U}` for a set of factors
    :math:`\Phi` over :math:`\mathcal{X}` is an undirected graph, each of
    whose nodes :math:`i` is associated with a subset
    :math:`C_i \subseteq X`. A cluster graph must be family-preserving - each
    factor :math:`\phi \in \Phi` must be associated with a cluster C, denoted
    :math:`\alpha(\phi)`, such that :math:`Scope[\phi] \subseteq C_i`. Each
    edge between a pair of clusters :math:`C_i` and :math:`C_j` is associated
    with a sepset :math:`S_{i,j} \subseteq C_i \cap C_j`.

    Parameters
    ----------
    ebunch: iterable of edges (optional)
        Edge list used to initialize the graph. Every edge is a pair of
        clique nodes. If ebunch=None (default) an empty graph is created.

    Attributes
    ----------
    factors: list
        The factors that have been associated with the graph through
        ``add_factors``.

    Examples
    --------
    Create an empty ClusterGraph with no nodes and no edges

    >>> from pgmpy.models import ClusterGraph
    >>> G = ClusterGraph()

    G can be grown by adding clique nodes.

    **Nodes:**

    Add a tuple (or list or set) of nodes as single clique node.

    >>> G.add_node(("a", "b", "c"))
    >>> G.add_nodes_from([("a", "b"), ("a", "b", "c")])

    **Edges:**

    G can also be grown by adding edges.

    >>> G.add_edge(("a", "b", "c"), ("a", "b"))

    or a list of edges

    >>> G.add_edges_from([(("a", "b", "c"), ("a", "b")), (("a", "b", "c"), ("a", "c"))])
    """

    def __init__(self, ebunch=None):
        super().__init__()
        # Create the factor list before touching the graph so the instance is
        # fully initialised even if adding an edge raises.
        self.factors = []
        if ebunch:
            self.add_edges_from(ebunch)

    def add_node(self, node, **kwargs):
        """
        Add a single node to the cluster graph.

        Parameters
        ----------
        node: node
            A node should be a collection of nodes forming a clique. It can be
            a list, set or tuple of nodes. It is stored as a tuple.

        **kwargs:
            Forwarded unchanged to the parent class ``add_node``.

        Raises
        ------
        TypeError
            If `node` is not a list, set or tuple.

        Examples
        --------
        >>> from pgmpy.models import ClusterGraph
        >>> G = ClusterGraph()
        >>> G.add_node(("a", "b", "c"))
        """
        if not isinstance(node, (list, set, tuple)):
            raise TypeError("Node can only be a list, set or tuple of nodes forming a clique")

        # Lists and sets are not hashable, so the clique is stored as a tuple.
        node = tuple(node)
        super().add_node(node, **kwargs)

    def add_nodes_from(self, nodes, **kwargs):
        """
        Add multiple nodes to the cluster graph.

        Parameters
        ----------
        nodes: iterable container
            A container of nodes (list, dict, set, etc.). Every element is
            added through ``add_node`` and must satisfy its requirements.

        **kwargs:
            Forwarded to ``add_node`` for every node.

        Examples
        --------
        >>> from pgmpy.models import ClusterGraph
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([("a", "b"), ("a", "b", "c")])
        """
        for node in nodes:
            self.add_node(node, **kwargs)

    def add_edge(self, u, v, **kwargs):
        """
        Add an edge between two clique nodes.

        Parameters
        ----------
        u, v: nodes
            Nodes can be any list or set or tuple of nodes forming a clique.
            Lists and sets are converted to tuples, as ``add_node`` does.
            Sets have no defined order, so prefer tuples when the node must
            match one that is already in the graph.

        **kwargs:
            Forwarded to the parent class ``add_edge``.
            NOTE(review): assumes the parent accepts these keywords - confirm
            against ``UndirectedGraph.add_edge``.

        Raises
        ------
        ValueError
            If the two cliques have no variable in common.

        Examples
        --------
        >>> from pgmpy.models import ClusterGraph
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([("a", "b", "c"), ("a", "b"), ("a", "c")])
        >>> G.add_edges_from(
        ...     [(("a", "b", "c"), ("a", "b")), (("a", "b", "c"), ("a", "c"))]
        ... )
        """
        set_u = set(u)
        set_v = set(v)
        if set_u.isdisjoint(set_v):
            raise ValueError("No sepset found between these two edges.")

        # Lists and sets are unhashable and cannot be graph nodes; store them
        # as tuples so the documented input types actually work.
        if isinstance(u, (list, set)):
            u = tuple(u)
        if isinstance(v, (list, set)):
            v = tuple(v)

        super().add_edge(u, v, **kwargs)

    def add_factors(self, *factors):
        """
        Associate factors to the graph.

        See factors class for the order of potential values.

        Parameters
        ----------
        *factors: pgmpy.factors.factors object
            Factor objects, each defined on exactly the variables of one of
            the cliques of the model.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If the scope of a factor does not match any clique of the graph.
            Factors preceding the offending one have already been added.

        Examples
        --------
        >>> import numpy as np
        >>> from pgmpy.models import ClusterGraph
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> student = ClusterGraph()
        >>> student.add_node(("Alice", "Bob"))
        >>> factor = DiscreteFactor(
        ...     ["Alice", "Bob"], cardinality=[3, 2], values=np.random.rand(6)
        ... )
        >>> student.add_factors(factor)
        """
        # The cliques do not change while factors are added, so their scopes
        # are computed once instead of once per factor.
        clique_scopes = [set(node) for node in self.nodes()]

        for factor in factors:
            if set(factor.scope()) not in clique_scopes:
                raise ValueError("Factors defined on clusters of variable not present in model")
            self.factors.append(factor)

    def get_factors(self, node=None):
        """
        Return the factors that have been added till now to the graph.

        If node is not None, it would return the factor corresponding to the
        given node.

        Parameters
        ----------
        node: list, set or tuple (optional)
            The clique whose factor is wanted. The order of the variables is
            irrelevant.

        Returns
        -------
        list or factor:
            The list of all factors when `node` is None (this is the internal
            list, not a copy), otherwise the first factor whose scope equals
            the variables of `node`.

        Raises
        ------
        ValueError
            If `node` is not a clique of the graph, or if no factor has been
            added for it.

        Examples
        --------
        >>> import numpy as np
        >>> from pgmpy.models import ClusterGraph
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([("a", "b", "c"), ("a", "b"), ("a", "c")])
        >>> G.add_edges_from(
        ...     [(("a", "b", "c"), ("a", "b")), (("a", "b", "c"), ("a", "c"))]
        ... )
        >>> phi1 = DiscreteFactor(["a", "b", "c"], [2, 2, 2], np.random.rand(8))
        >>> phi2 = DiscreteFactor(["a", "b"], [2, 2], np.random.rand(4))
        >>> phi3 = DiscreteFactor(["a", "c"], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2, phi3)
        >>> len(G.get_factors())
        3
        >>> G.get_factors(node=("a", "b", "c"))  # doctest: +ELLIPSIS
        <DiscreteFactor representing phi(a:2, b:2, c:2) at 0x...>
        """
        if node is None:
            return self.factors

        node_scope = set(node)
        if not any(set(clique) == node_scope for clique in self.nodes()):
            raise ValueError("Node not present in Cluster Graph")

        for factor in self.factors:
            if set(factor.scope()) == node_scope:
                return factor

        # A clique without a factor used to surface as a bare StopIteration.
        raise ValueError("No factor defined for the given node")

    def remove_factors(self, *factors):
        """
        Removes the given factors from the added factors.

        Parameters
        ----------
        *factors: factor objects
            The factors to remove. Each one is removed with ``list.remove``,
            which raises ``ValueError`` when the factor is not present.

        Examples
        --------
        >>> import numpy as np
        >>> from pgmpy.models import ClusterGraph
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> student = ClusterGraph()
        >>> factor = DiscreteFactor(
        ...     ["Alice", "Bob"], cardinality=[2, 2], values=np.random.rand(4)
        ... )
        >>> student.add_node(("Alice", "Bob"))
        >>> student.add_factors(factor)
        >>> student.remove_factors(factor)
        """
        for factor in factors:
            self.factors.remove(factor)

    @property
    def clique_beliefs(self) -> FactorDict:
        """
        Return a mapping from the cliques to their factor representations.

        Assigning to this property removes all current factors and adds the
        values of the assigned mapping instead.

        Returns
        -------
        FactorDict: mapping from cliques to factors

        Examples
        --------
        >>> import numpy as np
        >>> from pgmpy.models import ClusterGraph
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([("a", "b", "c"), ("a", "b"), ("a", "c")])
        >>> G.add_edges_from(
        ...     [(("a", "b", "c"), ("a", "b")), (("a", "b", "c"), ("a", "c"))]
        ... )
        >>> phi1 = DiscreteFactor(["a", "b", "c"], [2, 2, 2], np.random.rand(8))
        >>> phi2 = DiscreteFactor(["a", "b"], [2, 2], np.random.rand(4))
        >>> phi3 = DiscreteFactor(["a", "c"], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2, phi3)
        >>> len(G.clique_beliefs)
        3
        """
        return FactorDict({clique: self.get_factors(clique) for clique in self.nodes()})

    @clique_beliefs.setter
    def clique_beliefs(self, clique_beliefs: FactorDict) -> None:
        # Unpacking snapshots the factor list into the call arguments, so
        # removing from the list while going through them is safe.
        self.remove_factors(*self.get_factors())
        self.add_factors(*clique_beliefs.values())

    def get_cardinality(self, node=None):
        """
        Returns the cardinality of the node

        Parameters
        ----------
        node: any hashable python object (optional)
            The node whose cardinality we want. If node is not specified
            returns a dictionary with the given variable as keys and their
            respective cardinality as values.

        Returns
        -------
        int or dict : If node is specified returns the cardinality of the
            node, or None when no factor contains it. If node is not specified
            returns a dictionary with the given variable as keys and their
            respective cardinality as values.

        Examples
        --------
        >>> import numpy as np
        >>> from pgmpy.models import ClusterGraph
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> student = ClusterGraph()
        >>> factor = DiscreteFactor(
        ...     ["Alice", "Bob"], cardinality=[2, 2], values=np.random.rand(4)
        ... )
        >>> student.add_node(("Alice", "Bob"))
        >>> student.add_factors(factor)
        >>> student.get_cardinality()
        defaultdict(<class 'int'>, {'Alice': np.int64(2), 'Bob': np.int64(2)})

        >>> student.get_cardinality(node="Alice")
        np.int64(2)
        """
        # Compare against None explicitly: falsy variable names such as 0 or
        # "" are legitimate nodes and must not fall through to the dict case.
        if node is not None:
            for factor in self.factors:
                for variable, cardinality in zip(factor.scope(), factor.cardinality):
                    if node == variable:
                        return cardinality
            return None

        cardinalities = defaultdict(int)
        for factor in self.factors:
            for variable, cardinality in zip(factor.scope(), factor.cardinality):
                # When factors disagree the last one wins; check_model reports
                # such mismatches.
                cardinalities[variable] = cardinality
        return cardinalities

    def get_partition_function(self):
        r"""
        Returns the partition function for a given undirected graph.

        A partition function is defined as

        .. math:: \sum_{X}(\prod_{i=1}^{m} \phi_i)

        where m is the number of factors present in the graph
        and X are all the random variables present.

        The model is validated with ``check_model`` first, so the errors
        raised there propagate from this method.

        Examples
        --------
        >>> import numpy as np
        >>> from pgmpy.models import ClusterGraph
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([("a", "b", "c"), ("a", "b"), ("a", "c")])
        >>> G.add_edges_from(
        ...     [(("a", "b", "c"), ("a", "b")), (("a", "b", "c"), ("a", "c"))]
        ... )
        >>> phi1 = DiscreteFactor(["a", "b", "c"], [2, 2, 2], np.random.rand(8))
        >>> phi2 = DiscreteFactor(["a", "b"], [2, 2], np.random.rand(4))
        >>> phi3 = DiscreteFactor(["a", "c"], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2, phi3)
        >>> G.get_partition_function()  # doctest: +ELLIPSIS
        np.float64(...)
        """
        if self.check_model():
            # Multiply all the factors together, then sum out every variable.
            factor = factor_product(self.factors[0], *self.factors[1:])
            return compat_fns.sum(factor.values)

    def check_model(self):
        """
        Check the model for various errors. This method checks for the following
        errors.

        * Checks if factors are defined for all the cliques or not.
        * The running intersection property is not checked here; ``add_edge``
          only verifies that the two cliques share at least one variable.
        * Checks if cardinality information for all the variables is
          available or not. If not it raises an error.
        * Check if cardinality of random variable remains same across all the
          factors.

        Returns
        -------
        check: boolean
            True if all the checks are passed

        Raises
        ------
        ValueError
            If any of the checks fails.
        """
        for clique in self.nodes():
            clique_scope = set(clique)
            # Compare scopes explicitly instead of relying on the truthiness
            # of the factor objects.
            if not any(set(factor.scope()) == clique_scope for factor in self.factors):
                raise ValueError("Factors for all the cliques or clusters not defined.")

        cardinalities = self.get_cardinality()
        variables = {variable for clique in self.nodes() for variable in clique}
        if len(variables) != len(cardinalities):
            raise ValueError("Factors for all the variables not defined.")

        for factor in self.factors:
            for variable, cardinality in zip(factor.scope(), factor.cardinality):
                if cardinalities[variable] != cardinality:
                    raise ValueError(f"Cardinality of variable {variable} not matching among factors")

        return True

    def copy(self):
        """
        Returns a copy of ClusterGraph.

        The copy holds the same cliques and edges and a copy of every factor
        (made with the factor's own ``copy`` method).

        Returns
        -------
        ClusterGraph: copy of ClusterGraph

        Examples
        --------
        >>> import numpy as np
        >>> from pgmpy.factors.discrete import DiscreteFactor
        >>> G = ClusterGraph()
        >>> G.add_nodes_from([("a", "b"), ("b", "c")])
        >>> G.add_edge(("a", "b"), ("b", "c"))
        >>> phi1 = DiscreteFactor(["a", "b"], [2, 2], np.random.rand(4))
        >>> phi2 = DiscreteFactor(["b", "c"], [2, 2], np.random.rand(4))
        >>> G.add_factors(phi1, phi2)
        >>> graph_copy = G.copy()
        >>> graph_copy.factors  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
        [<DiscreteFactor representing phi(a:2, b:2) at 0x...>,
         <DiscreteFactor representing phi(b:2, c:2) at 0x...>]
        >>> sorted(graph_copy.edges())
        [(('a', 'b'), ('b', 'c'))]
        >>> sorted(graph_copy.nodes())
        [('a', 'b'), ('b', 'c')]
        """
        graph_copy = ClusterGraph(self.edges())

        # The edge list cannot carry cliques without neighbours; add them
        # explicitly, otherwise they (and their factors) would be lost.
        copied_nodes = set(graph_copy.nodes())
        graph_copy.add_nodes_from(node for node in self.nodes() if node not in copied_nodes)

        if self.factors:
            factors_copy = [factor.copy() for factor in self.factors]
            graph_copy.add_factors(*factors_copy)
        return graph_copy