Source code for pgmpy.factors.discrete.JointProbabilityDistribution

import itertools
from functools import reduce
from operator import mul

import numpy as np

from pgmpy.factors.discrete import DiscreteFactor
from pgmpy.independencies import Independencies


[docs] class JointProbabilityDistribution(DiscreteFactor): """ Base class for Joint Probability Distribution """ def __init__(self, variables, cardinality, values): """ Initialize a Joint Probability Distribution class. Defined above, we have the following mapping from variable assignments to the index of the row vector in the value field: +-----+-----+-----+-------------------------+ | x1 | x2 | x3 | P(x1, x2, x2) | +-----+-----+-----+-------------------------+ | x1_0| x2_0| x3_0| P(x1_0, x2_0, x3_0) | +-----+-----+-----+-------------------------+ | x1_1| x2_0| x3_0| P(x1_1, x2_0, x3_0) | +-----+-----+-----+-------------------------+ | x1_0| x2_1| x3_0| P(x1_0, x2_1, x3_0) | +-----+-----+-----+-------------------------+ | x1_1| x2_1| x3_0| P(x1_1, x2_1, x3_0) | +-----+-----+-----+-------------------------+ | x1_0| x2_0| x3_1| P(x1_0, x2_0, x3_1) | +-----+-----+-----+-------------------------+ | x1_1| x2_0| x3_1| P(x1_1, x2_0, x3_1) | +-----+-----+-----+-------------------------+ | x1_0| x2_1| x3_1| P(x1_0, x2_1, x3_1) | +-----+-----+-----+-------------------------+ | x1_1| x2_1| x3_1| P(x1_1, x2_1, x3_1) | +-----+-----+-----+-------------------------+ Parameters ---------- variables: list List of scope of Joint Probability Distribution. cardinality: list, array_like List of cardinality of each variable value: list, array_like List or array of values of factor. A Joint Probability Distribution's values are stored in a row vector in the value using an ordering such that the left-most variables as defined in the variable field cycle through their values the fastest. Examples -------- >>> import numpy as np >>> from pgmpy.factors.discrete import JointProbabilityDistribution >>> prob = JointProbabilityDistribution( ... variables=["x1", "x2", "x3"], ... cardinality=[2, 2, 2], ... values=np.ones(8) / 8, ... ) >>> print(prob) +-------+-------+-------+---------------+ | x1 | x2 | x3 | P(x1,x2,x3) | +=======+=======+=======+===============+ | x1(0) | x2(0) | x3(0) | 0.1250 | +-------+-------+-------+---------------+ | x1(0) | x2(0) | x3(1) | 0.1250 | +-------+-------+-------+---------------+ | x1(0) | x2(1) | x3(0) | 0.1250 | +-------+-------+-------+---------------+ | x1(0) | x2(1) | x3(1) | 0.1250 | +-------+-------+-------+---------------+ | x1(1) | x2(0) | x3(0) | 0.1250 | +-------+-------+-------+---------------+ | x1(1) | x2(0) | x3(1) | 0.1250 | +-------+-------+-------+---------------+ | x1(1) | x2(1) | x3(0) | 0.1250 | +-------+-------+-------+---------------+ | x1(1) | x2(1) | x3(1) | 0.1250 | +-------+-------+-------+---------------+ """ if np.isclose(np.sum(values), 1): super(JointProbabilityDistribution, self).__init__( variables, cardinality, values ) else: raise ValueError("The probability values doesn't sum to 1.") def __repr__(self): var_card = ", ".join( [f"{var}:{card}" for var, card in zip(self.variables, self.cardinality)] ) return f"<Joint Distribution representing P({var_card}) at {hex(id(self))}>" def __str__(self): return self._str(phi_or_p="P")
[docs] def marginal_distribution(self, variables, inplace=True): """ Returns the marginal distribution over variables. Parameters ---------- variables: string, list, tuple, set, dict Variable or list of variables over which marginal distribution needs to be calculated inplace: Boolean (default True) If False return a new instance of JointProbabilityDistribution Examples -------- >>> import numpy as np >>> from pgmpy.factors.discrete import JointProbabilityDistribution >>> values = np.random.rand(12) >>> prob = JointProbabilityDistribution( ... ["x1", "x2", "x3"], [2, 3, 2], values / np.sum(values) ... ) >>> prob.marginal_distribution(variables=["x1", "x2"]) >>> print(prob) # doctest: +SKIP +-------+-------+------------+ | x1 | x2 | P(x1,x2) | +=======+=======+============+ | x1(0) | x2(0) | 0.1408 | +-------+-------+------------+ | x1(0) | x2(1) | 0.3372 | +-------+-------+------------+ | x1(0) | x2(2) | 0.1530 | +-------+-------+------------+ | x1(1) | x2(0) | 0.2122 | +-------+-------+------------+ | x1(1) | x2(1) | 0.0950 | +-------+-------+------------+ | x1(1) | x2(2) | 0.0619 | +-------+-------+------------+ """ return self.marginalize( list( set(list(self.variables)) - set( variables if isinstance(variables, (list, set, dict, tuple)) else [variables] ) ), inplace=inplace, )
[docs] def check_independence( self, event1, event2, event3=None, condition_random_variable=False ): """ Check if the Joint Probability Distribution satisfies the given independence condition. Parameters ---------- event1: list random variable whose independence is to be checked. event2: list random variable from which event1 is independent. values: 2D array or list like or 1D array or list like A 2D list of tuples of the form (variable_name, variable_state). A 1D list or array-like to condition over randome variables (condition_random_variable must be True) The values on which to condition the Joint Probability Distribution. condition_random_variable: Boolean (Default false) If true and event3 is not None than will check independence condition over random variable. For random variables say X, Y, Z to check if X is independent of Y given Z. event1 should be either X or Y. event2 should be either Y or X. event3 should Z. Examples -------- >>> from pgmpy.factors.discrete import JointProbabilityDistribution as JPD >>> prob = JPD( ... variables=["I", "D", "G"], ... cardinality=[2, 2, 3], ... values=[ ... 0.126, ... 0.168, ... 0.126, ... 0.009, ... 0.045, ... 0.126, ... 0.252, ... 0.0224, ... 0.0056, ... 0.06, ... 0.036, ... 0.024, ... ], ... ) >>> prob.check_independence(event1=["I"], event2=["D"]) True >>> prob.check_independence(["I"], ["D"], [("G", 1)]) # Conditioning over G_1 False >>> # Conditioning over random variable G >>> prob.check_independence( ... ["I"], ["D"], ("G",), condition_random_variable=True ... ) False """ JPD = self.copy() if isinstance(event1, str): raise TypeError("Event 1 should be a list or array-like structure") if isinstance(event2, str): raise TypeError("Event 2 should be a list or array-like structure") if event3: if isinstance(event3, str): raise TypeError("Event 3 cannot of type string") elif condition_random_variable: if not all(isinstance(var, str) for var in event3): raise TypeError("event3 should be a 1d list of strings") event3 = list(event3) # Using the definition of conditional independence # If P(X,Y|Z) = P(X|Z)*P(Y|Z) # This can be expanded to P(X,Y,Z)*P(Z) == P(X,Z)*P(Y,Z) phi_z = JPD.marginal_distribution(event3, inplace=False).to_factor() for variable_pair in itertools.product(event1, event2): phi_xyz = JPD.marginal_distribution( event3 + list(variable_pair), inplace=False ).to_factor() phi_xz = JPD.marginal_distribution( event3 + [variable_pair[0]], inplace=False ).to_factor() phi_yz = JPD.marginal_distribution( event3 + [variable_pair[1]], inplace=False ).to_factor() if phi_xyz * phi_z != phi_xz * phi_yz: return False return True else: JPD.conditional_distribution(event3) for variable_pair in itertools.product(event1, event2): if JPD.marginal_distribution( variable_pair, inplace=False ) != JPD.marginal_distribution( variable_pair[0], inplace=False ) * JPD.marginal_distribution( variable_pair[1], inplace=False ): return False return True
[docs] def get_independencies(self, condition=None): """ Returns the independent variables in the joint probability distribution. Returns marginally independent variables if condition=None. Returns conditionally independent variables if condition!=None Parameters ---------- condition: array_like Random Variable on which to condition the Joint Probability Distribution. Examples -------- >>> import numpy as np >>> from pgmpy.factors.discrete import JointProbabilityDistribution >>> prob = JointProbabilityDistribution( ... variables=["x1", "x2", "x3"], ... cardinality=[2, 3, 2], ... values=np.ones(12) / 12, ... ) >>> prob.get_independencies() (x1 \u27c2 x2) (x1 \u27c2 x3) (x2 \u27c2 x3) """ JPD = self.copy() if condition: JPD.conditional_distribution(condition) independencies = Independencies() for variable_pair in itertools.combinations(list(JPD.variables), 2): if JPD.marginal_distribution( variable_pair, inplace=False ) == JPD.marginal_distribution( variable_pair[0], inplace=False ) * JPD.marginal_distribution( variable_pair[1], inplace=False ): independencies.add_assertions(variable_pair) return independencies
[docs] def conditional_distribution(self, values, inplace=True): """ Returns Conditional Probability Distribution after setting values to 1. Parameters ---------- values: list or array_like A list of tuples of the form (variable_name, variable_state). The values on which to condition the Joint Probability Distribution. inplace: Boolean (default True) If False returns a new instance of JointProbabilityDistribution Examples -------- >>> import numpy as np >>> from pgmpy.factors.discrete import JointProbabilityDistribution >>> prob = JointProbabilityDistribution( ... variables=["x1", "x2", "x3"], ... cardinality=[2, 2, 2], ... values=np.ones(8) / 8, ... ) >>> prob.conditional_distribution(values=[("x1", 1)]) >>> print(prob) +-------+-------+------------+ | x2 | x3 | P(x2,x3) | +=======+=======+============+ | x2(0) | x3(0) | 0.2500 | +-------+-------+------------+ | x2(0) | x3(1) | 0.2500 | +-------+-------+------------+ | x2(1) | x3(0) | 0.2500 | +-------+-------+------------+ | x2(1) | x3(1) | 0.2500 | +-------+-------+------------+ """ JPD = self if inplace else self.copy() JPD.reduce(values) JPD.normalize() if not inplace: return JPD
[docs] def copy(self): """ Returns A copy of JointProbabilityDistribution object Examples --------- >>> import numpy as np >>> from pgmpy.factors.discrete import JointProbabilityDistribution >>> prob = JointProbabilityDistribution( ... variables=["x1", "x2", "x3"], ... cardinality=[2, 3, 2], ... values=np.ones(12) / 12, ... ) >>> prob_copy = prob.copy() >>> (prob_copy.values == prob.values).all() np.True_ >>> prob_copy.variables == prob.variables True >>> prob_copy.variables[1] = "y" >>> prob_copy.variables == prob.variables False """ return JointProbabilityDistribution(self.scope(), self.cardinality, self.values)
[docs] def minimal_imap(self, order): """ Returns a Bayesian Model which is minimal IMap of the Joint Probability Distribution considering the order of the variables. Parameters ---------- order: array-like The order of the random variables. Examples -------- >>> import numpy as np >>> from pgmpy.factors.discrete import JointProbabilityDistribution >>> prob = JointProbabilityDistribution( ... variables=["x1", "x2", "x3"], ... cardinality=[2, 3, 2], ... values=np.ones(12) / 12, ... ) >>> bayesian_model = prob.minimal_imap(order=["x2", "x1", "x3"]) >>> bayesian_model <pgmpy.models.DiscreteBayesianNetwork.DiscreteBayesianNetwork object at 0x...> >>> bayesian_model.edges() OutEdgeView([('x2', 'x3'), ('x1', 'x3')]) """ from pgmpy.models import DiscreteBayesianNetwork def get_subsets(u): for r in range(len(u) + 1): for i in itertools.combinations(u, r): yield i G = DiscreteBayesianNetwork() for variable_index in range(len(order)): u = order[:variable_index] for subset in get_subsets(u): if len(subset) < len(u) and self.check_independence( [order[variable_index]], set(u) - set(subset), subset, True ): G.add_edges_from( [(variable, order[variable_index]) for variable in subset] ) return G
[docs] def is_imap(self, model): """ Checks whether the given DiscreteBayesianNetwork is Imap of JointProbabilityDistribution Parameters ---------- model : An instance of DiscreteBayesianNetwork Class, for which you want to check the Imap Returns ------- Is IMAP: bool True if given Bayesian Network is Imap for Joint Probability Distribution False otherwise Examples -------- >>> from pgmpy.models import DiscreteBayesianNetwork >>> from pgmpy.factors.discrete import TabularCPD >>> from pgmpy.factors.discrete import JointProbabilityDistribution >>> bm = DiscreteBayesianNetwork([("diff", "grade"), ("intel", "grade")]) >>> diff_cpd = TabularCPD("diff", 2, [[0.2], [0.8]]) >>> intel_cpd = TabularCPD("intel", 3, [[0.5], [0.3], [0.2]]) >>> grade_cpd = TabularCPD( ... "grade", ... 3, ... [ ... [0.1, 0.1, 0.1, 0.1, 0.1, 0.1], ... [0.1, 0.1, 0.1, 0.1, 0.1, 0.1], ... [0.8, 0.8, 0.8, 0.8, 0.8, 0.8], ... ], ... evidence=["diff", "intel"], ... evidence_card=[2, 3], ... ) >>> bm.add_cpds(diff_cpd, intel_cpd, grade_cpd) >>> val = [ ... 0.01, ... 0.01, ... 0.08, ... 0.006, ... 0.006, ... 0.048, ... 0.004, ... 0.004, ... 0.032, ... 0.04, ... 0.04, ... 0.32, ... 0.024, ... 0.024, ... 0.192, ... 0.016, ... 0.016, ... 0.128, ... ] >>> JPD = JointProbabilityDistribution( ... ["diff", "intel", "grade"], [2, 3, 3], val ... ) >>> JPD.is_imap(bm) True """ from pgmpy.models import DiscreteBayesianNetwork if not isinstance(model, DiscreteBayesianNetwork): raise TypeError("model must be an instance of DiscreteBayesianNetwork") factors = [cpd.to_factor() for cpd in model.get_cpds()] factor_prod = reduce(mul, factors) JPD_fact = DiscreteFactor(self.variables, self.cardinality, self.values) if JPD_fact == factor_prod: return True else: return False
[docs] def to_factor(self): """ Returns JointProbabilityDistribution as a DiscreteFactor object Examples -------- >>> import numpy as np >>> from pgmpy.factors.discrete import JointProbabilityDistribution >>> prob = JointProbabilityDistribution( ... ["x1", "x2", "x3"], [2, 3, 2], np.ones(12) / 12 ... ) >>> phi = prob.to_factor() >>> type(phi) <class 'pgmpy.factors.discrete.DiscreteFactor.DiscreteFactor'> """ return DiscreteFactor(self.variables, self.cardinality, self.values)
def pmap(self): pass