Source code for pgmpy.structure_score.bic_gauss

import numpy as np

from pgmpy.structure_score.log_likelihood_gauss import LogLikelihoodGauss


class BICGauss(LogLikelihoodGauss):
    r"""
    Bayesian Information Criterion (BIC) score for Gaussian Bayesian networks.

    The score starts from the fitted Gaussian log-likelihood of a variable
    given its parents and subtracts a complexity penalty, so that adding
    parents is only rewarded when the gain in fit outweighs the extra
    parameters. For a variable :math:`X_i` with parent set :math:`\Pi_i`
    the local score is

    .. math::
        \operatorname{BIC}(X_i, \Pi_i)
            = \ell(X_i, \Pi_i) - \frac{d_i}{2} \log n,

    where

    * :math:`\ell(X_i, \Pi_i)` is the fitted Gaussian log-likelihood,
    * :math:`d_i = \text{df\_model} + 2` is the effective parameter count
      used by the implementation, and
    * :math:`n` is the number of rows in `self.data`.

    `df_model` is the statsmodels degree-of-freedom count for the fitted
    regressors; it does not include the intercept. The extra `+ 2` adds one
    intercept parameter and one Gaussian variance parameter.

    Parameters
    ----------
    data : pandas.DataFrame
        DataFrame in which every column is a continuous variable.

    state_names : dict, optional
        Accepted for API consistency; not typically used for Gaussian
        networks.

    Raises
    ------
    ValueError
        If the model cannot be fitted because the data contains incompatible
        or non-numeric variables.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.structure_score import BICGauss
    >>> rng = np.random.default_rng(0)
    >>> data = pd.DataFrame(
    ...     {
    ...         "A": rng.normal(size=100),
    ...         "B": rng.normal(size=100),
    ...         "C": rng.normal(size=100),
    ...     }
    ... )
    >>> score = BICGauss(data)
    >>> round(score.local_score("B", ("A", "C")), 3)
    np.float64(-146.37)
    """

    _tags = {
        "name": "bic-g",
        "supported_datatype": "continuous",
        "default_for": "continuous",
        "is_parameteric": False,
    }

    def __init__(self, data, state_names=None):
        super().__init__(data, state_names=state_names)

    def _local_score(self, variable: str, parents: tuple[str, ...]) -> float:
        """
        Return the BIC local score of `variable` given `parents`.

        The log-likelihood and `df_model` come from `self._log_likelihood`;
        the penalty is half the effective parameter count (`df_model + 2`)
        times the log of the number of rows in `self.data`.
        """
        log_lik, df_model = self._log_likelihood(variable=variable, parents=parents)

        # +2: one intercept and one variance parameter on top of df_model.
        n_params = df_model + 2
        n_rows = self.data.shape[0]
        penalty = (n_params / 2) * np.log(n_rows)

        return log_lik - penalty