Source code for pgmpy.structure_score.log_likelihood_gauss
import statsmodels.formula.api as smf
from pgmpy.structure_score._base import BaseStructureScore
[docs]
class LogLikelihoodGauss(BaseStructureScore):
    r"""
    Log-likelihood structure score for Gaussian Bayesian networks.

    This score evaluates a continuous Bayesian network structure by fitting a Gaussian GLM for each local family and
    returning the fitted log-likelihood. Each local family is modelled as:

    .. math::
        X_i = \beta_0 + \beta^\top \Pi_i + \varepsilon_i, \qquad \varepsilon_i \sim \mathcal{N}(0, \sigma_i^2),

    and the local score is the log-likelihood of the fitted model:

    .. math::
        \ell(X_i, \Pi_i) = \log p(x_i \mid \hat{\beta}_0, \hat{\beta}, \hat{\sigma}_i^2, \Pi_i).

    If `parents` is empty, the fitted model reduces to :math:`X_i = \beta_0 + \varepsilon_i`.

    Column names are quoted when the regression formula is built, so names that are not valid Python identifiers
    (containing spaces, ``-``, ``:``, leading digits, ...) are treated as plain column names rather than as formula
    syntax.

    Parameters
    ----------
    data : pandas.DataFrame
        DataFrame where each column represents a continuous variable.

    state_names : dict, optional
        Accepted for API consistency but not typically used for Gaussian networks.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.structure_score import LogLikelihoodGauss
    >>> rng = np.random.default_rng(0)
    >>> data = pd.DataFrame(
    ...     {
    ...         "A": rng.normal(size=100),
    ...         "B": rng.normal(size=100),
    ...         "C": rng.normal(size=100),
    ...     }
    ... )
    >>> score = LogLikelihoodGauss(data)
    >>> round(score.local_score("B", ("A", "C")), 3)
    np.float64(-137.16)

    Raises
    ------
    ValueError
        If the model cannot be fitted because the data contains incompatible or non-numeric variables.
    """

    # NOTE(review): keys and values are reproduced verbatim (including the spelling
    # of "is_parameteric"); confirm against whatever consumes these tags before renaming.
    _tags = {
        "name": "ll-g",
        "supported_datatype": "continuous",
        "default_for": None,
        "is_parameteric": False,
    }

    def __init__(self, data, state_names=None):
        super().__init__(data, state_names=state_names)

    @staticmethod
    def _quote_column(name: str) -> str:
        """Return a formula term that refers to column `name` literally.

        The name is wrapped in the formula quoting helper ``Q(...)`` so that characters
        with a meaning in the formula mini-language (``-``, ``+``, ``:``, ``*``, spaces,
        ...) are never interpreted as operators. ``repr`` produces a correctly escaped
        string literal even when the name itself contains quote characters.
        """
        return f"Q({str(name)!r})"

    def _log_likelihood(self, variable: str, parents: tuple[str, ...]) -> tuple[float, float]:
        """Fit ``variable ~ parents`` with a GLM and return its fit statistics.

        Parameters
        ----------
        variable : str
            Name of the column used as the response.

        parents : tuple of str
            Names of the columns used as predictors. When empty, an intercept-only
            model is fitted.

        Returns
        -------
        tuple
            ``(llf, df_model)`` as reported by the fitted statsmodels GLM result:
            the log-likelihood and the model degrees of freedom.
        """
        predictor_terms = [self._quote_column(parent) for parent in parents]
        # An empty right-hand side is not a valid formula; "1" requests the
        # intercept-only model instead.
        rhs = " + ".join(predictor_terms) if predictor_terms else "1"
        formula = f"{self._quote_column(variable)} ~ {rhs}"

        glm_model = smf.glm(formula=formula, data=self.data).fit()
        return (glm_model.llf, glm_model.df_model)

    def _local_score(self, variable: str, parents: tuple[str, ...]) -> float:
        """Return the fitted log-likelihood of `variable` given `parents`."""
        ll, _ = self._log_likelihood(variable=variable, parents=parents)
        return ll