# Source code for pgmpy.structure_score.aic_gauss
from pgmpy.structure_score.log_likelihood_gauss import LogLikelihoodGauss
# [docs]
class AICGauss(LogLikelihoodGauss):
    r"""
    Akaike Information Criterion (AIC) score for Gaussian Bayesian networks.

    The score is the fitted Gaussian log-likelihood minus a complexity
    penalty that does not depend on the number of samples. For a variable
    :math:`X_i` with parent set :math:`\Pi_i` the local score is:

    .. math::

        \operatorname{AIC}(X_i, \Pi_i) = \ell(X_i, \Pi_i) - d_i,

    where :math:`\ell(X_i, \Pi_i)` is the fitted Gaussian log-likelihood and
    :math:`d_i = \text{df\_model} + 2` is the effective number of parameters
    charged by this implementation.

    `df_model` is the statsmodels degree-of-freedom count for the fitted
    regressors; it does not include the intercept. The extra `+ 2` covers the
    remaining two parameters: the intercept and the Gaussian variance.

    Parameters
    ----------
    data : pandas.DataFrame
        DataFrame in which every column is a continuous variable.

    state_names : dict, optional
        Accepted for API consistency; not typically used for Gaussian
        networks.

    Raises
    ------
    ValueError
        If the model cannot be fitted because the data contains incompatible
        or non-numeric variables.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.structure_score import AICGauss
    >>> rng = np.random.default_rng(0)
    >>> data = pd.DataFrame(
    ...     {
    ...         "A": rng.normal(size=100),
    ...         "B": rng.normal(size=100),
    ...         "C": rng.normal(size=100),
    ...     }
    ... )
    >>> score = AICGauss(data)
    >>> round(score.local_score("B", ("A", "C")), 3)
    np.float64(-141.16)
    """

    # Class-level metadata describing this score. Keys and values are kept
    # exactly as declared (including the "is_parameteric" spelling), since
    # whatever reads these tags is not visible from this file.
    _tags = {
        "name": "aic-g",
        "supported_datatype": "continuous",
        "default_for": None,
        "is_parameteric": False,
    }

    def __init__(self, data, state_names=None):
        """Initialise the score by delegating to the parent class constructor."""
        super().__init__(data, state_names=state_names)

    def _local_score(self, variable: str, parents: tuple[str, ...]) -> float:
        """
        Return the AIC local score of `variable` given `parents`.

        The parent class's `_log_likelihood` supplies the log-likelihood and
        the regressor degrees of freedom; the score is the former minus
        ``df_model + 2``.
        """
        log_lik, regressor_dof = self._log_likelihood(
            variable=variable, parents=parents
        )
        # One unit per fitted regressor, plus the intercept and the variance.
        penalty = regressor_dof + 2
        return log_lik - penalty