Source code for pgmpy.estimators.StructureScore

#!/usr/bin/env python
from math import lgamma, log

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from scipy.special import gammaln
from scipy.stats import multivariate_normal

from pgmpy.estimators import BaseEstimator


def get_scoring_method(scoring_method, data, use_cache):
    supported_methods = {
        "k2": K2,
        "bdeu": BDeu,
        "bds": BDs,
        "bic-d": BIC,
        "aic-d": AIC,
        "ll-g": LogLikelihoodGauss,
        "aic-g": AICGauss,
        "bic-g": BICGauss,
        "ll-cg": LogLikelihoodCondGauss,
        "aic-cg": AICCondGauss,
        "bic-cg": BICCondGauss,
    }

    if isinstance(scoring_method, str):
        if scoring_method.lower() in [
            "k2score",
            "bdeuscore",
            "bdsscore",
            "bicscore",
            "aicscore",
        ]:
            raise ValueError(
                "The scoring method names have been changed. Please refer to the documentation."
            )
        elif scoring_method.lower() not in supported_methods:
            raise ValueError(
                "Unknown scoring method. Please refer to the documentation for the list of supported score metrics."
            )
    elif not isinstance(scoring_method, StructureScore):
        raise ValueError(
            "scoring_method should either be one of the supported score names (k2, bdeu, bds, bic-d, aic-d, ll-g, aic-g, bic-g, ll-cg, aic-cg, bic-cg) or an instance of StructureScore"
        )

    if isinstance(scoring_method, str):
        score = supported_methods[scoring_method.lower()](data=data)
    else:
        score = scoring_method

    if use_cache:
        from pgmpy.estimators.ScoreCache import ScoreCache

        score_c = ScoreCache(score, data)
    else:
        score_c = score

    return score, score_c
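
# Illustrative usage of `get_scoring_method` (a hedged sketch, not part of the
# original module); the sample data below is hypothetical:
#
# >>> import numpy as np
# >>> import pandas as pd
# >>> data = pd.DataFrame(np.random.randint(0, 3, size=(200, 2)), columns=["A", "B"])
# >>> score, score_c = get_scoring_method("k2", data, use_cache=True)
# >>> isinstance(score, K2)
# True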


class StructureScore(BaseEstimator):
    """
    Abstract base class for structure scoring classes in pgmpy. Use any of the
    derived classes: K2, BDeu, BDs, BIC, AIC, or their Gaussian and
    conditional-Gaussian counterparts. Scoring classes are used to measure how
    well a model is able to describe the given data set.

    Parameters
    ----------
    data: pandas DataFrame object
        dataframe object where each column represents one variable.
        (If some values in the data are missing the data cells should be set to `numpy.nan`.
        Note that pandas converts each column containing `numpy.nan`s to dtype `float`.)

    state_names: dict (optional)
        A dict indicating, for each variable, the discrete set of states (or values)
        that the variable can take. If unspecified, the observed values in the data set
        are taken to be the only possible states.

    References
    ----------
    Koller & Friedman, Probabilistic Graphical Models - Principles and Techniques, 2009
    Section 18.3
    """

    def __init__(self, data, **kwargs):
        super(StructureScore, self).__init__(data, **kwargs)

    def score(self, model):
        """
        Computes a score to measure how well the given `BayesianNetwork` fits
        to the data set.  (This method relies on the `local_score`-method that
        is implemented in each subclass.)

        Parameters
        ----------
        model: BayesianNetwork instance
            The Bayesian network that is to be scored. Nodes of the BayesianNetwork need to coincide
            with column names of data set.

        Returns
        -------
        score: float
            A number indicating the degree of fit between data and model

        Examples
        --------
        >>> import pandas as pd
        >>> import numpy as np
        >>> from pgmpy.models import BayesianNetwork
        >>> from pgmpy.estimators import K2
        >>> # create random data sample with 3 variables, where B and C are identical:
        >>> data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list('AB'))
        >>> data['C'] = data['B']
        >>> K2(data).score(BayesianNetwork([['A','B'], ['A','C']]))
        -24242.367348745247
        >>> K2(data).score(BayesianNetwork([['A','B'], ['B','C']]))
        -16273.793897051042
        """

        score = 0
        for node in model.nodes():
            score += self.local_score(node, list(model.predecessors(node)))
        score += self.structure_prior(model)
        return score

    def structure_prior(self, model):
        """A (log) prior distribution over models. Currently unused (= uniform)."""
        return 0

    def structure_prior_ratio(self, operation):
        """Return the log ratio of the prior probabilities for a given proposed change to the DAG.
        Currently unused (=uniform)."""
        return 0



class K2(StructureScore):
    """
    Class for Bayesian structure scoring for BayesianNetworks with Dirichlet priors.
    The K2 score is the result of setting all Dirichlet hyperparameters/pseudo_counts
    to 1. The `score`-method measures how well a model is able to describe the given
    data set.

    Parameters
    ----------
    data: pandas DataFrame object
        dataframe object where each column represents one variable.
        (If some values in the data are missing the data cells should be set to `numpy.nan`.
        Note that pandas converts each column containing `numpy.nan`s to dtype `float`.)

    state_names: dict (optional)
        A dict indicating, for each variable, the discrete set of states (or values)
        that the variable can take. If unspecified, the observed values in the data set
        are taken to be the only possible states.

    References
    ----------
    [1] Koller & Friedman, Probabilistic Graphical Models - Principles and Techniques, 2009
        Section 18.3.4-18.3.6 (esp. page 806)
    [2] AM Carvalho, Scoring functions for learning Bayesian networks,
        http://www.lx.it.pt/~asmc/pub/talks/09-TA/ta_pres.pdf
    """

    def __init__(self, data, **kwargs):
        super(K2, self).__init__(data, **kwargs)
    def local_score(self, variable, parents):
        """Computes a score that measures how much a given variable is
        "influenced" by a given list of potential parents."""
        var_states = self.state_names[variable]
        var_cardinality = len(var_states)
        parents = list(parents)
        state_counts = self.state_counts(variable, parents, reindex=False)
        num_parents_states = np.prod([len(self.state_names[var]) for var in parents])

        counts = np.asarray(state_counts)
        log_gamma_counts = np.zeros_like(counts, dtype=float)

        # Compute log(gamma(counts + 1))
        gammaln(counts + 1, out=log_gamma_counts)

        # Compute the log-gamma conditional sample size
        log_gamma_conds = np.sum(counts, axis=0, dtype=float)
        gammaln(log_gamma_conds + var_cardinality, out=log_gamma_conds)

        # Adjustments when using reindex=False, as it drops columns of 0 state counts
        gamma_counts_adj = (
            (num_parents_states - counts.shape[1]) * var_cardinality * gammaln(1)
        )
        gamma_conds_adj = (num_parents_states - counts.shape[1]) * gammaln(
            var_cardinality
        )

        score = (
            (np.sum(log_gamma_counts) + gamma_counts_adj)
            - (np.sum(log_gamma_conds) + gamma_conds_adj)
            + num_parents_states * lgamma(var_cardinality)
        )
        return score
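
# Hedged example of `local_score` in isolation: with C defined as an exact copy of B
# (hypothetical data), the K2 local score of C given parent B should exceed the score
# given the unrelated parent A:
#
# >>> import numpy as np
# >>> import pandas as pd
# >>> data = pd.DataFrame(np.random.randint(0, 4, size=(1000, 2)), columns=list("AB"))
# >>> data["C"] = data["B"]
# >>> k2 = K2(data)
# >>> k2.local_score("C", ["B"]) > k2.local_score("C", ["A"])
# True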

class BDeu(StructureScore):
    """
    Class for Bayesian structure scoring for BayesianNetworks with Dirichlet priors.
    The BDeu score is the result of setting all Dirichlet hyperparameters/pseudo_counts
    to `equivalent_sample_size/variable_cardinality`.
    The `score`-method measures how well a model is able to describe the given data set.

    Parameters
    ----------
    data: pandas DataFrame object
        dataframe object where each column represents one variable.
        (If some values in the data are missing the data cells should be set to `numpy.nan`.
        Note that pandas converts each column containing `numpy.nan`s to dtype `float`.)

    equivalent_sample_size: int (default: 10)
        The equivalent/imaginary sample size (of uniform pseudo samples) for the
        Dirichlet hyperparameters. The score is sensitive to this value; runs with
        different values might be useful.

    state_names: dict (optional)
        A dict indicating, for each variable, the discrete set of states (or values)
        that the variable can take. If unspecified, the observed values in the data set
        are taken to be the only possible states.

    References
    ----------
    [1] Koller & Friedman, Probabilistic Graphical Models - Principles and Techniques, 2009
        Section 18.3.4-18.3.6 (esp. page 806)
    [2] AM Carvalho, Scoring functions for learning Bayesian networks,
        http://www.lx.it.pt/~asmc/pub/talks/09-TA/ta_pres.pdf
    """

    def __init__(self, data, equivalent_sample_size=10, **kwargs):
        self.equivalent_sample_size = equivalent_sample_size
        super(BDeu, self).__init__(data, **kwargs)
    def local_score(self, variable, parents):
        """Computes a score that measures how much a given variable is
        "influenced" by a given list of potential parents."""
        var_states = self.state_names[variable]
        var_cardinality = len(var_states)
        parents = list(parents)
        state_counts = self.state_counts(variable, parents, reindex=False)
        num_parents_states = np.prod([len(self.state_names[var]) for var in parents])

        counts = np.asarray(state_counts)
        # counts_size differs from counts.size because reindex=False drops all-zero columns.
        counts_size = num_parents_states * len(self.state_names[variable])
        log_gamma_counts = np.zeros_like(counts, dtype=float)
        alpha = self.equivalent_sample_size / num_parents_states
        beta = self.equivalent_sample_size / counts_size

        # Compute log(gamma(counts + beta))
        gammaln(counts + beta, out=log_gamma_counts)

        # Compute the log-gamma conditional sample size
        log_gamma_conds = np.sum(counts, axis=0, dtype=float)
        gammaln(log_gamma_conds + alpha, out=log_gamma_conds)

        # Adjustment for the missing 0-count columns that reindex=False drops to save memory.
        gamma_counts_adj = (
            (num_parents_states - counts.shape[1])
            * len(self.state_names[variable])
            * gammaln(beta)
        )
        gamma_conds_adj = (num_parents_states - counts.shape[1]) * gammaln(alpha)

        score = (
            (np.sum(log_gamma_counts) + gamma_counts_adj)
            - (np.sum(log_gamma_conds) + gamma_conds_adj)
            + num_parents_states * lgamma(alpha)
            - counts_size * lgamma(beta)
        )
        return score
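
# Hedged note: the BDeu score is sensitive to `equivalent_sample_size`; scoring the
# same family under two prior strengths generally yields different values
# (hypothetical data):
#
# >>> import numpy as np
# >>> import pandas as pd
# >>> data = pd.DataFrame(np.random.randint(0, 3, size=(500, 2)), columns=list("AB"))
# >>> weak = BDeu(data, equivalent_sample_size=1).local_score("B", ["A"])
# >>> strong = BDeu(data, equivalent_sample_size=50).local_score("B", ["A"])
# >>> weak == strong
# False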

class BDs(BDeu):
    """
    Class for Bayesian structure scoring for BayesianNetworks with Dirichlet priors.
    The BDs score is the result of setting all Dirichlet hyperparameters/pseudo_counts
    to `equivalent_sample_size/modified_variable_cardinality`, where the modified
    variable cardinality counts only the parent configurations that were actually
    observed in the data.
    The `score`-method measures how well a model is able to describe the given data set.

    Parameters
    ----------
    data: pandas DataFrame object
        dataframe object where each column represents one variable.
        (If some values in the data are missing the data cells should be set to `numpy.nan`.
        Note that pandas converts each column containing `numpy.nan`s to dtype `float`.)

    equivalent_sample_size: int (default: 10)
        The equivalent/imaginary sample size (of uniform pseudo samples) for the
        Dirichlet hyperparameters. The score is sensitive to this value; runs with
        different values might be useful.

    state_names: dict (optional)
        A dict indicating, for each variable, the discrete set of states (or values)
        that the variable can take. If unspecified, the observed values in the data set
        are taken to be the only possible states.

    References
    ----------
    [1] Scutari, Marco. An Empirical-Bayes Score for Discrete Bayesian Networks.
        Journal of Machine Learning Research, 2016, pp. 438–48
    """

    def __init__(self, data, equivalent_sample_size=10, **kwargs):
        super(BDs, self).__init__(data, equivalent_sample_size, **kwargs)
    def structure_prior_ratio(self, operation):
        """Return the log ratio of the prior probabilities for a given proposed
        change to the DAG. Currently uniform over edge additions and removals."""
        if operation == "+":
            return -log(2.0)
        if operation == "-":
            return log(2.0)
        return 0
    def structure_prior(self, model):
        """
        Implements the marginal uniform prior for the graph structure: each possible
        arc is independent, each of the two directions of an arc between any two nodes
        has probability 1/4, and the absence of an arc between any two nodes has
        probability 1/2.
        """
        nedges = float(len(model.edges()))
        nnodes = float(len(model.nodes()))
        possible_edges = nnodes * (nnodes - 1) / 2.0
        score = -(nedges + possible_edges) * log(2.0)
        return score
    def local_score(self, variable, parents):
        """Computes a score that measures how much a given variable is
        "influenced" by a given list of potential parents."""
        var_states = self.state_names[variable]
        var_cardinality = len(var_states)
        parents = list(parents)
        state_counts = self.state_counts(variable, parents, reindex=False)
        num_parents_states = np.prod([len(self.state_names[var]) for var in parents])

        counts = np.asarray(state_counts)
        # counts_size differs from counts.size because reindex=False drops all-zero columns.
        counts_size = num_parents_states * len(self.state_names[variable])
        log_gamma_counts = np.zeros_like(counts, dtype=float)
        alpha = self.equivalent_sample_size / state_counts.shape[1]
        beta = self.equivalent_sample_size / counts_size

        # Compute log(gamma(counts + beta))
        gammaln(counts + beta, out=log_gamma_counts)

        # Compute the log-gamma conditional sample size
        log_gamma_conds = np.sum(counts, axis=0, dtype=float)
        gammaln(log_gamma_conds + alpha, out=log_gamma_conds)

        # Adjustment for the missing 0-count columns that reindex=False drops to save memory.
        gamma_counts_adj = (
            (num_parents_states - counts.shape[1])
            * len(self.state_names[variable])
            * gammaln(beta)
        )
        gamma_conds_adj = (num_parents_states - counts.shape[1]) * gammaln(alpha)

        score = (
            (np.sum(log_gamma_counts) + gamma_counts_adj)
            - (np.sum(log_gamma_conds) + gamma_conds_adj)
            + state_counts.shape[1] * lgamma(alpha)
            - counts_size * lgamma(beta)
        )
        return score
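
# Hedged check of the BDs marginal uniform prior defined above: with 3 nodes there are
# 3 possible node pairs, so a DAG with one arc gets log-prior -(1 + 3) * log(2):
#
# >>> from math import isclose, log
# >>> import numpy as np
# >>> import pandas as pd
# >>> from pgmpy.models import BayesianNetwork
# >>> data = pd.DataFrame(np.random.randint(0, 2, size=(100, 3)), columns=list("ABC"))
# >>> model = BayesianNetwork([("A", "B")])
# >>> model.add_node("C")
# >>> isclose(BDs(data).structure_prior(model), -(1 + 3) * log(2))
# True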

class BIC(StructureScore):
    """
    Class for BIC structure scoring for BayesianNetworks. The BIC/MDL score
    ("Bayesian Information Criterion", also "Minimum Description Length") is a
    log-likelihood score with an additional penalty for network complexity, to
    avoid overfitting. The `score`-method measures how well a model is able to
    describe the given data set.

    Parameters
    ----------
    data: pandas DataFrame object
        dataframe object where each column represents one variable.
        (If some values in the data are missing the data cells should be set to `numpy.nan`.
        Note that pandas converts each column containing `numpy.nan`s to dtype `float`.)

    state_names: dict (optional)
        A dict indicating, for each variable, the discrete set of states (or values)
        that the variable can take. If unspecified, the observed values in the data set
        are taken to be the only possible states.

    References
    ----------
    [1] Koller & Friedman, Probabilistic Graphical Models - Principles and Techniques, 2009
        Section 18.3.4-18.3.6 (esp. page 802)
    [2] AM Carvalho, Scoring functions for learning Bayesian networks,
        http://www.lx.it.pt/~asmc/pub/talks/09-TA/ta_pres.pdf
    """

    def __init__(self, data, **kwargs):
        super(BIC, self).__init__(data, **kwargs)
    def local_score(self, variable, parents):
        """Computes a score that measures how much a given variable is
        "influenced" by a given list of potential parents."""
        var_states = self.state_names[variable]
        var_cardinality = len(var_states)
        parents = list(parents)
        state_counts = self.state_counts(variable, parents, reindex=False)
        sample_size = len(self.data)
        num_parents_states = np.prod([len(self.state_names[var]) for var in parents])

        counts = np.asarray(state_counts)
        log_likelihoods = np.zeros_like(counts, dtype=float)

        # Compute the log-counts
        np.log(counts, out=log_likelihoods, where=counts > 0)

        # Compute the log-conditional sample size
        log_conditionals = np.sum(counts, axis=0, dtype=float)
        np.log(log_conditionals, out=log_conditionals, where=log_conditionals > 0)

        # Compute the log-likelihoods
        log_likelihoods -= log_conditionals
        log_likelihoods *= counts

        score = np.sum(log_likelihoods)
        score -= 0.5 * log(sample_size) * num_parents_states * (var_cardinality - 1)
        return score
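
# Hedged sketch: the BIC penalty 0.5 * log(N) * q * (r - 1) grows with the number of
# parent configurations q, so adding an unrelated parent should not pay off
# (hypothetical data):
#
# >>> import numpy as np
# >>> import pandas as pd
# >>> data = pd.DataFrame(np.random.randint(0, 3, size=(2000, 2)), columns=list("AC"))
# >>> bic = BIC(data)
# >>> bic.local_score("C", ["A"]) <= bic.local_score("C", [])
# True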

class AIC(StructureScore):
    """
    Class for AIC structure scoring for BayesianNetworks. The AIC score
    ("Akaike Information Criterion") is a log-likelihood score with an additional
    penalty for network complexity, to avoid overfitting. The `score`-method
    measures how well a model is able to describe the given data set.

    Parameters
    ----------
    data: pandas DataFrame object
        dataframe object where each column represents one variable.
        (If some values in the data are missing the data cells should be set to `numpy.nan`.
        Note that pandas converts each column containing `numpy.nan`s to dtype `float`.)

    state_names: dict (optional)
        A dict indicating, for each variable, the discrete set of states (or values)
        that the variable can take. If unspecified, the observed values in the data set
        are taken to be the only possible states.

    References
    ----------
    [1] Koller & Friedman, Probabilistic Graphical Models - Principles and Techniques, 2009
        Section 18.3.4-18.3.6 (esp. page 802)
    [2] AM Carvalho, Scoring functions for learning Bayesian networks,
        http://www.lx.it.pt/~asmc/pub/talks/09-TA/ta_pres.pdf
    """

    def __init__(self, data, **kwargs):
        super(AIC, self).__init__(data, **kwargs)
    def local_score(self, variable, parents):
        """Computes a score that measures how much a given variable is
        "influenced" by a given list of potential parents."""
        var_states = self.state_names[variable]
        var_cardinality = len(var_states)
        parents = list(parents)
        state_counts = self.state_counts(variable, parents, reindex=False)
        num_parents_states = np.prod([len(self.state_names[var]) for var in parents])

        counts = np.asarray(state_counts)
        log_likelihoods = np.zeros_like(counts, dtype=float)

        # Compute the log-counts
        np.log(counts, out=log_likelihoods, where=counts > 0)

        # Compute the log-conditional sample size
        log_conditionals = np.sum(counts, axis=0, dtype=float)
        np.log(log_conditionals, out=log_conditionals, where=log_conditionals > 0)

        # Compute the log-likelihoods
        log_likelihoods -= log_conditionals
        log_likelihoods *= counts

        score = np.sum(log_likelihoods)
        score -= num_parents_states * (var_cardinality - 1)
        return score
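
# Hedged comparison: AIC charges 1 per free parameter where BIC charges log(N) / 2, so
# for N > e^2 samples the AIC local score of a family is at least the BIC one
# (hypothetical data):
#
# >>> import numpy as np
# >>> import pandas as pd
# >>> data = pd.DataFrame(np.random.randint(0, 3, size=(1000, 2)), columns=list("AB"))
# >>> AIC(data).local_score("B", ["A"]) >= BIC(data).local_score("B", ["A"])
# True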

class LogLikelihoodGauss(StructureScore):
    """Log-likelihood structure score for continuous variables, computed by
    fitting a Gaussian GLM (linear regression) of each variable on its parents."""

    def __init__(self, data, **kwargs):
        super(LogLikelihoodGauss, self).__init__(data, **kwargs)

    def _log_likelihood(self, variable, parents):
        if len(parents) == 0:
            glm_model = smf.glm(formula=f"{variable} ~ 1", data=self.data).fit()
        else:
            glm_model = smf.glm(
                formula=f"{variable} ~ {' + '.join(parents)}", data=self.data
            ).fit()

        return (glm_model.llf, glm_model.df_model)

    def local_score(self, variable, parents):
        ll, df_model = self._log_likelihood(variable=variable, parents=parents)
        return ll
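
# Hedged sketch of the Gaussian log-likelihood score on continuous data; column names
# are hypothetical and must be valid identifiers for the patsy-style formula:
#
# >>> import numpy as np
# >>> import pandas as pd
# >>> rng = np.random.default_rng(0)
# >>> x = rng.normal(size=1000)
# >>> data = pd.DataFrame({"X": x, "Y": 2 * x + rng.normal(size=1000)})
# >>> g = LogLikelihoodGauss(data)
# >>> g.local_score("Y", ["X"]) > g.local_score("Y", [])
# True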

class BICGauss(LogLikelihoodGauss):
    def __init__(self, data, **kwargs):
        super(BICGauss, self).__init__(data, **kwargs)

    def local_score(self, variable, parents):
        ll, df_model = self._log_likelihood(variable=variable, parents=parents)

        # df_model counts only the regressors; adding 2 accounts for the intercept
        # and the error variance, giving the number of estimated parameters.
        return ll - (((df_model + 2) / 2) * np.log(self.data.shape[0]))

class AICGauss(LogLikelihoodGauss):
    def __init__(self, data, **kwargs):
        super(AICGauss, self).__init__(data, **kwargs)

    def local_score(self, variable, parents):
        ll, df_model = self._log_likelihood(variable=variable, parents=parents)

        # df_model counts only the regressors; adding 2 accounts for the intercept
        # and the error variance, giving the number of estimated parameters.
        return ll - (df_model + 2)
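
# Hedged relationship between the Gaussian scores: both penalized variants subtract a
# penalty based on df_model + 2, so for log(N) > 2 the ordering ll >= aic >= bic holds
# for the same family (hypothetical data):
#
# >>> import numpy as np
# >>> import pandas as pd
# >>> rng = np.random.default_rng(1)
# >>> data = pd.DataFrame(rng.normal(size=(500, 2)), columns=["X", "Y"])
# >>> ll = LogLikelihoodGauss(data).local_score("Y", ["X"])
# >>> aic = AICGauss(data).local_score("Y", ["X"])
# >>> bic = BICGauss(data).local_score("Y", ["X"])
# >>> ll >= aic >= bic
# True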

class LogLikelihoodCondGauss(StructureScore):
    """
    References
    ----------
    [1] Andrews, B., Ramsey, J., & Cooper, G. F. (2018). Scoring Bayesian Networks of
        Mixed Variables. International journal of data science and analytics, 6(1), 3–18.
        https://doi.org/10.1007/s41060-017-0085-7
    """

    def __init__(self, data, **kwargs):
        super(LogLikelihoodCondGauss, self).__init__(data, **kwargs)

    @staticmethod
    def _adjusted_cov(df):
        """
        Computes an adjusted covariance matrix from the given dataframe.
        """
        # If the number of rows is less than the number of variables, return an
        # identity matrix (unit variances, zero covariances).
        if (df.shape[0] == 1) or (df.shape[0] < len(df.columns)):
            return pd.DataFrame(
                np.eye(len(df.columns)), index=df.columns, columns=df.columns
            )

        # If the covariance matrix is (near) singular, add a small jitter to make
        # it positive definite.
        df_cov = df.cov()
        if np.any(np.isclose(np.linalg.eig(df_cov)[0], 0)):
            df_cov = df_cov + 1e-6
        return df_cov

    def _cat_parents_product(self, parents):
        k = 1
        for pa in parents:
            if self.dtypes[pa] != "N":
                n_states = self.data[pa].nunique()
                if n_states > 1:
                    k *= n_states
        return k

    def _get_num_parameters(self, variable, parents):
        parent_dtypes = [self.dtypes[pa] for pa in parents]
        n_cont_parents = parent_dtypes.count("N")

        if self.dtypes[variable] == "N":
            k = self._cat_parents_product(parents=parents) * (n_cont_parents + 2)
        else:
            if n_cont_parents == 0:
                k = self._cat_parents_product(parents=parents) * (
                    self.data[variable].nunique() - 1
                )
            else:
                k = (
                    self._cat_parents_product(parents=parents)
                    * (self.data[variable].nunique() - 1)
                    * (n_cont_parents + 2)
                )
        return k

    def _log_likelihood(self, variable, parents):
        df = self.data.loc[:, [variable] + parents]

        # If variable is continuous, the probability is computed as:
        # P(C1 | C2, D) = p(C1, C2 | D) / p(C2 | D)
        if self.dtypes[variable] == "N":
            c1 = variable
            c2 = [var for var in parents if self.dtypes[var] == "N"]
            d = list(set(parents) - set(c2))

            # If D = {}, p(C1, C2 | D) = p(C1, C2) and p(C2 | D) = p(C2)
            if len(d) == 0:
                # If C2 = {}, p(C1, C2 | D) = p(C1) and p(C2 | D) = 1.
                if len(c2) == 0:
                    p_c1c2_d = multivariate_normal.pdf(
                        x=df,
                        mean=df.mean(axis=0),
                        cov=LogLikelihoodCondGauss._adjusted_cov(df),
                        allow_singular=True,
                    )
                    return np.sum(np.log(p_c1c2_d))
                else:
                    p_c1c2_d = multivariate_normal.pdf(
                        x=df,
                        mean=df.mean(axis=0),
                        cov=LogLikelihoodCondGauss._adjusted_cov(df),
                        allow_singular=True,
                    )
                    df_c2 = df.loc[:, c2]
                    p_c2_d = np.maximum(
                        1e-8,
                        multivariate_normal.pdf(
                            x=df_c2,
                            mean=df_c2.mean(axis=0),
                            cov=LogLikelihoodCondGauss._adjusted_cov(df_c2),
                            allow_singular=True,
                        ),
                    )
                    return np.sum(np.log(p_c1c2_d / p_c2_d))
            else:
                log_like = 0
                for d_states, df_d in df.groupby(d, observed=True):
                    p_c1c2_d = multivariate_normal.pdf(
                        x=df_d.loc[:, [c1] + c2],
                        mean=df_d.loc[:, [c1] + c2].mean(axis=0),
                        cov=LogLikelihoodCondGauss._adjusted_cov(
                            df_d.loc[:, [c1] + c2]
                        ),
                        allow_singular=True,
                    )
                    if len(c2) == 0:
                        p_c2_d = 1
                    else:
                        p_c2_d = np.maximum(
                            1e-8,
                            multivariate_normal.pdf(
                                x=df_d.loc[:, c2],
                                mean=df_d.loc[:, c2].mean(axis=0),
                                cov=LogLikelihoodCondGauss._adjusted_cov(
                                    df_d.loc[:, c2]
                                ),
                                allow_singular=True,
                            ),
                        )
                    log_like += np.sum(np.log(p_c1c2_d / p_c2_d))
                return log_like

        # If variable is discrete, the probability is computed as:
        # P(D1 | C, D2) = (p(C | D1, D2) p(D1, D2)) / (p(C | D2) p(D2))
        else:
            d1 = variable
            c = [var for var in parents if self.dtypes[var] == "N"]
            d2 = list(set(parents) - set(c))

            log_like = 0
            for d_states, df_d1d2 in df.groupby([d1] + d2, observed=True):
                # If C = {}, p(C | D1, D2) = 1.
                if len(c) == 0:
                    p_c_d1d2 = 1
                else:
                    p_c_d1d2 = multivariate_normal.pdf(
                        x=df_d1d2.loc[:, c],
                        mean=df_d1d2.loc[:, c].mean(axis=0),
                        cov=LogLikelihoodCondGauss._adjusted_cov(df_d1d2.loc[:, c]),
                        allow_singular=True,
                    )

                # P(D1, D2)
                p_d1d2 = np.repeat(df_d1d2.shape[0] / df.shape[0], df_d1d2.shape[0])

                # If D2 = {}, p(D1 | C, D2) = (p(C | D1, D2) p(D1, D2)) / p(C)
                if len(d2) == 0:
                    if len(c) == 0:
                        p_c_d2 = 1
                    else:
                        p_c_d2 = np.maximum(
                            1e-8,
                            multivariate_normal.pdf(
                                x=df_d1d2.loc[:, c],
                                mean=df.loc[:, c].mean(axis=0),
                                cov=LogLikelihoodCondGauss._adjusted_cov(df.loc[:, c]),
                                allow_singular=True,
                            ),
                        )
                    log_like += np.sum(np.log(p_c_d1d2 * p_d1d2 / p_c_d2))
                else:
                    if len(c) == 0:
                        p_c_d2 = 1
                    else:
                        df_d2 = df
                        for var, state in zip(d2, d_states[1:]):
                            df_d2 = df_d2.loc[df_d2[var] == state]
                        p_c_d2 = np.maximum(
                            1e-8,
                            multivariate_normal.pdf(
                                x=df_d1d2.loc[:, c],
                                mean=df_d2.loc[:, c].mean(axis=0),
                                cov=LogLikelihoodCondGauss._adjusted_cov(
                                    df_d2.loc[:, c]
                                ),
                                allow_singular=True,
                            ),
                        )

                    p_d2 = df.groupby(d2, observed=True).count() / df.shape[0]
                    for var, value in zip(d2, d_states[1:]):
                        p_d2 = p_d2.loc[p_d2.index.get_level_values(var) == value]

                    log_like += np.sum(
                        np.log((p_c_d1d2 * p_d1d2) / (p_c_d2 * p_d2.values.ravel()[0]))
                    )
            return log_like

    def local_score(self, variable, parents):
        ll = self._log_likelihood(variable=variable, parents=parents)
        return ll
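
# Hedged sketch on mixed data, assuming the base estimator tags float columns as
# numeric ("N") in `self.dtypes` and everything else as categorical; the data and the
# resulting value are hypothetical, so the call is shown unevaluated:
#
# >>> import numpy as np
# >>> import pandas as pd
# >>> rng = np.random.default_rng(2)
# >>> d = rng.integers(0, 2, size=300)
# >>> x = rng.normal(size=300)
# >>> data = pd.DataFrame({"D": d.astype(str), "X": x, "Y": x + d})
# >>> LogLikelihoodCondGauss(data).local_score("Y", ["X", "D"])  # doctest: +SKIP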

class BICCondGauss(LogLikelihoodCondGauss):
    def __init__(self, data, **kwargs):
        super(BICCondGauss, self).__init__(data, **kwargs)

    def local_score(self, variable, parents):
        ll = self._log_likelihood(variable=variable, parents=parents)
        k = self._get_num_parameters(variable=variable, parents=parents)

        return ll - ((k / 2) * np.log(self.data.shape[0]))

class AICCondGauss(LogLikelihoodCondGauss):
    def __init__(self, data, **kwargs):
        super(AICCondGauss, self).__init__(data, **kwargs)

    def local_score(self, variable, parents):
        ll = self._log_likelihood(variable=variable, parents=parents)
        k = self._get_num_parameters(variable=variable, parents=parents)

        return ll - k
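
# Hedged closing note: mirroring the Gaussian case, AICCondGauss subtracts
# k = _get_num_parameters(...) while BICCondGauss subtracts (k / 2) * log(N), so on
# the same (here purely continuous, hypothetical) data ll >= aic >= bic once log(N) > 2:
#
# >>> import numpy as np
# >>> import pandas as pd
# >>> rng = np.random.default_rng(3)
# >>> data = pd.DataFrame(rng.normal(size=(400, 2)), columns=["X", "Y"])
# >>> ll = LogLikelihoodCondGauss(data).local_score("Y", ["X"])
# >>> aic = AICCondGauss(data).local_score("Y", ["X"])
# >>> bic = BICCondGauss(data).local_score("Y", ["X"])
# >>> ll >= aic >= bic
# True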