Source code for pgmpy.structure_score._base

from __future__ import annotations

from functools import lru_cache

import pandas as pd
from skbase.base import BaseObject
from skbase.lookup import all_objects

from pgmpy.utils import build_state_names, get_dataset_type, get_state_counts, preprocess_data


class BaseStructureScore(BaseObject):
    """
    Abstract base class for structure scoring in pgmpy.

    Structure scores evaluate how well a candidate Bayesian network structure
    fits observed data. This class implements the shared scoring workflow,
    caching for local scores, and utilities for computing conditional state
    counts.

    Use one of the concrete score classes such as `K2`, `BDeu`, `BIC`, or
    `AIC` instead of instantiating this class directly.

    Parameters
    ----------
    data : pandas.DataFrame
        DataFrame in which each column represents a variable. Missing values
        should be marked as `numpy.nan`.

    state_names : dict, optional
        Dictionary mapping each variable name to its allowed states. If not
        specified, the observed values in the data are used.
    """

    _tags = {
        "name": None,
        "supported_datatype": None,
        "default_for": None,
        "is_parameteric": False,
    }

    def __init__(self, data, state_names=None):
        self.data, self.dtypes = preprocess_data(data)
        if self.data is not None:
            self.variables = list(self.data.columns.values)
            self.state_names = build_state_names(self.data, state_names=state_names)

        # The cache wraps the *bound* method, so every instance owns its own
        # cache (at most 10000 entries) instead of sharing one across the class.
        self._cached_local_score = lru_cache(maxsize=10000)(self._local_score)

    def local_score(self, variable: str, parents: tuple[str, ...]) -> float:
        """
        Compute the cached local score for `variable` given `parents`.

        Parameters
        ----------
        variable : str
            Name of the variable being scored.

        parents : tuple of str
            Names of the parent variables. Any iterable is accepted and is
            converted to a tuple before the cache lookup. The order is part of
            the cache key, so the same parents in a different order are cached
            separately.

        Returns
        -------
        float
            The value produced by `_local_score` for this pair of arguments.
        """
        # `lru_cache` requires hashable arguments; a list of parents would
        # otherwise fail with "TypeError: unhashable type". Tuples are passed
        # through unchanged by `tuple()`.
        return self._cached_local_score(variable, tuple(parents))

    def _local_score(self, variable: str, parents: tuple[str, ...]) -> float:
        """
        Compute the uncached local score for `variable` given `parents`.

        Concrete score classes must override this method.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    def score(self, model) -> float:
        """
        Compute a structure score for a model.

        The score is the sum of `local_score` over every node of `model`
        (each node conditioned on its predecessors) plus `structure_prior`.

        Parameters
        ----------
        model : object
            Graph-like object providing `nodes()` and `predecessors(node)`.

        Returns
        -------
        float
            Total score of the structure.
        """
        local_total = sum(self.local_score(node, tuple(model.predecessors(node))) for node in model.nodes())
        return local_total + self.structure_prior(model)

    def structure_prior(self, model) -> float:
        """
        Return the log prior over structures.

        This base implementation ignores `model` and returns 0.
        """
        return 0

    def structure_prior_ratio(self, operation) -> float:
        """
        Return the log prior ratio for a structure operation.

        This base implementation ignores `operation` and returns 0.
        """
        return 0

    def state_counts(
        self,
        variable: str,
        parents: tuple[str, ...] = (),
        weighted: bool = False,
        reindex: bool = True,
    ) -> pd.DataFrame:
        """
        Return state counts for `variable`, optionally conditioned on `parents`.

        Delegates to `get_state_counts`, supplying this instance's `data` and
        `state_names`; the remaining arguments are forwarded unchanged.

        Parameters
        ----------
        variable : str
            Name of the variable to count states for.

        parents : tuple of str, optional
            Names of the conditioning variables. Defaults to no parents.

        weighted : bool, optional
            Forwarded to `get_state_counts`.

        reindex : bool, optional
            Forwarded to `get_state_counts`.

        Returns
        -------
        pandas.DataFrame
            The table returned by `get_state_counts`.
        """
        return get_state_counts(
            data=self.data,
            state_names=self.state_names,
            variable=variable,
            parents=parents,
            weighted=weighted,
            reindex=reindex,
        )
def get_scoring_method(
    scoring_method: str | BaseStructureScore | None,
    data: pd.DataFrame | None,
) -> BaseStructureScore:
    """
    Resolve `scoring_method` to a structure score instance.

    Parameters
    ----------
    scoring_method : str, BaseStructureScore or None
        * A `BaseStructureScore` instance is returned as is.
        * A string is lower-cased and matched against the `"name"` tag of the
          score classes found in `pgmpy.structure_score`.
        * None selects the score class whose `"default_for"` tag equals the
          dataset type reported by `get_dataset_type(data)`.

    data : pandas.DataFrame or None
        Data passed to the constructor of the selected score class. Only
        optional when `scoring_method` is already an instance.

    Returns
    -------
    BaseStructureScore
        The given instance, or a new instance of the first matching class.

    Raises
    ------
    ValueError
        If both arguments are None, if `scoring_method` is of an unsupported
        type, if no score class matches, or if a class matches but `data` is
        None.
    """
    if isinstance(scoring_method, BaseStructureScore):
        return scoring_method

    if scoring_method is None:
        if data is None:
            raise ValueError("Cannot determine scoring method: both `scoring_method` and `data` are None.")
        var_type = get_dataset_type(data)
        filter_tags = {"default_for": var_type}
    elif isinstance(scoring_method, str):
        filter_tags = {"name": scoring_method.lower()}
    else:
        raise ValueError(f"Invalid `scoring_method` argument: {scoring_method!r}")

    scores = all_objects(
        object_types=BaseStructureScore,
        package_name="pgmpy.structure_score",
        return_names=False,
        filter_tags=filter_tags,
    )

    if not scores:
        if scoring_method is None:
            # Nothing was requested by name, so "Unknown scoring method: None"
            # would hide the real cause: no default exists for this data type.
            raise ValueError(f"No default scoring method is registered for dataset type {var_type!r}.")
        raise ValueError(f"Unknown scoring method: {scoring_method!r}")

    # When several classes match, the first one returned by the lookup wins.
    cls = scores[0]
    if data is None:
        raise ValueError(f"Scoring method '{cls.__name__}' requires data, but data is None.")
    return cls(data=data)