Source code for pgmpy.metrics.implied_cis

import math
from itertools import combinations

import pandas as pd
from tqdm import tqdm

from pgmpy.base import DAG
from pgmpy.ci_tests import get_ci_test
from pgmpy.global_vars import config
from pgmpy.metrics import _BaseUnsupervisedMetric


class ImpliedCIs(_BaseUnsupervisedMetric):
    """
    Check the Conditional Independences (CIs) implied by a DAG against data.

    Every pair of variables that is not joined by an edge in the model
    structure implies a CI statement. If the data distribution is faithful to
    the constraints of the structure, those statements should also hold in the
    data. This metric runs one statistical test per implied CI and reports the
    resulting p-values.

    Parameters
    ----------
    ci_test: str or callable
        The CI test to use for statistical testing. Can be a string name of
        any test in :mod:`pgmpy.ci_tests` (e.g. ``"chi_square"``,
        ``"pearsonr"``) or a callable.

    show_progress: bool (default: True)
        Whether to show the progress of testing. The progress bar is displayed
        only when ``config.SHOW_PROGRESS`` is truthy as well.

    Examples
    --------
    >>> from pgmpy.example_models import load_model
    >>> from pgmpy.metrics import ImpliedCIs
    >>> model = load_model("bnlearn/cancer")
    >>> df = model.simulate(int(1e3))
    >>> implied_cis = ImpliedCIs(ci_test="chi_square", show_progress=False)
    >>> implied_cis.evaluate(X=df, causal_graph=model)
               u         v  cond_vars   p-value
    0  Pollution    Smoker         []  0.189851
    1  Pollution      Xray   [Cancer]  0.404149
    2  Pollution  Dyspnoea   [Cancer]  0.613370
    3     Smoker      Xray   [Cancer]  0.352665
    4     Smoker  Dyspnoea   [Cancer]  1.000000
    5       Xray  Dyspnoea   [Cancer]  0.888619
    """

    _tags = {
        "name": "implied_cis",
        "requires_true_graph": False,
        "requires_data": True,
        "lower_is_better": None,
        "supported_graph_types": (DAG,),
        "is_default": False,
    }

    def __init__(self, ci_test=None, show_progress=True):
        self.ci_test = ci_test
        self.show_progress = show_progress

    def _evaluate(self, X, causal_graph):
        """
        Run the configured CI test on every non-adjacent pair of nodes.

        Parameters
        ----------
        X:
            The data handed to :func:`get_ci_test` as its ``data`` argument.

        causal_graph:
            The graph whose implied CIs are tested. It must provide
            ``nodes()``, item access by node and ``minimal_dseparator``.

        Returns
        -------
        pd.DataFrame:
            One row per tested pair with the columns ``u``, ``v``,
            ``cond_vars`` and ``p-value``. A low p-value (e.g. <0.05)
            represents that the CI does not hold in the data.
        """
        tester = get_ci_test(test=self.ci_test, data=X)

        node_view = causal_graph.nodes()
        pairs = combinations(node_view, 2)
        if self.show_progress and config.SHOW_PROGRESS:
            # `combinations` has no length, so give tqdm the pair count.
            pairs = tqdm(pairs, total=math.comb(len(node_view), 2))

        rows = []
        for u, v in pairs:
            # Only pairs without an edge in either direction imply a CI.
            if (u in causal_graph[v]) or (v in causal_graph[u]):
                continue

            # NOTE(review): assumes `minimal_dseparator` always returns an
            # iterable for non-adjacent pairs; `list(None)` would raise a
            # TypeError -- confirm against the graph class in use.
            separator = list(causal_graph.minimal_dseparator(u, v))

            # The test object exposes its result through `p_value_` after the
            # call; the call's return value is not used.
            tester(X=u, Y=v, Z=separator)
            rows.append([u, v, separator, tester.p_value_])

        return pd.DataFrame(rows, columns=["u", "v", "cond_vars", "p-value"])