# Source code for pgmpy.metrics.implied_cis
import math
from itertools import combinations
import pandas as pd
from tqdm import tqdm
from pgmpy.base import DAG
from pgmpy.ci_tests import get_ci_test
from pgmpy.global_vars import config
from pgmpy.metrics import _BaseUnsupervisedMetric
# [docs]
class ImpliedCIs(_BaseUnsupervisedMetric):
    """
    Tests the implied Conditional Independences (CI) of the DAG in the given data.

    Each missing edge in a model structure implies a CI statement. If the
    distribution of the data is faithful to the constraints of the model
    structure, these CI statements should hold in the data as well. This
    metric runs a statistical test for each implied CI on the given data.

    Pairs of non-adjacent nodes for which ``minimal_dseparator`` reports no
    d-separating set (i.e. returns ``None``) imply no testable CI and are
    left out of the result.

    Parameters
    ----------
    ci_test: str or callable (default: None)
        The CI test to use for statistical testing. Can be a string name of any test
        in :mod:`pgmpy.ci_tests` (e.g. ``"chi_square"``, ``"pearsonr"``) or a callable.
        The value is forwarded unchanged to :func:`pgmpy.ci_tests.get_ci_test`.

    show_progress: bool (default: True)
        Whether to show the progress of testing. The progress bar is shown
        only if ``config.SHOW_PROGRESS`` is also enabled.

    Returns
    -------
    pd.DataFrame: Returns a dataframe with each implied CI of the model and a p-value
        corresponding to it from the statistical test. A low p-value (e.g. <0.05)
        represents that the CI does not hold in the data.

    Examples
    --------
    >>> from pgmpy.example_models import load_model
    >>> from pgmpy.metrics import ImpliedCIs
    >>> model = load_model("bnlearn/cancer")
    >>> df = model.simulate(int(1e3))
    >>> implied_cis = ImpliedCIs(ci_test="chi_square", show_progress=False)
    >>> implied_cis.evaluate(X=df, causal_graph=model)
               u         v cond_vars   p-value
    0  Pollution    Smoker        []  0.189851
    1  Pollution      Xray  [Cancer]  0.404149
    2  Pollution  Dyspnoea  [Cancer]  0.613370
    3     Smoker      Xray  [Cancer]  0.352665
    4     Smoker  Dyspnoea  [Cancer]  1.000000
    5       Xray  Dyspnoea  [Cancer]  0.888619
    """

    _tags = {
        "name": "implied_cis",
        "requires_true_graph": False,
        "requires_data": True,
        "lower_is_better": None,
        "supported_graph_types": (DAG,),
        "is_default": False,
    }

    def __init__(self, ci_test=None, show_progress=True):
        self.ci_test = ci_test
        self.show_progress = show_progress

    def _evaluate(self, X, causal_graph):
        """
        Run the configured CI test for every pair of non-adjacent nodes.

        Parameters
        ----------
        X: pd.DataFrame
            The data handed to ``get_ci_test`` to build the test.

        causal_graph: DAG
            The graph whose implied CIs are tested.

        Returns
        -------
        pd.DataFrame: One row per tested pair with the columns ``u``, ``v``,
            ``cond_vars`` (the conditioning set as a list) and ``p-value``.
        """
        ci_test = get_ci_test(test=self.ci_test, data=X)

        # Materialise the node list once; it feeds both the pair generator
        # and the progress-bar total.
        nodes = list(causal_graph.nodes())
        pairs = combinations(nodes, 2)
        if self.show_progress and config.SHOW_PROGRESS:
            pairs = tqdm(pairs, total=math.comb(len(nodes), 2))

        rows = []
        for u, v in pairs:
            # Only a missing edge implies a CI statement; adjacency is checked
            # in both directions.
            if (u in causal_graph[v]) or (v in causal_graph[u]):
                continue

            separator = causal_graph.minimal_dseparator(u, v)
            # NOTE(review): minimal_dseparator may return None when no
            # d-separating set exists (e.g. with latent variables) - confirm
            # against the DAG implementation. list(None) would raise a
            # TypeError and abort the whole evaluation, so skip such pairs.
            if separator is None:
                continue

            Z = list(separator)
            # The test stores its outcome on itself; the p-value is read from
            # the `p_value_` attribute after the call.
            ci_test(X=u, Y=v, Z=Z)
            rows.append([u, v, Z, ci_test.p_value_])

        return pd.DataFrame(rows, columns=["u", "v", "cond_vars", "p-value"])