import networkx as nx
import numpy as np
import pandas as pd
from pgmpy.base import DAG, PDAG
from pgmpy.metrics import _BaseSupervisedMetric
[docs]
class AdjacencyConfusionMatrix(_BaseSupervisedMetric):
    """
    Computes confusion matrix based metrics for comparing causal graph skeletons.

    Treats edge presence/absence in the undirected skeleton as a binary
    classification problem over every unordered pair of nodes and computes
    confusion matrix based metrics. Edge orientation is ignored: a pair counts
    as "present" if the graph has an edge between the two nodes in either
    direction.

    Parameters
    ----------
    metrics : List[str], optional
        List of metrics to compute. If None or empty, computes all available
        metrics. Names other than the ones listed below are ignored.

        - ``cm`` : Confusion matrix for skeleton edge presence.
        - ``precision`` : Fraction of estimated skeleton edges that are correct
          (TP / (TP + FP)).
        - ``recall`` : Fraction of true skeleton edges that are recovered
          (TP / (TP + FN)).
        - ``f1`` : Harmonic mean of precision and recall.
        - ``npv`` : Fraction of absent estimated edges that are truly absent
          (TN / (TN + FN)).
        - ``specificity`` : Fraction of truly absent edges correctly predicted
          absent (TN / (TN + FP)).

    Returns
    -------
    Dict[str, float or pandas.DataFrame]
        Dictionary containing the computed metrics. ``cm`` is a 2x2
        ``pandas.DataFrame`` (rows: actual, columns: estimated); every other
        metric is a float. A ratio whose denominator is zero is reported as
        0.0.

    Examples
    --------
    >>> from pgmpy.metrics import AdjacencyConfusionMatrix
    >>> from pgmpy.base import DAG
    >>> true_dag = DAG(
    ...     [
    ...         ("Smoking", "Lung_Cancer"),
    ...         ("Smoking", "Heart_Disease"),
    ...         ("Age", "Heart_Disease"),
    ...         ("Age", "Lung_Cancer"),
    ...     ]
    ... )
    >>> est_dag = DAG([("Smoking", "Lung_Cancer"), ("Age", "Heart_Disease")])
    >>> cm = AdjacencyConfusionMatrix()
    >>> result = cm.evaluate(true_dag, est_dag)
    >>> result["precision"]
    1.0
    >>> result["recall"]
    0.5
    >>> result["cm"]  # doctest: +NORMALIZE_WHITESPACE
    Estimated       Est Present  Est Absent
    Actual
    Actual Present            2           2
    Actual Absent             0           2

    Compute only selected metrics:

    >>> cm = AdjacencyConfusionMatrix(metrics=["precision", "recall", "f1"])
    >>> result = cm.evaluate(true_dag, est_dag)
    >>> "f1" in result
    True
    >>> "npv" in result
    False

    References
    ----------
    .. [1] Petersen, A. H. (2025). Are you doing better than random guessing?
       a call for using negative controls when evaluating causal discovery
       algorithms. Proceedings of the Forty-First Conference on Uncertainty
       in Artificial Intelligence. Rio de Janeiro, Brazil: JMLR.org.
       https://arxiv.org/abs/2412.10039
    """

    _tags = {
        "name": "adjacency_confusion_matrix",
        "requires_true_graph": True,
        "requires_data": False,
        "lower_is_better": False,
        "is_symmetric": False,
        "supported_graph_types": (DAG, PDAG),
    }

    def __init__(self, metrics: list[str] | None = None):
        # A falsy value (None or an empty list) selects every available metric.
        self.metrics = metrics or [
            "cm",
            "precision",
            "recall",
            "f1",
            "npv",
            "specificity",
        ]
        super().__init__()

    def _evaluate(self, true_causal_graph, est_causal_graph):
        """Evaluate adjacency confusion matrix metrics.

        Parameters
        ----------
        true_causal_graph : DAG or PDAG
            Reference graph. Its nodes define the set of node pairs that are
            classified as edge-present or edge-absent.
        est_causal_graph : DAG or PDAG
            Estimated graph, read over the same node list as the reference.
            NOTE(review): presumably the caller guarantees both graphs share
            the same node set -- confirm against the base class.

        Returns
        -------
        dict
            Maps each requested metric name to its value; ``"cm"`` maps to a
            ``pandas.DataFrame``, every other metric to a float.
        """
        # Step 1: Get skeleton adjacency for both graphs over a common node order.
        # The order only fixes the row/column layout shared by both matrices; the
        # counts below range over unordered node pairs of a symmetric matrix, so
        # they do not depend on it. Using the native node order (rather than
        # sorting) avoids a TypeError for labels that are not mutually orderable,
        # e.g. integers mixed with strings.
        nodes_list = list(true_causal_graph.nodes())

        true_adj = nx.adjacency_matrix(
            true_causal_graph, nodelist=nodes_list, weight=None
        ).todense()
        est_adj = nx.adjacency_matrix(
            est_causal_graph, nodelist=nodes_list, weight=None
        ).todense()

        # Symmetrise so that an edge in either direction marks the pair as adjacent.
        true_skel = (true_adj + true_adj.T) > 0
        est_skel = (est_adj + est_adj.T) > 0

        # Strict upper triangle: each unordered pair once, no self-pairs.
        mask = np.triu(np.ones_like(true_skel, dtype=bool), k=1)
        true_edges = np.asarray(true_skel[mask]).flatten()
        est_edges = np.asarray(est_skel[mask]).flatten()

        # Step 2: Compute confusion matrix components
        tp = int(np.sum(true_edges & est_edges))
        fp = int(np.sum(~true_edges & est_edges))
        fn = int(np.sum(true_edges & ~est_edges))
        tn = int(np.sum(~true_edges & ~est_edges))

        def ratio(numerator, denominator):
            # An undefined ratio (zero denominator) is reported as 0.0.
            return numerator / denominator if denominator > 0 else 0.0

        precision = ratio(tp, tp + fp)
        recall = ratio(tp, tp + fn)

        # Step 3: Compute specified metrics
        requested = self.metrics
        results = {}

        if "cm" in requested:
            results["cm"] = pd.DataFrame(
                [[tp, fn], [fp, tn]],
                index=pd.Index(["Actual Present", "Actual Absent"], name="Actual"),
                columns=pd.Index(["Est Present", "Est Absent"], name="Estimated"),
            )

        # Insertion order here fixes the key order of the returned dictionary.
        scalar_metrics = {
            "precision": precision,
            "recall": recall,
            "f1": ratio(2 * precision * recall, precision + recall),
            "npv": ratio(tn, tn + fn),
            "specificity": ratio(tn, tn + fp),
        }
        results.update(
            (name, value) for name, value in scalar_metrics.items() if name in requested
        )

        return results