Source code for pgmpy.metrics.shd
import networkx as nx
import numpy as np
from pgmpy.base import DAG
from pgmpy.metrics import _BaseSupervisedMetric
[docs]
class SHD(_BaseSupervisedMetric):
"""
Computes the Structural Hamming Distance between `true_causal_graph` and `est_causal_graph`.
SHD is defined as total number of basic operations: adding edges, removing edges, and reversing edges required to
transform one graph to the other. It is a symmetrical measure.
The code first accounts for edges that need to be deleted (from true_model), added (to true_model) and finally edges
that need to be reversed. All operations count as 1.
Examples
--------
>>> from pgmpy.metrics import SHD
>>> from pgmpy.base import DAG
>>> dag1 = DAG([(1, 2), (2, 3)])
>>> dag2 = DAG([(2, 1), (2, 3)])
>>> shd = SHD()
>>> shd(true_causal_graph=dag1, est_causal_graph=dag2)
1
"""
_tags = {
"name": "SHD",
"requires_true_graph": True,
"requires_data": False,
"lower_is_better": True,
"is_symmetric": True,
"supported_graph_types": (DAG,),
"is_default": True,
}
def _evaluate(self, true_causal_graph, est_causal_graph):
nodes_list = true_causal_graph.nodes()
dag_true = nx.DiGraph(true_causal_graph.edges())
dag_true.add_nodes_from(list(nx.isolates(true_causal_graph)))
m1 = nx.adjacency_matrix(dag_true, nodelist=nodes_list).todense()
dag_est = nx.DiGraph(est_causal_graph.edges())
dag_est.add_nodes_from(list(nx.isolates(est_causal_graph)))
m2 = nx.adjacency_matrix(dag_est, nodelist=nodes_list).todense()
shd = 0
s1 = m1 + m1.T
s2 = m2 + m2.T
# Edges that are in m1 but not in m2 (deletions from m1)
ds = s1 - s2
ind = np.where(ds > 0)
m1[ind] = 0
shd = shd + (len(ind[0]) / 2)
# Edges that are in m2 but not in m1 (additions to m1)
ind = np.where(ds < 0)
m1[ind] = m2[ind]
shd = shd + (len(ind[0]) / 2)
# Edges that need to be simply reversed
d = np.abs(m1 - m2)
shd = shd + (np.sum((d + d.T) > 0) / 2)
return int(shd)