Basic Operations on Bayesian Networks#

This notebook shows examples of some basic operations that can be performed on a Bayesian Network. We use the Protein Signalling network from the bnlearn repository as the example model: https://www.bnlearn.com/bnrepository/discrete-medium.html#sachs

The DiscreteBayesianNetwork class in pgmpy inherits from the networkx.DiGraph class; hence, all the methods defined for networkx.DiGraph should also work for DiscreteBayesianNetwork.

[1]:
import pprint
import shutil
import importlib
from IPython.display import Image, display
import networkx as nx
from pgmpy.factors.discrete import TabularCPD


# Load the sachs model.
# For other ways to define a model, please refer: https://pgmpy.org/examples/Creating%20a%20Discrete%20Bayesian%20Network.html
# `import importlib` alone does not guarantee that the `importlib.util`
# submodule is loaded, so import it explicitly before calling find_spec.
import importlib.util

from pgmpy.example_models import load_model
sachs_model = load_model('bnlearn/sachs')

# Visualize the model (optional dependency).
# Rendering needs both the pygraphviz Python package and the Graphviz `dot`
# executable on PATH; if either is missing, skip the figure instead of failing.
if (importlib.util.find_spec("pygraphviz") is not None
    and shutil.which("dot") is not None):
    viz = sachs_model.to_graphviz()
    # Write the rendered graph to disk, then show the saved file inline.
    viz.draw('sachs.png', prog='dot')
    display(Image('sachs.png'))
else:
    print("Note: Graph visualization requires pygraphviz and the Graphviz `dot` executable (optional dependencies).")
../_images/examples_Basic_Operations_on_BN_2_0.png

Attributes of the Model Structure#

[2]:
# Collect the structural attributes of the model first ...
all_nodes = sachs_model.nodes()                 # nodes / random variables
all_edges = sachs_model.edges()                 # edges
all_cpds = sachs_model.get_cpds()               # CPDs (kept for reference, not printed)
akt_parents = sachs_model.get_parents('Akt')    # parents of a specific node
pka_children = sachs_model.get_children('PKA')  # children of a specific node
leaves = sachs_model.get_leaves()               # leaf nodes of the model
roots = sachs_model.get_roots()                 # root nodes of the model

# ... then report them, one labelled line each.
print(f"Nodes: {all_nodes} \n")
print(f"Edges: {all_edges} \n")
print(f"Parents of Akt: {akt_parents} \n")
print(f"Children of PKA: {pka_children} \n")
print(f"Leaf nodes in the model: {leaves} \n")
print(f"Root nodes in the model: {roots} \n")
Nodes: ['Akt', 'Erk', 'Jnk', 'Mek', 'P38', 'PIP2', 'PIP3', 'PKA', 'PKC', 'Plcg', 'Raf']

Edges: [('Erk', 'Akt'), ('Mek', 'Erk'), ('PIP3', 'PIP2'), ('PKA', 'Akt'), ('PKA', 'Erk'), ('PKA', 'Jnk'), ('PKA', 'Mek'), ('PKA', 'P38'), ('PKA', 'Raf'), ('PKC', 'Jnk'), ('PKC', 'Mek'), ('PKC', 'P38'), ('PKC', 'PKA'), ('PKC', 'Raf'), ('Plcg', 'PIP2'), ('Plcg', 'PIP3'), ('Raf', 'Mek')]

Parents of Akt: ['Erk', 'PKA']

Children of PKA: ['Akt', 'Erk', 'Jnk', 'Mek', 'P38', 'Raf']

Leaf nodes in the model: ['Akt', 'Jnk', 'P38', 'PIP2']

Root nodes in the model: ['PKC', 'Plcg']

Modifying the Model Structure#

[3]:
# This cell edits `sachs_model` in place. Every node and edge added here is
# removed again further down, and the edges are removed before the nodes
# they point to.

# Adding nodes to the model: a single node, then several from a list.
sachs_model.add_node('new_node')
sachs_model.add_nodes_from(['new_node1', 'new_node2'])

# Adding edges to the model: a single (parent, child) edge, then several from a list.
sachs_model.add_edge('Akt', 'new_node')
sachs_model.add_edges_from([('Akt', 'new_node1'), ('Akt', 'new_node2')])

# Removing edges from the model (the same three edges that were just added).
sachs_model.remove_edge('Akt', 'new_node')
sachs_model.remove_edges_from([('Akt', 'new_node1'), ('Akt', 'new_node2')])

# Removing nodes from the model (the same three nodes that were added above).
sachs_model.remove_node('new_node')
sachs_model.remove_nodes_from(['new_node1', 'new_node2'])
[4]:
# At any point, check_model can be called to check if the specified model is correct.
# It is the last expression of the cell, so its return value is what the notebook displays.
sachs_model.check_model()
[4]:
True

Modifying associated parameterization#

[5]:
# Getting an associated CPD
# NOTE: the returned CPD is neither stored nor displayed here; a notebook only
# shows the value of a cell's last expression.
sachs_model.get_cpds('Akt')

# Adding new CPDs to the model
# The node is added to the graph first, then a CPD for that node is attached.
sachs_model.add_node('new_node')
# presumably a two-state variable with probabilities 0.2 / 0.8 and no parents
# -- confirm against the TabularCPD documentation
new_cpd = TabularCPD('new_node', 2, [[0.2], [0.8]])
sachs_model.add_cpds(new_cpd)

# Removing the CPD and the node
# The CPD is removed before the node it belongs to.
sachs_model.remove_cpds('new_node')
sachs_model.remove_node('new_node')

# Re-validate the model after the add/remove round trip; as the last
# expression of the cell, this result is what gets displayed.
sachs_model.check_model()
[5]:
True

D-Separation#

[6]:
# Check whether PKC and Akt are d-connected: first with nothing observed,
# then conditioned on progressively larger sets of observed variables.
pkc_akt_unconditional = sachs_model.is_dconnected('PKC', 'Akt')
pkc_akt_given_mek = sachs_model.is_dconnected('PKC', 'Akt', observed=['Mek'])
pkc_akt_given_mek_pka = sachs_model.is_dconnected('PKC', 'Akt', observed=['Mek', 'PKA'])

# One result per line, in the order the checks were made.
print(pkc_akt_unconditional, pkc_akt_given_mek, pkc_akt_given_mek_pka, sep="\n")
True
True
False
[7]:
# Variables d-connected to a given variable (or to each variable in a list),
# with nothing observed.
trail_pka = sachs_model.active_trail_nodes('PKA')
trail_pka_raf = sachs_model.active_trail_nodes(['PKA', 'Raf'])

# The same queries, now conditioned on some other (observed) variables.
trail_pka_observed = sachs_model.active_trail_nodes('PKA', observed=['Mek', 'PKC'])
trail_pka_raf_observed = sachs_model.active_trail_nodes(['PKA', 'Raf'], observed=['Mek', 'PKC'])

# Unconditional results, a blank separator line, then the conditional results.
print(trail_pka)
print(trail_pka_raf)
print()
print(trail_pka_observed)
print(trail_pka_raf_observed)
{'PKA': {'P38', 'PKA', 'Akt', 'Erk', 'Mek', 'PKC', 'Jnk', 'Raf'}}
{'PKA': {'P38', 'PKA', 'Akt', 'Erk', 'Mek', 'PKC', 'Jnk', 'Raf'}, 'Raf': {'P38', 'PKA', 'Akt', 'Erk', 'Mek', 'PKC', 'Jnk', 'Raf'}}

{'PKA': {'Akt', 'Erk', 'Jnk', 'P38', 'PKA', 'Raf'}}
{'PKA': {'Akt', 'Erk', 'Jnk', 'P38', 'PKA', 'Raf'}, 'Raf': {'Akt', 'Erk', 'Jnk', 'P38', 'PKA', 'Raf'}}
[8]:
# Find the minimal d-separator of any two variables
# (here PKC and Akt; the result is printed as a set of variable names)
print(sachs_model.minimal_dseparator('PKC', 'Akt'))
{'Erk', 'PKA'}

Other Methods#

[9]:
# Get the Markov blanket of a variable
# (displayed below as a list of node names)
sachs_model.get_markov_blanket('Raf')
[9]:
['PKC', 'PKA', 'Mek']
[10]:
# List all local independencies of a node
sachs_model.local_independencies('Raf')
[10]:
(Raf ⟂ PIP3, Jnk, Plcg, P38, PIP2 | PKC, PKA)
[11]:
# List the independencies implied by the network.
# Only the first 10 entries are shown (the `[:10]` slice) to keep the output short.
sachs_model.get_independencies().independencies[:10]
[11]:
[(Plcg ⟂ Raf),
 (PIP2 ⟂ Erk),
 (PIP3 ⟂ Mek),
 (Akt ⟂ Raf | Erk, PKA),
 (PIP2 ⟂ Jnk),
 (Akt ⟂ Jnk | PKC, PKA),
 (Erk ⟂ Jnk | PKA, Mek),
 (Erk ⟂ Raf | PKA, Mek),
 (PIP3 ⟂ P38),
 (PIP2 ⟂ PKA)]