[1]:
# Imports
from pgmpy.models import DynamicBayesianNetwork as DBN
from pgmpy.factors.discrete import TabularCPD
[2]:
# Initialize a simple DBN modeling Weather (W), ground Observation (O), Temperature (T), and Humidity (H).
dbn = DBN()
# pgmpy requires only the structure of the first time slice plus the edges
# connecting slice 0 to slice 1. It assumes this structure repeats for every
# later time slice, i.e. the model is a 2-TBN.

# Intra-slice dependencies (hold within each time slice)
intra_slice_edges = [
    (('W', 0), ('O', 0)),  # Weather influences ground observation
    (('T', 0), ('H', 0)),  # Temperature influences humidity
    (('W', 0), ('H', 0)),  # Weather influences humidity
]
# Inter-slice dependencies (carry state from slice t to slice t+1)
inter_slice_edges = [
    (('W', 0), ('W', 1)),  # Weather transition
    (('T', 0), ('T', 1)),  # Temperature transition
    (('W', 0), ('T', 1)),  # Weather influences future temperature
]
dbn.add_edges_from(intra_slice_edges)
dbn.add_edges_from(inter_slice_edges)

# Define the model parameters. pgmpy likewise assumes these CPDs repeat for
# all future time slices.

# Prior over the initial weather: Sunny, Cloudy, Rainy
weather_prior = TabularCPD(
    variable=('W', 0),
    variable_card=3,
    values=[[0.6], [0.3], [0.1]],
)
# Weather transition model P(W_1 | W_0)
weather_transition = TabularCPD(
    variable=('W', 1),
    variable_card=3,
    evidence=[('W', 0)],
    evidence_card=[3],
    values=[
        [0.7, 0.3, 0.2],  # P(Sunny | W_0)
        [0.2, 0.4, 0.3],  # P(Cloudy | W_0)
        [0.1, 0.3, 0.5],  # P(Rainy | W_0)
    ],
)
# Prior over the initial temperature: Hot, Mild, Cold
temperature_prior = TabularCPD(
    variable=('T', 0),
    variable_card=3,
    values=[[0.5], [0.4], [0.1]],
)
# Temperature transition model P(T_1 | T_0, W_0)
temperature_transition = TabularCPD(
    variable=('T', 1),
    variable_card=3,
    evidence=[('T', 0), ('W', 0)],
    evidence_card=[3, 3],
    values=[
        [0.8, 0.6, 0.1, 0.7, 0.4, 0.2, 0.6, 0.3, 0.1],  # P(Hot | T_0, W_0)
        [0.2, 0.3, 0.7, 0.2, 0.5, 0.3, 0.3, 0.4, 0.3],  # P(Mild | T_0, W_0)
        [0.0, 0.1, 0.2, 0.1, 0.1, 0.5, 0.1, 0.3, 0.6],  # P(Cold | T_0, W_0)
    ],
)
# Ground observation model P(O_0 | W_0): Dry, Wet
ground_observation = TabularCPD(
    variable=('O', 0),
    variable_card=2,
    evidence=[('W', 0)],
    evidence_card=[3],
    values=[
        [0.9, 0.6, 0.2],  # P(Dry | Sunny, Cloudy, Rainy)
        [0.1, 0.4, 0.8],  # P(Wet | Sunny, Cloudy, Rainy)
    ],
)
# Humidity observation model P(H_0 | T_0, W_0): Low, Medium, High
humidity_observation = TabularCPD(
    variable=('H', 0),
    variable_card=3,
    evidence=[('T', 0), ('W', 0)],
    evidence_card=[3, 3],
    values=[
        [0.7, 0.4, 0.1, 0.5, 0.3, 0.2, 0.3, 0.2, 0.1],  # P(Low | T_0, W_0)
        [0.2, 0.5, 0.3, 0.4, 0.5, 0.3, 0.4, 0.3, 0.2],  # P(Medium | T_0, W_0)
        [0.1, 0.1, 0.6, 0.1, 0.2, 0.5, 0.3, 0.5, 0.7],  # P(High | T_0, W_0)
    ],
)
# Attach all CPDs to the DBN
dbn.add_cpds(
    weather_prior, weather_transition,
    temperature_prior, temperature_transition,
    ground_observation, humidity_observation,
)
# Build the internal data structures pgmpy needs for further computation.
dbn.initialize_initial_state()
# Simulate data from the defined model.
samples = dbn.simulate(n_samples=1000, n_time_slices=10)
samples.head()
[2]:
| (W, 0) | (O, 0) | (H, 0) | (T, 0) | (W, 1) | (O, 1) | (H, 1) | (T, 1) | (W, 2) | (O, 2) | ... | (H, 7) | (T, 7) | (W, 8) | (O, 8) | (H, 8) | (W, 9) | (T, 9) | (O, 9) | (H, 9) | (T, 8) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | ... | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 0 |
| 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 2 | 2 | 1 | ... | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | 1 | 0 |
| 2 | 2 | 1 | 2 | 1 | 2 | 1 | 2 | 1 | 2 | 0 | ... | 0 | 0 | 1 | 0 | 1 | 2 | 0 | 0 | 2 | 0 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 2 | 2 | 1 | 0 | 1 | 2 | 1 | 1 | 2 | 1 |
| 4 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 0 | ... | 1 | 2 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 1 |
5 rows × 40 columns
[3]:
# Fitting model parameters to a defined network structure.
# Re-declare the network structure whose parameters we want to learn. Here we
# have assumed the same 2-TBN structure that the data was simulated from.
dbn = DBN()
two_tbn_edges = [
    # Intra-slice edges
    (('W', 0), ('O', 0)),  # Weather influences ground observation
    (('T', 0), ('H', 0)),  # Temperature influences humidity
    (('W', 0), ('H', 0)),  # Weather influences humidity
    # Inter-slice edges
    (('W', 0), ('W', 1)),  # Weather transition
    (('T', 0), ('T', 1)),  # Temperature transition
    (('W', 0), ('T', 1)),  # Weather influences future temperature
]
dbn.add_edges_from(two_tbn_edges)
# Estimate all CPDs from the simulated samples
dbn.fit(samples)
dbn.get_cpds()
[3]:
[<TabularCPD representing P((H, 0):3 | (T, 0):3, (W, 0):3) at 0x726d1a2b0cb0>,
<TabularCPD representing P((T, 0):3) at 0x726d1a2b08c0>,
<TabularCPD representing P((W, 0):3) at 0x726d1a2b0dd0>,
<TabularCPD representing P((O, 0):2 | (W, 0):3) at 0x726d1a2b0d40>,
<TabularCPD representing P((H, 1):3 | (T, 1):3, (W, 1):3) at 0x726d1a2b0830>,
<TabularCPD representing P((T, 1):3 | (T, 0):3, (W, 0):3) at 0x726d1a2b03b0>,
<TabularCPD representing P((W, 1):3 | (W, 0):3) at 0x726d1a2b0320>,
<TabularCPD representing P((O, 1):2 | (W, 1):3) at 0x726d1a2b07a0>]
[4]:
print(dbn.get_cpds(('W', 1)))
+-----------+---------------------+---------------------+---------------------+
| (W, 0) | (W, 0)(0) | (W, 0)(1) | (W, 0)(2) |
+-----------+---------------------+---------------------+---------------------+
| (W, 1)(0) | 0.7004921418172009 | 0.31049438383598194 | 0.20921204376971736 |
+-----------+---------------------+---------------------+---------------------+
| (W, 1)(1) | 0.19523741956078264 | 0.4021998199276747 | 0.2642262957939027 |
+-----------+---------------------+---------------------+---------------------+
| (W, 1)(2) | 0.10427043862201643 | 0.2873057962363434 | 0.5265616604363799 |
+-----------+---------------------+---------------------+---------------------+
[5]:
# Learning the model structure from data.
# pgmpy doesn't implement any specific methods for DBN structure learning. This is a hackish method to utilize the
# existing BN learning algorithms to estimate the structure of the DBN. Essentially, we remove the time-information from the
# given data and try to learn the 2-DBN network that remains constant across time-slices.
# First convert the given dataset into long form removing the time information such that it is suitable to learn the 2-DBN network.
import pandas as pd
# Use sorted() to ensure consistent node ordering across all iterations
nodes = sorted(dbn._nodes())
colnames = [node + '0' for node in nodes] + [node + '1' for node in nodes]
# Collect each consecutive-slice pair and concatenate once at the end.
# Growing a DataFrame with pd.concat inside the loop is quadratic in the
# number of slices, and seeding it from an empty frame coerces the columns
# to object dtype; a single concat over a list avoids both problems.
slice_pairs = []
for t in range(9):  # 10 simulated slices -> 9 consecutive (t, t+1) pairs
    cols = [(node, t) for node in nodes] + [(node, t + 1) for node in nodes]
    samples_t = samples.loc[:, cols]
    samples_t.columns = colnames
    slice_pairs.append(samples_t)
# ignore_index=True already produces a fresh RangeIndex, so no reset needed.
df_long = pd.concat(slice_pairs, ignore_index=True)
df_long.head()
[5]:
| H0 | O0 | T0 | W0 | H1 | O1 | T1 | W1 | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| 1 | 1 | 1 | 1 | 1 | 0 | 0 | 2 | 0 |
| 2 | 2 | 1 | 1 | 2 | 2 | 1 | 1 | 2 |
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | 0 | 1 | 0 | 1 | 1 | 1 | 0 | 1 |
[6]:
# Use this long data frame to learn the first two time slices of the DBN.
# Because generic structure-learning algorithms know nothing about time, we
# add constraints so that no edges are learned from time slice 1 back to 0.
from pgmpy.causal_discovery import HillClimbSearch
from pgmpy.estimators import ExpertKnowledge

# Forbid every backward-in-time edge: any slice-1 variable -> any slice-0 variable.
slice_vars = ['W', 'O', 'T', 'H']
backward_edges = [(u + '1', v + '0') for u in slice_vars for v in slice_vars]
expert = ExpertKnowledge(forbidden_edges=backward_edges)

est = HillClimbSearch(expert_knowledge=expert)
est.fit(df_long)
est.causal_graph_.edges()  # Use this learned DAG to define a DBN.
[6]:
OutEdgeView([('W0', 'T0'), ('W0', 'H0'), ('W0', 'T1'), ('W0', 'O0'), ('W0', 'W1'), ('T0', 'T1'), ('T0', 'H0'), ('T0', 'W0'), ('H0', 'W0'), ('H0', 'T0'), ('T1', 'H1'), ('T1', 'T0'), ('T1', 'W0'), ('W1', 'H1'), ('W1', 'O1'), ('W1', 'W0'), ('O1', 'W1'), ('O0', 'W0')])