[1]:
# Imports

from pgmpy.models import DynamicBayesianNetwork as DBN
from pgmpy.factors.discrete import TabularCPD
[2]:
# Initialize a simple DBN modeling Weather (W), ground Observation (O), Temperature (T), and Humidity (H).

dbn = DBN()

# pgmpy requires the user to define the structure of the first time slice and the edges connecting the first time slice to the second time slice.
# pgmpy assumes that this structure remains constant for all further time slices, i.e., the model is a 2-TBN (two-slice temporal Bayesian network).

# Add intra-slice edges (defined once for time slice 0; pgmpy mirrors them into slice 1)
dbn.add_edges_from([
    (('W', 0), ('O', 0)),  # Weather influences ground observation
    (('T', 0), ('H', 0)),  # Temperature influences humidity
    (('W', 0), ('H', 0))   # Weather influences humidity
])

# Add inter-slice edges
dbn.add_edges_from([
    (('W', 0), ('W', 1)),  # Weather transition
    (('T', 0), ('T', 1)),  # Temperature transition
    (('W', 0), ('T', 1))   # Weather influences future temperature
])

# Define the parameters of the model. Again, pgmpy assumes that these CPDs remain the same for all future time slices.

# Define CPDs
# CPDs for W (Weather): initial distribution and transition
cpd_w_0 = TabularCPD(
    variable=('W', 0),
    variable_card=3,  # Sunny, Cloudy, Rainy
    values=[[0.6], [0.3], [0.1]],  # Initial probabilities
)

cpd_w_1 = TabularCPD(
    variable=('W', 1),
    variable_card=3,
    evidence=[('W', 0)],
    evidence_card=[3],
    values=[
        [0.7, 0.3, 0.2],  # P(Sunny | W_0)
        [0.2, 0.4, 0.3],  # P(Cloudy | W_0)
        [0.1, 0.3, 0.5]   # P(Rainy | W_0)
    ],
)

# CPDs for T (Temperature): initial distribution and transition
cpd_t_0 = TabularCPD(
    variable=('T', 0),
    variable_card=3,  # Hot, Mild, Cold
    values=[[0.5], [0.4], [0.1]]  # Initial probabilities
)

cpd_t_1 = TabularCPD(
    variable=('T', 1),
    variable_card=3,
    evidence=[('T', 0), ('W', 0)],
    evidence_card=[3, 3],
    values=[
        [0.8, 0.6, 0.1, 0.7, 0.4, 0.2, 0.6, 0.3, 0.1],  # P(Hot | T_0, W_0)
        [0.2, 0.3, 0.7, 0.2, 0.5, 0.3, 0.3, 0.4, 0.3],  # P(Mild | T_0, W_0)
        [0.0, 0.1, 0.2, 0.1, 0.1, 0.5, 0.1, 0.3, 0.6]   # P(Cold | T_0, W_0)
    ]
)

# CPD for O (Ground observation)
cpd_o = TabularCPD(
    variable=('O', 0),
    variable_card=2,  # Dry, Wet
    evidence=[('W', 0)],
    evidence_card=[3],
    values=[
        [0.9, 0.6, 0.2],  # P(Dry | W_0 = Sunny, Cloudy, Rainy)
        [0.1, 0.4, 0.8]   # P(Wet | W_0 = Sunny, Cloudy, Rainy)
    ]
)

# CPD for H (Humidity observation)
cpd_h = TabularCPD(
    variable=('H', 0),
    variable_card=3,  # Low, Medium, High
    evidence=[('T', 0), ('W', 0)],
    evidence_card=[3, 3],
    values=[
        [0.7, 0.4, 0.1, 0.5, 0.3, 0.2, 0.3, 0.2, 0.1],  # P(Low | T_0, W_0)
        [0.2, 0.5, 0.3, 0.4, 0.5, 0.3, 0.4, 0.3, 0.2],  # P(Medium | T_0, W_0)
        [0.1, 0.1, 0.6, 0.1, 0.2, 0.5, 0.3, 0.5, 0.7]   # P(High | T_0, W_0)
    ]
)

# Add CPDs to the DBN
dbn.add_cpds(cpd_w_0, cpd_w_1, cpd_t_0, cpd_t_1, cpd_o, cpd_h)

# After defining the model, call initialize_initial_state(), which generates the data structures required for further computation.
dbn.initialize_initial_state()
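
# Optional sanity check: check_model() is part of pgmpy's model API and should
# return True here, verifying that every CPD matches the structure and that its
# columns sum to one:
assert dbn.check_model()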

# Simulate some data from the defined model.
samples = dbn.simulate(n_samples=1000, n_time_slices=10)
WARNING:pgmpy:Probability values don't exactly sum to 1. Differ by: 1.1102230246251565e-16. Adjusting values.
WARNING:pgmpy:Probability values don't exactly sum to 1. Differ by: -2.220446049250313e-16. Adjusting values.
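
# The returned samples DataFrame has one (variable, time_slice) tuple column per
# node and slice, e.g. ('W', 0) through ('H', 9). A quick optional inspection
# (the exact column order may vary):
print(samples.shape)  # expected: (1000, 40) -- 4 variables x 10 time slices
print(list(samples.columns)[:4])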
[3]:
# Fitting model parameters to a defined network structure.

# Define the network structure for which to learn the model parameters. Here, we assume the same
# structure that we used to simulate the data.
dbn = DBN()
dbn.add_edges_from([
    (('W', 0), ('O', 0)),  # Weather influences ground observation
    (('T', 0), ('H', 0)),  # Temperature influences humidity
    (('W', 0), ('H', 0))   # Weather influences humidity
])
dbn.add_edges_from([
    (('W', 0), ('W', 1)),  # Weather transition
    (('T', 0), ('T', 1)),  # Temperature transition
    (('W', 0), ('T', 1))   # Weather influences future temperature
])

# Fit the model using simulated samples
dbn.fit(samples)
print(dbn.cpds)
INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data:
 {'T_0': 'C', 'W_0': 'C', 'H_0': 'C', 'O_0': 'C', 'T_1': 'C', 'W_1': 'C', 'H_1': 'C', 'O_1': 'C'}
[<TabularCPD representing P((W, 0):3) at 0x747250050110>, <TabularCPD representing P((O, 0):2 | (W, 0):3) at 0x74713dd92570>, <TabularCPD representing P((H, 0):3 | (T, 0):3, (W, 0):3) at 0x74713dd93230>, <TabularCPD representing P((W, 1):3 | (W, 0):3) at 0x74713dd92ea0>, <TabularCPD representing P((T, 1):3 | (T, 0):3, (W, 0):3) at 0x74713dd925a0>, <TabularCPD representing P((O, 1):2 | (W, 1):3) at 0x74713dd93590>, <TabularCPD representing P((H, 1):3 | (T, 1):3, (W, 1):3) at 0x74713dd917c0>, <TabularCPD representing P((T, 0):3) at 0x74713dd927e0>]
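
# To compare a learned CPD with the ground truth defined earlier, pick it out of
# the fitted cpds list by its variable name. With 1000 samples, the learned values
# should be close to the true P(W_1 | W_0) above (a quick spot check):
cpd_w1_learned = next(c for c in dbn.cpds if c.variable == ('W', 1))
print(cpd_w1_learned)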
[4]:
# Learning the model structure from data.

# pgmpy doesn't implement dedicated methods for DBN structure learning. The following is a
# hackish way to reuse the existing BN structure learning algorithms to estimate the structure
# of the DBN: we drop the absolute time information from the data and learn the 2-TBN
# structure that is assumed to remain constant across time slices.

# First, convert the dataset into long form, removing the time information, so that it is suitable for learning the 2-TBN structure.

import pandas as pd

colnames = [(node + '0') for node in dbn._nodes()] + [(node + '1') for node in dbn._nodes()]

# Stack every consecutive pair of time slices (t, t+1) on top of each other.
# Collect the frames in a list and concatenate once at the end; repeatedly
# concatenating onto an empty DataFrame is deprecated in pandas and raises a FutureWarning.
frames = []
for t in range(9):  # 10 time slices -> 9 consecutive (t, t+1) pairs
    cols = [(node, t) for node in dbn._nodes()] + [(node, t + 1) for node in dbn._nodes()]
    samples_t = samples.loc[:, cols]
    samples_t.columns = colnames
    frames.append(samples_t)

df_long = pd.concat(frames, ignore_index=True)
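
# df_long should now have 9 * 1000 = 9000 rows (one per sample and consecutive
# slice pair) and 8 columns. A quick optional check:
print(df_long.shape)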


# Use this long data frame to learn the structure of the first two time slices of the DBN. Because we are using
# standard BN structure learning algorithms, we need to add constraints so that no edges are learned from time slice 1 back to time slice 0.
from pgmpy.estimators import HillClimbSearch
est = HillClimbSearch(df_long)
dag = est.estimate(black_list=[('W1', 'W0'), ('W1', 'O0'), ('W1', 'T0'), ('W1', 'H0'),
                               ('O1', 'W0'), ('O1', 'O0'), ('O1', 'T0'), ('O1', 'H0'),
                               ('T1', 'W0'), ('T1', 'O0'), ('T1', 'T0'), ('T1', 'H0'),
                               ('H1', 'W0'), ('H1', 'O0'), ('H1', 'T0'), ('H1', 'H0')])  # Constrain edges to the time-forward direction only.
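
# The same constraint list could be built programmatically from the node names
# (a small convenience sketch, equivalent to the hand-written list above):
nodes = ['W', 'O', 'T', 'H']
forward_blacklist = [(u + '1', v + '0') for u in nodes for v in nodes]  # same 16 pairs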

print(dag.edges()) # Use this learned DAG to define a DBN.
INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data:
 {'T0': 'C', 'W0': 'C', 'H0': 'C', 'O0': 'C', 'T1': 'C', 'W1': 'C', 'H1': 'C', 'O1': 'C'}
[('T0', 'T1'), ('W0', 'T1'), ('W0', 'H0'), ('W0', 'W1'), ('W0', 'T0'), ('H0', 'T0'), ('O0', 'W0'), ('T1', 'H1'), ('W1', 'H1'), ('W1', 'O1')]
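
# The learned structure recovers the true inter-slice edges, but some intra-slice
# edges come out reversed (e.g. ('H0', 'T0') and ('O0', 'W0')): structure learning
# only identifies edges up to Markov equivalence. Below is a minimal sketch for
# building a DBN from this DAG, using a simple heuristic: keep the inter-slice
# edges, and take intra-slice orientations from time slice 1, where the
# inter-slice parents help orient them. pgmpy mirrors intra-slice edges across
# slices, so adding them once at slice 0 is enough.

def parse(col):
    # 'W0' -> ('W', 0), following the column-name convention used above
    return (col[:-1], int(col[-1]))

dbn_edges = []
for u, v in dag.edges():
    (un, ut), (vn, vt) = parse(u), parse(v)
    if ut == 0 and vt == 1:      # inter-slice edge
        dbn_edges.append(((un, 0), (vn, 1)))
    elif ut == 1 and vt == 1:    # intra-slice edge, slice-1 orientation
        dbn_edges.append(((un, 0), (vn, 0)))

learned_dbn = DBN()
learned_dbn.add_edges_from(dbn_edges)
print(sorted(learned_dbn.edges()))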