Source code for pgmpy.readwrite.XMLBeliefNetwork

import itertools
import xml.etree.ElementTree as etree

import numpy as np

from pgmpy.factors.discrete import TabularCPD
from pgmpy.global_vars import logger
from pgmpy.models import DiscreteBayesianNetwork



[docs]
class XBNReader(object):
    """
    Initializer for XBNReader class.

    Parameters
    ----------
    path: str or file
        Path of the file containing XBN data.

    string: str
        String of XBN data

    Examples
    --------
    >>> reader = XBNReader("test_XBN.xml")

    Reference
    ---------
    [1] Microsoft Research. XML belief network file format.
        http://xml.coverpages.org/xbn-MSdefault19990414.html, 1999.
    """

    def __init__(self, path=None, string=None):
        if path:
            self.network = etree.parse(path).getroot()
        elif string:
            self.network = etree.fromstring(string)
        else:
            raise ValueError("Must specify either path or string")

        self.bnmodel = self.network.find("BNMODEL")
        self.analysisnotebook = self.get_analysisnotebook_values()
        self.model_name = self.get_bnmodel_name()
        self.static_properties = self.get_static_properties()
        self.variables = self.get_variables()
        self.edges = self.get_edges()
        self.variable_CPD = self.get_distributions()


[docs]
    def get_analysisnotebook_values(self):
        """
        Returns a dictionary of the attributes of ANALYSISNOTEBOOK tag

        Examples
        --------
        >>> reader = XBNReader("xbn_test.xml")
        >>> reader.get_analysisnotebook_values()
        {'NAME': "Notebook.Cancer Example From Neapolitan",
         'ROOT': "Cancer"}
        """
        return {key: value for key, value in self.network.items()}



[docs]
    def get_bnmodel_name(self):
        """
        Returns the name of the BNMODEL.

        Examples
        --------
        >>> reader = XBNReader("xbn_test.xml")
        >>> reader.get_bnmodel_name()
        'Cancer'
        """
        return self.network.find("BNMODEL").get("NAME")



[docs]
    def get_static_properties(self):
        """
        Returns a dictionary of STATICPROPERTIES

        Examples
        --------
        >>> reader = XBNReader("xbn_test.xml")
        >>> reader.get_static_properties()
        {'FORMAT': 'MSR DTAS XML', 'VERSION': '0.2', 'CREATOR': 'Microsoft Research DTAS'}
        """
        if self.bnmodel.find("STATICPROPERTIES") is not None:
            return {
                tags.tag: tags.get("VALUE")
                for tags in self.bnmodel.find("STATICPROPERTIES")
            }
        else:
            return {}



[docs]
    def get_variables(self):
        """
        Returns a list of variables.

        Examples
        --------
        >>> reader = XBNReader("xbn_test.xml")
        >>> reader.get_variables()
        {'a': {'TYPE': 'discrete', 'XPOS': '13495',
               'YPOS': '10465', 'DESCRIPTION': '(a) Metastatic Cancer',
               'STATES': ['Present', 'Absent']}
        'b': {'TYPE': 'discrete', 'XPOS': '11290',
               'YPOS': '11965', 'DESCRIPTION': '(b) Serum Calcium Increase',
               'STATES': ['Present', 'Absent']},
        'c': {....},
        'd': {....},
        'e': {....}
        }
        """
        variables = {}
        for variable in self.bnmodel.find("VARIABLES"):
            variables[variable.get("NAME")] = {
                "TYPE": variable.get("TYPE"),
                "XPOS": variable.get("XPOS"),
                "YPOS": variable.get("YPOS"),
                "DESCRIPTION": variable.find("DESCRIPTION").text,
                "STATES": [state.text for state in variable.findall("STATENAME")],
            }
        return variables



[docs]
    def get_edges(self):
        """
        Returns a list of tuples. Each tuple contains two elements (parent, child) for each edge.

        Examples
        --------
        >>> reader = XBNReader("xbn_test.xml")
        >>> reader.get_edges()
        [('a', 'b'), ('a', 'c'), ('b', 'd'), ('c', 'd'), ('c', 'e')]
        """
        return [
            (arc.get("PARENT"), arc.get("CHILD"))
            for arc in self.bnmodel.find("STRUCTURE")
        ]



[docs]
    def get_distributions(self):
        """
        Returns a dictionary of name and its distribution. Distribution is a ndarray.

        The ndarray is stored in the standard way such that the rightmost variable
        changes most often. Consider a CPD of variable 'd' which has parents 'b' and
        'c' (distribution['CONDSET'] = ['b', 'c'])

                  |  d_0     d_1
        ---------------------------
        b_0, c_0  |  0.8     0.2
        b_0, c_1  |  0.9     0.1
        b_1, c_0  |  0.7     0.3
        b_1, c_1  |  0.05    0.95

        The value of distribution['d']['DPIS'] for the above example will be:
        array([[ 0.8 ,  0.2 ], [ 0.9 ,  0.1 ], [ 0.7 ,  0.3 ], [ 0.05,  0.95]])

        Examples
        --------
        >>> reader = XBNReader("xbn_test.xml")
        >>> reader.get_distributions()
        {'a': {'TYPE': 'discrete', 'DPIS': array([[ 0.2,  0.8]])},
         'e': {'TYPE': 'discrete', 'DPIS': array([[ 0.8,  0.2],
                 [ 0.6,  0.4]]), 'CONDSET': ['c'], 'CARDINALITY': [2]},
         'b': {'TYPE': 'discrete', 'DPIS': array([[ 0.8,  0.2],
                 [ 0.2,  0.8]]), 'CONDSET': ['a'], 'CARDINALITY': [2]},
         'c': {'TYPE': 'discrete', 'DPIS': array([[ 0.2 ,  0.8 ],
                 [ 0.05,  0.95]]), 'CONDSET': ['a'], 'CARDINALITY': [2]},
         'd': {'TYPE': 'discrete', 'DPIS': array([[ 0.8 ,  0.2 ],
                 [ 0.9 ,  0.1 ],
                 [ 0.7 ,  0.3 ],
                 [ 0.05,  0.95]]), 'CONDSET': ['b', 'c']}, 'CARDINALITY': [2, 2]}
        """
        distribution = {}
        for dist in self.bnmodel.find("DISTRIBUTIONS"):
            variable_name = dist.find("PRIVATE").get("NAME")
            distribution[variable_name] = {"TYPE": dist.get("TYPE")}
            if dist.find("CONDSET") is not None:
                distribution[variable_name]["CONDSET"] = [
                    var.get("NAME") for var in dist.find("CONDSET").findall("CONDELEM")
                ]
                distribution[variable_name]["CARDINALITY"] = np.array(
                    [
                        len(
                            set(
                                np.array(
                                    [
                                        list(map(int, dpi.get("INDEXES").split()))
                                        for dpi in dist.find("DPIS")
                                    ]
                                )[:, i]
                            )
                        )
                        for i in range(len(distribution[variable_name]["CONDSET"]))
                    ]
                )
            distribution[variable_name]["DPIS"] = np.array(
                [list(map(float, dpi.text.split())) for dpi in dist.find("DPIS")]
            ).transpose()

        return distribution



[docs]
    def get_model(self):
        """
        Build a ``DiscreteBayesianNetwork`` from the parsed XBN data.

        The network receives the parsed variables as nodes, the parsed arcs as
        edges and the BNMODEL name as its ``name``. One ``TabularCPD`` is
        created per parsed distribution and added to the network; finally the
        attribute dict of every node is replaced by the parsed variable
        properties.

        Returns
        -------
        DiscreteBayesianNetwork
        """
        network = DiscreteBayesianNetwork()
        network.add_nodes_from(self.variables)
        network.add_edges_from(self.edges)
        network.name = self.model_name

        cpds = []
        for name, spec in self.variable_CPD.items():
            # Root variables have neither CONDSET nor CARDINALITY entries.
            parents = spec.get("CONDSET", [])
            table = spec["DPIS"]
            parent_cards = spec.get("CARDINALITY", [])
            state_count = len(self.variables[name]["STATES"])
            cpds.append(
                TabularCPD(
                    name,
                    state_count,
                    table,
                    evidence=parents,
                    evidence_card=parent_cards,
                )
            )
        network.add_cpds(*cpds)

        # Attach the XBN properties (TYPE, XPOS, ...) to the nodes.
        for name, node_properties in self.variables.items():
            network._node[name] = node_properties

        return network





[docs]
class XBNWriter(object):
    """
    Initializer for XBNWriter class

    Parameters
    ----------
    model: DiscreteBayesianNetwork Instance
        Model to write
    encoding: str(optional)
        Encoding for text data
    prettyprint: Bool(optional)
        Indentation in output XML if true

    Reference
    ---------
    http://xml.coverpages.org/xbn-MSdefault19990414.html

    Examples
    --------
    >>> writer = XBNWriter(model)
    """

    def __init__(self, model, encoding="utf-8", prettyprint=True):
        """
        Build the XBN element tree for ``model``.

        Parameters
        ----------
        model: DiscreteBayesianNetwork
            Model to serialize.
        encoding: str (optional)
            Encoding passed to ``etree.tostring`` when the tree is serialized.
        prettyprint: bool (optional)
            Indent the tree before serializing when true.

        Raises
        ------
        TypeError
            If ``model`` is not a ``DiscreteBayesianNetwork``.
        """
        if not isinstance(model, DiscreteBayesianNetwork):
            raise TypeError("Model must be an instance of Bayesian Model.")
        self.model = model

        self.encoding = encoding
        self.prettyprint = prettyprint

        self.network = etree.Element("ANALYSISNOTEBOOK")
        self.bnmodel = etree.SubElement(self.network, "BNMODEL")
        if self.model.name:
            # The model name belongs in the NAME *attribute* of BNMODEL: that
            # is what set_bnmodel_name writes and what
            # XBNReader.get_bnmodel_name reads. A <NAME> child element would
            # be ignored when the file is read back.
            self.set_bnmodel_name(self.model.name)

        # Order matters: VARIABLES, STRUCTURE and DISTRIBUTIONS are appended
        # to BNMODEL in the order these builders run.
        self.variables = self.set_variables(self.model.nodes)
        self.structure = self.set_edges(sorted(self.model.edges()))
        self.distribution = self.set_distributions()

    def __str__(self):
        """
        Return the XML as string.
        """
        if self.prettyprint:
            self.indent(self.network)
        return etree.tostring(self.network, encoding=self.encoding)


[docs]
    def indent(self, elem, level=0):
        """
        Inplace prettyprint formatter.
        """
        i = "\n" + level * "  "
        if len(elem):
            if not elem.text or not elem.text.strip():
                elem.text = i + "  "
            if not elem.tail or not elem.tail.strip():
                elem.tail = i
            for elem in elem:
                self.indent(elem, level + 1)
            if not elem.tail or not elem.tail.strip():
                elem.tail = i
        else:
            if level and (not elem.tail or not elem.tail.strip()):
                elem.tail = i



[docs]
    def set_analysisnotebook(self, **data):
        """
        Set attributes for ANALYSISNOTEBOOK tag

        Parameters
        ----------
        **data: dict
            {name: value} for the attributes to be set.

        Examples
        --------
        >>> from pgmpy.readwrite.XMLBeliefNetwork import XBNWriter
        >>> writer = XBNWriter()
        >>> writer.set_analysisnotebook(
        ...     NAME="Notebook.Cancer Example From Neapolitan", ROOT="Cancer"
        ... )
        """
        for key, value in data.items():
            self.network.set(str(key), str(value))



[docs]
    def set_bnmodel_name(self, name):
        """
        Set the name of the BNMODEL.

        Parameters
        ----------
        name: str
            Name of the BNModel.

        Examples
        --------
        >>> from pgmpy.readwrite.XMLBeliefNetwork import XBNWriter
        >>> writer = XBNWriter()
        >>> writer.set_bnmodel_name("Cancer")
        """
        self.bnmodel.set("NAME", str(name))



[docs]
    def set_static_properties(self, **data):
        """
        Set STATICPROPERTIES tag for the network

        Parameters
        ----------
        **data: dict
            {name: value} for name and value of the property.

        Examples
        --------
        >>> from pgmpy.readwrite.XMLBeliefNetwork import XBNWriter
        >>> writer = XBNWriter()
        >>> writer.set_static_properties(
        ...     FORMAT="MSR DTAS XML", VERSION="0.2", CREATOR="Microsoft Research DTAS"
        ... )
        """
        static_prop = etree.SubElement(self.bnmodel, "STATICPROPERTIES")
        for key, value in data.items():
            etree.SubElement(static_prop, key, attrib={"VALUE": value})



[docs]
    def set_variables(self, data):
        """
        Set variables for the network.

        Parameters
        ----------
        data: dict
            dict for variable in the form of example as shown.

        Examples
        --------
        >>> from pgmpy.readwrite.XMLBeliefNetwork import XBNWriter
        >>> writer = XBNWriter()
        >>> writer.set_variables(
        ...     {
        ...         "a": {
        ...             "TYPE": "discrete",
        ...             "XPOS": "13495",
        ...             "YPOS": "10465",
        ...             "DESCRIPTION": "(a) Metastatic Cancer",
        ...             "STATES": ["Present", "Absent"],
        ...         },
        ...         "b": {
        ...             "TYPE": "discrete",
        ...             "XPOS": "11290",
        ...             "YPOS": "11965",
        ...             "DESCRIPTION": "(b) Serum Calcium Increase",
        ...             "STATES": ["Present", "Absent"],
        ...         },
        ...     }
        ... )
        """
        variables = etree.SubElement(self.bnmodel, "VARIABLES")
        for var in sorted(data):
            variable = etree.SubElement(
                variables,
                "VAR",
                attrib={
                    "NAME": var,
                    "TYPE": data[var].get("TYPE", ""),
                    "XPOS": data[var].get("XPOS", ""),
                    "YPOS": data[var].get("YPOS", ""),
                },
            )
            etree.SubElement(
                variable,
                "DESCRIPTION",
                attrib={"DESCRIPTION": data[var].get("DESCRIPTION", "")},
            )
            for state in self.model.states[var]:
                etree.SubElement(variable, "STATENAME").text = state



[docs]
    def set_edges(self, edge_list):
        """
        Set edges/arc in the network.

        Parameters
        ----------
        edge_list: array_like
            list, tuple, dict or set whose each element has two values (parent, child).

        Examples
        --------
        >>> from pgmpy.readwrite.XMLBeliefNetwork import XBNWriter
        >>> writer = XBNWriter()
        >>> writer.set_edges(
        ...     [("a", "b"), ("a", "c"), ("b", "d"), ("c", "d"), ("c", "e")]
        ... )
        """
        structure = etree.SubElement(self.bnmodel, "STRUCTURE")
        for edge in edge_list:
            etree.SubElement(
                structure, "ARC", attrib={"PARENT": edge[0], "CHILD": edge[1]}
            )



[docs]
    def set_distributions(self):
        """
        Set distributions in the network.

        Examples
        --------
        >>> from pgmpy.readwrite.XMLBeliefNetwork import XBNWriter
        >>> writer = XBNWriter()
        >>> writer.set_distributions()
        """
        distributions = etree.SubElement(self.bnmodel, "DISTRIBUTIONS")

        cpds = self.model.get_cpds()
        cpds.sort(key=lambda x: x.variable)
        for cpd in cpds:
            cpd_values = cpd.get_values().transpose()
            var = cpd.variable
            dist = etree.SubElement(
                distributions,
                "DIST",
                attrib={"TYPE": self.model.nodes[var].get("TYPE", "")},
            )
            etree.SubElement(dist, "PRIVATE", attrib={"NAME": var})
            dpis = etree.SubElement(dist, "DPIS")
            evidence = cpd.variables[1:]
            evidence_card = cpd.cardinality[1:]
            if evidence:
                condset = etree.SubElement(dist, "CONDSET")
                for condelem in evidence:
                    etree.SubElement(condset, "CONDELEM", attrib={"NAME": condelem})
                indexes_iter = itertools.product(
                    *[range(card) for card in evidence_card]
                )
                for val in range(cpd_values.shape[0]):
                    index_value = " " + " ".join(map(str, next(indexes_iter))) + " "
                    etree.SubElement(
                        dpis, "DPI", attrib={"INDEXES": index_value}
                    ).text = (" " + " ".join(map(str, cpd_values[val])) + " ")
            else:
                etree.SubElement(dpis, "DPI").text = (
                    " " + " ".join(map(str, cpd_values[0])) + " "
                )



[docs]
    def write(self, filename):
        """
        Writes the BIF data into a file

        Parameters
        ----------
        filename : Name of the file

        Example
        -------
        >>> from pgmpy.utils import get_example_model
        >>> from pgmpy.readwrite import XBNReader, XBNWriter
        >>> asia = get_example_model("asia")
        >>> writer = XBNWriter(asia)
        >>> writer.write(filename="asia.xbn")
        """
        writer = self.__str__()
        with open(filename, "wb") as fout:
            fout.write(writer)


    def write_xbn(self, filename):
        """
        Deprecated alias of ``write``: log a warning, then delegate.

        Parameters
        ----------
        filename : Name of the file
        """
        deprecation_notice = "The `XBNWriter.write_xbn` has been deprecated. Please use `XBNWriter.write` instead."
        logger.warning(deprecation_notice)
        self.write(filename)
Source code for pgmpy.readwrite.XMLBeliefNetwork

Navigation

Related Topics