Source code for pgmpy.ci_tests.fisher_z

import numpy as np
import pandas as pd
from scipy import stats

from ._base import _BaseCITest
from .pearsonr import Pearsonr



[docs]
class FisherZ(_BaseCITest):
    r"""
    Conditional independence test for continuous data based on Fisher's Z transformation.

    The Pearson (or, with a non-empty conditioning set, partial) correlation coefficient
    :math:`\rho_{XY \mid Z}` is obtained from :class:`Pearsonr`. The Fisher transformation is then applied
    to it, giving the test statistic

    .. math::
        z = \sqrt{n - |Z| - 3} \cdot \operatorname{arctanh}(\rho_{XY \mid Z}),

    where :math:`n` is the number of rows in ``data`` and :math:`|Z|` is the number of conditioning
    variables. Under the null hypothesis :math:`X \perp Y \mid Z`, :math:`z` is approximately standard
    normal, which is the reference distribution used for the two-sided p-value.

    Parameters
    ----------
    data : pandas.DataFrame
        The dataset on which the independence condition is tested.

    Attributes
    ----------
    statistic_ : float
        The Fisher Z test statistic. Available once the test has been run.
    p_value_ : float
        The two-sided p-value of the test. Available once the test has been run.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.ci_tests import FisherZ
    >>> rng = np.random.default_rng(seed=42)
    >>> data = pd.DataFrame(data=rng.standard_normal(size=(1000, 3)), columns=["X", "Y", "Z"])
    >>> test = FisherZ(data=data)
    >>> test(X="X", Y="Y", Z=["Z"], significance_level=0.05)
    np.True_
    >>> round(test.statistic_, 2)
    np.float64(0.17)
    >>> round(test.p_value_, 2)
    np.float64(0.87)
    """

    _tags = {
        "name": "fisher_z",
        "data_types": ("continuous",),
        "default_for": None,
        "requires_data": True,
    }

    def __init__(self, data: pd.DataFrame):
        # The dataset is attached before the base-class initialiser runs.
        self.data = data
        super().__init__()

    def run_test(self, X: str, Y: str, Z: list):
        """
        Run Fisher's Z test of ``X`` against ``Y`` given ``Z``.

        Parameters
        ----------
        X : str
            First variable; forwarded unchanged to ``Pearsonr.run_test``.
        Y : str
            Second variable; forwarded unchanged to ``Pearsonr.run_test``.
        Z : list
            Conditioning variables; forwarded unchanged to ``Pearsonr.run_test``. Its length is the
            ``|Z|`` term of the test statistic.

        Returns
        -------
        tuple
            ``(statistic, p_value)``, the same values that are stored on the instance.

        Notes
        -----
        The results are stored as ``self.statistic_`` and ``self.p_value_``; no other attribute is set.
        If ``n - len(Z) - 3`` is negative, the square root is NaN, and so are the statistic and the
        p-value.
        """
        correlation_test = Pearsonr(data=self.data)
        partial_corr, _ = correlation_test.run_test(X=X, Y=Y, Z=Z)

        # arctanh diverges at +/-1, so the coefficient is kept strictly inside the open interval.
        bounded_corr = np.clip(partial_corr, -0.999999, 0.999999)

        n_samples = self.data.shape[0]
        scale = np.sqrt(n_samples - len(Z) - 3)
        self.statistic_ = scale * np.arctanh(bounded_corr)

        # Two-sided p-value: twice the upper-tail mass of the standard normal beyond |statistic|.
        upper_tail = stats.norm.sf(np.abs(self.statistic_))
        self.p_value_ = 2 * upper_tail

        return self.statistic_, self.p_value_