Source code for pgmpy.ci_tests.fisher_z

import numpy as np
import pandas as pd
from scipy import stats

from ._base import _BaseCITest
from .pearsonr import Pearsonr


class FisherZ(_BaseCITest):
    r"""
    Fisher's Z test for conditional independence on continuous data.

    The test obtains the Pearson (or partial) correlation coefficient
    :math:`\rho_{XY \mid Z}` from :class:`Pearsonr`, applies the Fisher
    transformation to it, and forms the test statistic

    .. math::
        Z = \sqrt{n - |Z| - 3} \cdot \operatorname{arctanh}(\rho_{XY \mid Z}),

    where :math:`n` is the sample size and :math:`|Z|` is the number of
    conditioning variables. Under the null hypothesis
    :math:`X \perp Y \mid Z`, the statistic is approximately standard normal.

    Parameters
    ----------
    data : pandas.DataFrame
        The dataset in which to test the independence condition.

    Attributes
    ----------
    statistic_ : float
        The Fisher Z test statistic. Set after calling the test.

    p_value_ : float
        The two-sided p-value for the test. Set after calling the test.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> from pgmpy.ci_tests import FisherZ
    >>> rng = np.random.default_rng(seed=42)
    >>> data = pd.DataFrame(data=rng.standard_normal(size=(1000, 3)), columns=["X", "Y", "Z"])
    >>> test = FisherZ(data=data)
    >>> test(X="X", Y="Y", Z=["Z"], significance_level=0.05)
    np.True_
    >>> round(test.statistic_, 2)
    np.float64(0.17)
    >>> round(test.p_value_, 2)
    np.float64(0.87)
    """

    _tags = {
        "name": "fisher_z",
        "data_types": ("continuous",),
        "default_for": None,
        "requires_data": True,
    }

    def __init__(self, data: pd.DataFrame):
        # The dataset is stored before delegating to the base-class initializer.
        self.data = data
        super().__init__()

    def run_test(
        self,
        X: str,
        Y: str,
        Z: list,
    ):
        """
        Compute the Fisher Z statistic and its two-sided p-value.

        Stores the results on the instance as ``self.statistic_`` and
        ``self.p_value_`` and also returns them.

        Parameters
        ----------
        X : str
            First variable; forwarded unchanged to ``Pearsonr.run_test``.

        Y : str
            Second variable; forwarded unchanged to ``Pearsonr.run_test``.

        Z : list
            Conditioning variables; forwarded to ``Pearsonr.run_test``. Its
            length is the ``|Z|`` term of the scaling factor.

        Returns
        -------
        tuple
            ``(statistic_, p_value_)``.
        """
        correlation_test = Pearsonr(data=self.data)
        # Only the first element of the Pearsonr result (the coefficient) is used.
        coefficient, _ = correlation_test.run_test(X=X, Y=Y, Z=Z)

        # arctanh diverges at +/-1, so the coefficient is kept strictly inside
        # the open interval to yield a finite statistic.
        bounded_rho = np.clip(coefficient, -0.999999, 0.999999)

        # n - |Z| - 3, with n taken as the number of rows in the dataset.
        scale_sq = self.data.shape[0] - len(Z) - 3
        scale = np.sqrt(scale_sq)
        fisher_transform = np.arctanh(bounded_rho)
        self.statistic_ = scale * fisher_transform

        # Two-sided p-value under the standard normal: twice the upper tail
        # beyond |statistic|.
        upper_tail = stats.norm.sf(np.abs(self.statistic_))
        self.p_value_ = 2 * upper_tail

        return self.statistic_, self.p_value_