# Source code for pgmpy.ci_tests.g_sq
import pandas as pd
from .power_divergence import PowerDivergence
# [docs]
class GSq(PowerDivergence):
    """
    Conditional independence test for discrete data based on the G-squared statistic.

    ``GSq`` adds no logic of its own: it is :class:`PowerDivergence` with
    ``lambda_`` fixed to ``"log-likelihood"``. Refer to :class:`PowerDivergence`
    for how the contingency tables are built, how the conditional cases are
    aggregated, and how the p-value is computed.

    Parameters
    ----------
    data : pandas.DataFrame
        The dataset on which the independence condition is tested.

    Attributes
    ----------
    statistic_ : float
        G-squared (log-likelihood ratio) test statistic. Available once the
        test has been called.
    p_value_ : float
        p-value of the test. Available once the test has been called.
    dof_ : int
        Degrees of freedom of the test. Available once the test has been
        called.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/G-test

    Examples
    --------
    >>> import pandas as pd
    >>> import numpy as np
    >>> from pgmpy.ci_tests import GSq
    >>> np.random.seed(42)
    >>> data = pd.DataFrame(
    ...     data=np.random.randint(low=0, high=2, size=(50000, 4)), columns=list("ABCD")
    ... )
    >>> data["E"] = data["A"] + data["B"] + data["C"]
    >>> test = GSq(data=data)
    >>> test(X="A", Y="C", Z=[], significance_level=0.05)
    np.True_
    >>> round(test.statistic_, 2)
    np.float64(0.03)
    >>> round(test.p_value_, 2)
    np.float64(0.86)
    >>> test.dof_
    1
    >>> test(X="A", Y="B", Z=["D"], significance_level=0.05)
    np.True_
    >>> test(X="A", Y="B", Z=["D", "E"], significance_level=0.05)
    np.False_
    """

    # Class-level metadata for this test. NOTE(review): presumably read by the
    # shared CI-test machinery outside this file -- confirm against the base class.
    _tags = {
        "name": "g_sq",
        "data_types": ("discrete",),
        "default_for": None,
        "requires_data": True,
    }

    def __init__(self, data: pd.DataFrame):
        # The only specialization over the parent: pin the power-divergence
        # family member to the log-likelihood ratio (G-squared) statistic.
        super().__init__(
            data=data,
            lambda_="log-likelihood",
        )