Coverage for src / ts_stat_tests / correlation / tests.py: 100%
53 statements
« prev ^ index » next coverage.py v7.13.2, created at 2026-02-01 09:48 +0000
« prev ^ index » next coverage.py v7.13.2, created at 2026-02-01 09:48 +0000
1# ============================================================================ #
2# #
3# Title : Correlation Tests #
4# Purpose : This module is a single point of entry for all correlation #
5# tests in the ts_stat_tests package. #
6# #
7# ============================================================================ #
10# ---------------------------------------------------------------------------- #
11# #
12# Overview ####
13# #
14# ---------------------------------------------------------------------------- #
17# ---------------------------------------------------------------------------- #
18# Description ####
19# ---------------------------------------------------------------------------- #
22"""
23!!! note "Summary"
24 This module contains tests for the correlation functions defined in the `ts_stat_tests.correlation.algorithms` module.
25"""
28# ---------------------------------------------------------------------------- #
29# #
30# Setup ####
31# #
32# ---------------------------------------------------------------------------- #
35# ---------------------------------------------------------------------------- #
36# Imports ####
37# ---------------------------------------------------------------------------- #
40# ## Python StdLib Imports ----
41from typing import Literal, Union, overload
43# ## Python Third Party Imports ----
44import numpy as np
45import pandas as pd
46from numpy.typing import ArrayLike, NDArray
47from statsmodels.regression.linear_model import (
48 RegressionResults,
49 RegressionResultsWrapper,
50)
51from statsmodels.stats.diagnostic import ResultsStore
52from statsmodels.tsa.stattools import ArrayLike1D
53from typeguard import typechecked
55# ## Local First Party Imports ----
56from ts_stat_tests.correlation.algorithms import (
57 acf as _acf,
58 bglm as _bglm,
59 ccf as _ccf,
60 lb as _lb,
61 lm as _lm,
62 pacf as _pacf,
63)
64from ts_stat_tests.utils.errors import generate_error_message
67# ---------------------------------------------------------------------------- #
68# Exports ####
69# ---------------------------------------------------------------------------- #
# Public names of this module: the algorithm dispatcher and the boolean significance check.
72__all__: list[str] = ["correlation", "is_correlated"]
75# ---------------------------------------------------------------------------- #
76# #
77# Tests ####
78# #
79# ---------------------------------------------------------------------------- #
# Typing-only overloads for `correlation`. Each one ties a family of
# `algorithm` aliases to the return type that family produces, so a static
# type checker can narrow the result of a call from the literal alias used.
# They have no runtime effect; the decorated implementation follows them.


# ACF aliases. `algorithm` carries a default here because the implementation
# signature defaults to `algorithm="acf"`; without it, a bare `correlation(x)`
# call would match none of the overloads under a type checker.
@overload
def correlation(
    x: ArrayLike,
    algorithm: Literal["acf", "auto", "ac"] = ...,
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[NDArray[np.float64], tuple[NDArray[np.float64], ...]]: ...


# PACF aliases.
@overload
def correlation(
    x: ArrayLike1D,
    algorithm: Literal["pacf", "partial", "pc"],
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[NDArray[np.float64], tuple[NDArray[np.float64], ...]]: ...


# CCF aliases.
@overload
def correlation(
    x: ArrayLike,
    algorithm: Literal["ccf", "cross", "cross-correlation", "cc"],
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[NDArray[np.float64], tuple[NDArray[np.float64], ...]]: ...


# Ljung-Box aliases: the result is a DataFrame.
@overload
def correlation(
    x: ArrayLike,
    algorithm: Literal["lb", "alb", "acorr_ljungbox", "acor_lb", "a_lb", "ljungbox"],
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> pd.DataFrame: ...


# Lagrange Multiplier aliases: a 4-tuple of floats, or a 5-tuple ending in a
# `ResultsStore`.
@overload
def correlation(
    x: ArrayLike,
    algorithm: Literal["lm", "alm", "acorr_lm", "a_lm"],
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[
    tuple[float, float, float, float],
    tuple[float, float, float, float, ResultsStore],
]: ...


# Breusch-Godfrey aliases: `x` is a regression results object rather than an
# array; the return shape matches the LM overload.
@overload
def correlation(
    x: Union[RegressionResults, RegressionResultsWrapper],
    algorithm: Literal["bglm", "breusch_godfrey", "bg"],
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[
    tuple[float, float, float, float],
    tuple[float, float, float, float, ResultsStore],
]: ...
@typechecked
def correlation(
    x: Union[ArrayLike, ArrayLike1D, RegressionResults, RegressionResultsWrapper],
    algorithm: str = "acf",
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[
    NDArray[np.float64],
    tuple[NDArray[np.float64], ...],
    pd.DataFrame,
    tuple[float, float, float, float],
    tuple[float, float, float, float, ResultsStore],
]:
    """
    !!! note "Summary"
        Single entry point that runs one correlation measure or test, selected by name.

    ???+ abstract "Details"
        The `algorithm` string is looked up in a table of accepted aliases and the call is forwarded, together with `**kwargs`, to the matching function imported from `ts_stat_tests.correlation.algorithms`.

        The alias families and the way `x` is handed to each target are:

        - **ACF** (`acf`): forwarded as `x`.
        - **PACF** (`pacf`): forwarded as `x`.
        - **CCF** (`ccf`): forwarded as `x`; a non-`None` `y` keyword argument must also be supplied.
        - **Ljung-Box** (`lb`): forwarded as `x`.
        - **Lagrange Multiplier** (`lm`): forwarded as `resid`.
        - **Breusch-Godfrey** (`bglm`): forwarded as `res`.

    Params:
        x (Union[ArrayLike, ArrayLike1D, RegressionResults, RegressionResultsWrapper]):
            The time series data, or the regression results object, to analyse.
        algorithm (str):
            Name of the algorithm to run. Default: `"acf"`. Accepted aliases:
            - "acf", "auto", "ac": Autocorrelation Function
            - "pacf", "partial", "pc": Partial Autocorrelation Function
            - "ccf", "cross", "cross-correlation", "cc": Cross-Correlation Function
            - "lb", "alb", "acorr_ljungbox", "acor_lb", "a_lb", "ljungbox": Ljung-Box Test
            - "lm", "alm", "acorr_lm", "a_lm": Lagrange Multiplier Test
            - "bglm", "breusch_godfrey", "bg": Breusch-Godfrey Test
        kwargs (Union[float, int, str, bool, ArrayLike, None]):
            Extra keyword arguments passed through unchanged to the selected algorithm.

    Raises:
        (ValueError):
            If `algorithm` matches none of the accepted aliases.
        (ValueError):
            If a CCF alias is selected and `y` is missing from `kwargs` or is `None`.

    Returns:
        (Union[NDArray[np.float64], tuple[NDArray[np.float64], ...], pd.DataFrame, tuple[float, float, float, float], tuple[float, float, float, float, ResultsStore]]):
            Whatever the selected algorithm returns.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> from ts_stat_tests.correlation.tests import correlation
        >>> from ts_stat_tests.utils.data import data_normal
        >>> normal = data_normal

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Autocorrelation (ACF)"}
        >>> res = correlation(normal, algorithm="acf", nlags=10)
        >>> print(f"Lag 1 ACF: {res[1]:.4f}")
        Lag 1 ACF: 0.0236

        ```

        ```pycon {.py .python linenums="1" title="Example 2: Ljung-Box test"}
        >>> res = correlation(normal, algorithm="lb", lags=[5])
        >>> print(res)
            lb_stat  lb_pvalue
        5  7.882362   0.162839

        ```

    ??? tip "See Also"
        - [`ts_stat_tests.correlation.algorithms.acf`][ts_stat_tests.correlation.algorithms.acf]: Autocorrelation Function algorithm.
        - [`ts_stat_tests.correlation.algorithms.pacf`][ts_stat_tests.correlation.algorithms.pacf]: Partial Autocorrelation Function algorithm.
        - [`ts_stat_tests.correlation.algorithms.ccf`][ts_stat_tests.correlation.algorithms.ccf]: Cross-Correlation Function algorithm.
        - [`ts_stat_tests.correlation.algorithms.lb`][ts_stat_tests.correlation.algorithms.lb]: Ljung-Box Test algorithm.
        - [`ts_stat_tests.correlation.algorithms.lm`][ts_stat_tests.correlation.algorithms.lm]: Lagrange Multiplier Test algorithm.
        - [`ts_stat_tests.correlation.algorithms.bglm`][ts_stat_tests.correlation.algorithms.bglm]: Breusch-Godfrey Test algorithm.
    """

    # Canonical key -> every alias accepted for it. The same table is handed to
    # the error-message helper when the lookup fails.
    options: dict[str, tuple[str, ...]] = {
        "acf": ("acf", "auto", "ac"),
        "pacf": ("pacf", "partial", "pc"),
        "ccf": ("ccf", "cross", "cross-correlation", "cc"),
        "lb": ("alb", "acorr_ljungbox", "acor_lb", "a_lb", "lb", "ljungbox"),
        "lm": ("alm", "acorr_lm", "a_lm", "lm"),
        "bglm": ("bglm", "breusch_godfrey", "bg"),
    }

    # Map the supplied alias onto its canonical key; the alias tuples do not
    # overlap, so at most one key can match.
    canonical = next(
        (key for key, aliases in options.items() if algorithm in aliases),
        None,
    )

    if canonical is None:
        raise ValueError(
            generate_error_message(
                parameter_name="algorithm",
                value_parsed=algorithm,
                options=options,
            )
        )

    if canonical == "ccf":
        # Cross-correlation needs a second series; refuse to dispatch without it.
        if kwargs.get("y") is None:
            raise ValueError("The 'ccf' algorithm requires a 'y' parameter.")
        return _ccf(x=x, **kwargs)  # type: ignore

    # These two targets take the input under a different keyword name.
    if canonical == "lm":
        return _lm(resid=x, **kwargs)  # type: ignore

    if canonical == "bglm":
        return _bglm(res=x, **kwargs)  # type: ignore

    # The remaining families all accept the input as `x`.
    takes_x = {"acf": _acf, "pacf": _pacf, "lb": _lb}
    return takes_x[canonical](x=x, **kwargs)  # type: ignore
@typechecked
def is_correlated(
    x: Union[ArrayLike, ArrayLike1D, RegressionResults, RegressionResultsWrapper],
    algorithm: str = "lb",
    alpha: float = 0.05,
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> dict[str, Union[str, float, bool, None]]:
    """
    !!! note "Summary"
        Test whether a given data set is `correlated` or not.

    ???+ abstract "Details"
        This function runs one of the hypothesis tests exposed by `correlation()` and compares its p-value against `alpha`. By default, it uses the Ljung-Box test.

        - **Ljung-Box (`lb`)**: The result is a table with `lb_stat` and `lb_pvalue` columns. The row with the smallest p-value is reported, so when several lags are requested the series is considered `correlated` if any of them has a p-value below `alpha`.
        - **LM Test (`lm`)**: The first two elements of the returned tuple are used as the statistic and the p-value. If the p-value is less than `alpha`, the series is considered `correlated`.
        - **Breusch-Godfrey (`bglm`)**: Handled exactly like the LM test, on regression results.

        The `algorithm` name is validated before anything is computed, so algorithms that only produce a correlation measure (for example `"acf"`) are rejected without being run.

    Params:
        x (Union[ArrayLike, ArrayLike1D, RegressionResults, RegressionResultsWrapper]):
            The input time series data or regression results.
        algorithm (str):
            The correlation algorithm to use. Options include:
            - `"lb"`, `"alb"`, `"acorr_ljungbox"`, `"acor_lb"`, `"a_lb"`, `"ljungbox"`: Ljung-Box Test (default)
            - `"lm"`, `"alm"`, `"acorr_lm"`, `"a_lm"`: Lagrange Multiplier Test
            - `"bglm"`, `"breusch_godfrey"`, `"bg"`: Breusch-Godfrey Test
        alpha (float, optional):
            The significance level for the test. Default: `0.05`.
        kwargs (Union[float, int, str, bool, ArrayLike, None]):
            Additional arguments to pass to the underlying algorithm.

    Raises:
        (ValueError):
            If `algorithm` is not one of the Ljung-Box, LM or Breusch-Godfrey aliases listed above.

    Returns:
        (dict[str, Union[str, float, bool, None]]):
            A dictionary containing:
            - `"result"` (bool): `True` if the p-value is below `alpha`.
            - `"statistic"` (float): The test statistic.
            - `"pvalue"` (float): The p-value of the test.
            - `"alpha"` (float): The significance level used.
            - `"algorithm"` (str): The algorithm name exactly as it was passed in.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> from ts_stat_tests.correlation.tests import is_correlated
        >>> from ts_stat_tests.utils.data import data_normal
        >>> normal = data_normal

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Ljung-Box test on random data"}
        >>> res = is_correlated(normal, algorithm="lb", lags=[5])
        >>> res["result"]
        False
        >>> print(f"p-value: {res['pvalue']:.4f}")
        p-value: 0.1628

        ```

        ```pycon {.py .python linenums="1" title="Example 2: LM test"}
        >>> res = is_correlated(normal, algorithm="lm", nlags=5)
        >>> res["result"]
        False

        ```

    ??? tip "See Also"
        - [`correlation()`][ts_stat_tests.correlation.tests.correlation]: Dispatcher for correlation measures and tests.
        - [`ts_stat_tests.correlation.algorithms.lb`][ts_stat_tests.correlation.algorithms.lb]: Ljung-Box Test algorithm.
        - [`ts_stat_tests.correlation.algorithms.lm`][ts_stat_tests.correlation.algorithms.lm]: Lagrange Multiplier Test algorithm.
        - [`ts_stat_tests.correlation.algorithms.bglm`][ts_stat_tests.correlation.algorithms.bglm]: Breusch-Godfrey Test algorithm.
    """
    options: dict[str, tuple[str, ...]] = {
        "lb": ("alb", "acorr_ljungbox", "acor_lb", "a_lb", "lb", "ljungbox"),
        "lm": ("alm", "acorr_lm", "a_lm", "lm"),
        "bglm": ("bglm", "breusch_godfrey", "bg"),
    }

    uses_ljung_box: bool = algorithm in options["lb"]
    uses_lm_tuple: bool = algorithm in options["lm"] or algorithm in options["bglm"]

    # Validate up front: only the three hypothesis tests yield a p-value that
    # can be compared with `alpha`. Checking before the dispatch avoids running
    # (and discarding) a measure such as ACF, and avoids surfacing an unrelated
    # error from the dispatcher (e.g. CCF complaining about a missing `y`).
    if not (uses_ljung_box or uses_lm_tuple):
        raise ValueError(
            f"Algorithm '{algorithm}' is not supported for 'is_correlated'. "
            f"Supported algorithms for boolean check are: 'lb', 'lm', 'bglm'."
        )

    res = correlation(x=x, algorithm=algorithm, **kwargs)  # type: ignore

    stat: float
    pval: float

    if uses_ljung_box:
        pvalues = res["lb_pvalue"]
        # Report the most significant lag: if any lag shows correlation, the
        # series is treated as correlated. The row is selected by position
        # rather than by index label, because a label lookup returns a Series
        # instead of a scalar when the index contains duplicate labels.
        pos = int(pvalues.argmin())
        pval = float(pvalues.iloc[pos])
        stat = float(res["lb_stat"].iloc[pos])
    else:
        # LM / Breusch-Godfrey: element 0 is used as the statistic and
        # element 1 as its p-value; any further elements are ignored here.
        stat = float(res[0])
        pval = float(res[1])

    return {
        "result": bool(pval < alpha),
        "statistic": stat,
        "pvalue": pval,
        "alpha": alpha,
        "algorithm": algorithm,
    }