Coverage for src / ts_stat_tests / correlation / tests.py: 100%

53 statements  

« prev     ^ index     » next       coverage.py v7.13.2, created at 2026-02-01 09:48 +0000

1# ============================================================================ # 

2# # 

3# Title : Correlation Tests # 

4# Purpose : This module is a single point of entry for all correlation # 

5# tests in the ts_stat_tests package. # 

6# # 

7# ============================================================================ # 

8 

9 

10# ---------------------------------------------------------------------------- # 

11# # 

12# Overview #### 

13# # 

14# ---------------------------------------------------------------------------- # 

15 

16 

17# ---------------------------------------------------------------------------- # 

18# Description #### 

19# ---------------------------------------------------------------------------- # 

20 

21 

22""" 

23!!! note "Summary" 

24 This module contains tests for the correlation functions defined in the `ts_stat_tests.correlation.algorithms` module. 

25""" 

26 

27 

28# ---------------------------------------------------------------------------- # 

29# # 

30# Setup #### 

31# # 

32# ---------------------------------------------------------------------------- # 

33 

34 

35# ---------------------------------------------------------------------------- # 

36# Imports #### 

37# ---------------------------------------------------------------------------- # 

38 

39 

40# ## Python StdLib Imports ---- 

41from typing import Literal, Union, overload 

42 

43# ## Python Third Party Imports ---- 

44import numpy as np 

45import pandas as pd 

46from numpy.typing import ArrayLike, NDArray 

47from statsmodels.regression.linear_model import ( 

48 RegressionResults, 

49 RegressionResultsWrapper, 

50) 

51from statsmodels.stats.diagnostic import ResultsStore 

52from statsmodels.tsa.stattools import ArrayLike1D 

53from typeguard import typechecked 

54 

55# ## Local First Party Imports ---- 

56from ts_stat_tests.correlation.algorithms import ( 

57 acf as _acf, 

58 bglm as _bglm, 

59 ccf as _ccf, 

60 lb as _lb, 

61 lm as _lm, 

62 pacf as _pacf, 

63) 

64from ts_stat_tests.utils.errors import generate_error_message 

65 

66 

67# ---------------------------------------------------------------------------- # 

68# Exports #### 

69# ---------------------------------------------------------------------------- # 

70 

71 

72__all__: list[str] = ["correlation", "is_correlated"] 

73 

74 

75# ---------------------------------------------------------------------------- # 

76# # 

77# Tests #### 

78# # 

79# ---------------------------------------------------------------------------- # 

80 

81 

# Typing-only overloads for `correlation()`.
#
# Each overload ties one family of `algorithm` aliases to the return type
# declared for that family. Static type checkers resolve calls against these
# signatures only; the implementation signature further down is not consulted.


# ACF family. The implementation defaults `algorithm` to "acf", so this overload
# must declare a default as well; otherwise a bare `correlation(x)` call
# matches no overload and is rejected by type checkers.
@overload
def correlation(
    x: ArrayLike,
    algorithm: Literal["acf", "auto", "ac"] = ...,
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[NDArray[np.float64], tuple[NDArray[np.float64], ...]]: ...


# PACF family.
@overload
def correlation(
    x: ArrayLike1D,
    algorithm: Literal["pacf", "partial", "pc"],
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[NDArray[np.float64], tuple[NDArray[np.float64], ...]]: ...


# CCF family.
@overload
def correlation(
    x: ArrayLike,
    algorithm: Literal["ccf", "cross", "cross-correlation", "cc"],
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[NDArray[np.float64], tuple[NDArray[np.float64], ...]]: ...


# Ljung-Box family.
@overload
def correlation(
    x: ArrayLike,
    algorithm: Literal["lb", "alb", "acorr_ljungbox", "acor_lb", "a_lb", "ljungbox"],
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> pd.DataFrame: ...


# Lagrange Multiplier family.
@overload
def correlation(
    x: ArrayLike,
    algorithm: Literal["lm", "alm", "acorr_lm", "a_lm"],
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[
    tuple[float, float, float, float],
    tuple[float, float, float, float, ResultsStore],
]: ...


# Breusch-Godfrey family: the only one whose `x` is a regression result.
@overload
def correlation(
    x: Union[RegressionResults, RegressionResultsWrapper],
    algorithm: Literal["bglm", "breusch_godfrey", "bg"],
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[
    tuple[float, float, float, float],
    tuple[float, float, float, float, ResultsStore],
]: ...

@typechecked
def correlation(
    x: Union[ArrayLike, ArrayLike1D, RegressionResults, RegressionResultsWrapper],
    algorithm: str = "acf",
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[
    NDArray[np.float64],
    tuple[NDArray[np.float64], ...],
    pd.DataFrame,
    tuple[float, float, float, float],
    tuple[float, float, float, float, ResultsStore],
]:
    """
    !!! note "Summary"
        Single entry point that dispatches to one of the correlation algorithms.

    ???+ abstract "Details"
        The `algorithm` string is matched against a table of aliases, and the call is forwarded to the corresponding function in `ts_stat_tests.correlation.algorithms`, together with every extra keyword argument.

        The input `x` is handed over under the keyword that the target function expects:

        - **Autocorrelation Function (ACF)**: forwarded as `x`.
        - **Partial Autocorrelation Function (PACF)**: forwarded as `x`.
        - **Cross-Correlation Function (CCF)**: forwarded as `x`; a non-`None` `y` keyword argument is mandatory.
        - **Ljung-Box Test**: forwarded as `x`.
        - **Lagrange Multiplier (LM) Test**: forwarded as `resid`.
        - **Breusch-Godfrey Test**: forwarded as `res`.

    Params:
        x (Union[ArrayLike, ArrayLike1D, RegressionResults, RegressionResultsWrapper]):
            The input time series data or regression results.
        algorithm (str):
            Name (or alias) of the algorithm to run. Default: `"acf"`. Accepted values:
            - "acf", "auto", "ac": Autocorrelation Function
            - "pacf", "partial", "pc": Partial Autocorrelation Function
            - "ccf", "cross", "cross-correlation", "cc": Cross-Correlation Function
            - "lb", "alb", "acorr_ljungbox", "acor_lb", "a_lb", "ljungbox": Ljung-Box Test
            - "lm", "alm", "acorr_lm", "a_lm": Lagrange Multiplier Test
            - "bglm", "breusch_godfrey", "bg": Breusch-Godfrey Test
        kwargs (Union[float, int, str, bool, ArrayLike, None]):
            Extra keyword arguments, passed through untouched to the selected algorithm.

    Raises:
        (ValueError):
            If `algorithm` is not one of the accepted values.
        (ValueError):
            If a cross-correlation alias is selected and `y` is missing from `kwargs` or is `None`.

    Returns:
        (Union[NDArray[np.float64], tuple[NDArray[np.float64], ...], pd.DataFrame, tuple[float, float, float, float], tuple[float, float, float, float, ResultsStore]]):
            Whatever the selected algorithm returns.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> from ts_stat_tests.correlation.tests import correlation
        >>> from ts_stat_tests.utils.data import data_normal
        >>> normal = data_normal

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Autocorrelation (ACF)"}
        >>> res = correlation(normal, algorithm="acf", nlags=10)
        >>> print(f"Lag 1 ACF: {res[1]:.4f}")
        Lag 1 ACF: 0.0236

        ```

        ```pycon {.py .python linenums="1" title="Example 2: Ljung-Box test"}
        >>> res = correlation(normal, algorithm="lb", lags=[5])
        >>> print(res)
            lb_stat  lb_pvalue
        5  7.882362   0.162839

        ```

    ??? tip "See Also"
        - [`ts_stat_tests.correlation.algorithms.acf`][ts_stat_tests.correlation.algorithms.acf]: Autocorrelation Function algorithm.
        - [`ts_stat_tests.correlation.algorithms.pacf`][ts_stat_tests.correlation.algorithms.pacf]: Partial Autocorrelation Function algorithm.
        - [`ts_stat_tests.correlation.algorithms.ccf`][ts_stat_tests.correlation.algorithms.ccf]: Cross-Correlation Function algorithm.
        - [`ts_stat_tests.correlation.algorithms.lb`][ts_stat_tests.correlation.algorithms.lb]: Ljung-Box Test algorithm.
        - [`ts_stat_tests.correlation.algorithms.lm`][ts_stat_tests.correlation.algorithms.lm]: Lagrange Multiplier Test algorithm.
        - [`ts_stat_tests.correlation.algorithms.bglm`][ts_stat_tests.correlation.algorithms.bglm]: Breusch-Godfrey Test algorithm.
    """

    # Canonical algorithm key -> every alias accepted for it. The same mapping
    # is handed to the error-message helper when nothing matches.
    options: dict[str, tuple[str, ...]] = {
        "acf": ("acf", "auto", "ac"),
        "pacf": ("pacf", "partial", "pc"),
        "ccf": ("ccf", "cross", "cross-correlation", "cc"),
        "lb": ("alb", "acorr_ljungbox", "acor_lb", "a_lb", "lb", "ljungbox"),
        "lm": ("alm", "acorr_lm", "a_lm", "lm"),
        "bglm": ("bglm", "breusch_godfrey", "bg"),
    }

    # Resolve the alias to its canonical key (alias groups do not overlap).
    canonical = None
    for key, aliases in options.items():
        if algorithm in aliases:
            canonical = key
            break

    # Unknown name: fail with the shared, option-listing error message.
    if canonical is None:
        raise ValueError(
            generate_error_message(
                parameter_name="algorithm",
                value_parsed=algorithm,
                options=options,
            )
        )

    if canonical == "acf":
        return _acf(x=x, **kwargs)  # type: ignore

    if canonical == "pacf":
        return _pacf(x=x, **kwargs)  # type: ignore

    if canonical == "lb":
        return _lb(x=x, **kwargs)  # type: ignore

    if canonical == "lm":
        # The LM implementation takes the series under the name `resid`.
        return _lm(resid=x, **kwargs)  # type: ignore

    if canonical == "ccf":
        # Cross-correlation needs a second series; reject a missing or None `y`.
        if kwargs.get("y") is None:
            raise ValueError("The 'ccf' algorithm requires a 'y' parameter.")
        return _ccf(x=x, **kwargs)  # type: ignore

    # Only "bglm" remains; it takes the regression results under the name `res`.
    return _bglm(res=x, **kwargs)  # type: ignore

243 

244 

@typechecked
def is_correlated(
    x: Union[ArrayLike, ArrayLike1D, RegressionResults, RegressionResultsWrapper],
    algorithm: str = "lb",
    alpha: float = 0.05,
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> dict[str, Union[str, float, bool, None]]:
    """
    !!! note "Summary"
        Decide whether a data set is `correlated`, based on a hypothesis test.

    ???+ abstract "Details"
        The selected test is run through `correlation()`, and its p-value is compared against `alpha`. The series is reported as `correlated` when the p-value is strictly smaller than `alpha`.

        - **Ljung-Box (`lb`)**: The smallest `lb_pvalue` across all returned lags is used, so a single significant lag is enough to flag the series. The reported statistic is the `lb_stat` of that same lag.
        - **LM Test (`lm`)**: The first element of the returned tuple is used as the statistic, and the second as the p-value.
        - **Breusch-Godfrey (`bglm`)**: Handled the same way as the LM test.

        Note that `correlation()` is invoked before `algorithm` is checked against the tests supported here. An algorithm that `correlation()` accepts but which is not a hypothesis test is therefore computed first and rejected afterwards.

    Params:
        x (Union[ArrayLike, ArrayLike1D, RegressionResults, RegressionResultsWrapper]):
            The input time series data or regression results.
        algorithm (str):
            Name (or alias) of the test to run. Accepted values:
            - `"lb"`, `"alb"`, `"acorr_ljungbox"`, `"acor_lb"`, `"a_lb"`, `"ljungbox"`: Ljung-Box Test (default)
            - `"lm"`, `"alm"`, `"acorr_lm"`, `"a_lm"`: Lagrange Multiplier Test
            - `"bglm"`, `"breusch_godfrey"`, `"bg"`: Breusch-Godfrey Test
        alpha (float, optional):
            The significance level for the test. Default: `0.05`.
        kwargs (Union[float, int, str, bool, ArrayLike, None]):
            Extra keyword arguments, passed through to `correlation()`.

    Raises:
        (ValueError):
            If `algorithm` is not one of the tests listed above. Errors raised by `correlation()` propagate unchanged.

    Returns:
        (dict[str, Union[str, float, bool, None]]):
            A dictionary containing:
            - `"result"` (bool): `True` if the p-value is below `alpha`.
            - `"statistic"` (float): The test statistic.
            - `"pvalue"` (float): The p-value of the test.
            - `"alpha"` (float): The significance level used.
            - `"algorithm"` (str): The `algorithm` value exactly as it was passed in.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> from ts_stat_tests.correlation.tests import is_correlated
        >>> from ts_stat_tests.utils.data import data_normal
        >>> normal = data_normal

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Ljung-Box test on random data"}
        >>> res = is_correlated(normal, algorithm="lb", lags=[5])
        >>> res["result"]
        False
        >>> print(f"p-value: {res['pvalue']:.4f}")
        p-value: 0.1628

        ```

        ```pycon {.py .python linenums="1" title="Example 2: LM test"}
        >>> res = is_correlated(normal, algorithm="lm", nlags=5)
        >>> res["result"]
        False

        ```

    ??? tip "See Also"
        - [`correlation()`][ts_stat_tests.correlation.tests.correlation]: Dispatcher for correlation measures and tests.
        - [`ts_stat_tests.correlation.algorithms.lb`][ts_stat_tests.correlation.algorithms.lb]: Ljung-Box Test algorithm.
        - [`ts_stat_tests.correlation.algorithms.lm`][ts_stat_tests.correlation.algorithms.lm]: Lagrange Multiplier Test algorithm.
        - [`ts_stat_tests.correlation.algorithms.bglm`][ts_stat_tests.correlation.algorithms.bglm]: Breusch-Godfrey Test algorithm.
    """
    # Aliases that yield a Ljung-Box table, and aliases that yield a
    # (statistic, p-value, ...) tuple (LM and Breusch-Godfrey).
    ljung_box_aliases = ("alb", "acorr_ljungbox", "acor_lb", "a_lb", "lb", "ljungbox")
    tuple_test_aliases = ("alm", "acorr_lm", "a_lm", "lm", "bglm", "breusch_godfrey", "bg")

    # Run the underlying test first; its own validation errors surface from here.
    outcome = correlation(x=x, algorithm=algorithm, **kwargs)  # type: ignore

    if algorithm in ljung_box_aliases:
        pvalue_column = outcome["lb_pvalue"]
        # The most significant lag decides: one small p-value flags the series.
        p_value = float(pvalue_column.min())
        # Report the statistic that belongs to that same lag.
        strongest_lag = pvalue_column.idxmin()
        statistic = float(outcome.loc[strongest_lag, "lb_stat"])

    elif algorithm in tuple_test_aliases:
        # Result layout: (lm, lmpval, fval, fpval[, store]).
        statistic = float(outcome[0])
        p_value = float(outcome[1])

    else:
        raise ValueError(
            f"Algorithm '{algorithm}' is not supported for 'is_correlated'. "
            f"Supported algorithms for boolean check are: 'lb', 'lm', 'bglm'."
        )

    return {
        "result": bool(p_value < alpha),
        "statistic": statistic,
        "pvalue": p_value,
        "alpha": alpha,
        "algorithm": algorithm,
    }