Coverage for src/ts_stat_tests/correlation/tests.py: 100%

1# ============================================================================ #

2# #

3# Title : Correlation Tests #

4# Purpose : This module is a single point of entry for all correlation #

5# tests in the ts_stat_tests package. #

6# #

7# ============================================================================ #

10# ---------------------------------------------------------------------------- #

11# #

12# Overview ####

13# #

14# ---------------------------------------------------------------------------- #

17# ---------------------------------------------------------------------------- #

18# Description ####

19# ---------------------------------------------------------------------------- #

22"""

23!!! note "Summary"

24 This module provides the `correlation()` dispatcher and the `is_correlated()` check, which wrap the statistical correlation tests implemented in the `ts_stat_tests.correlation.algorithms` module.

25"""

28# ---------------------------------------------------------------------------- #

29# #

30# Setup ####

31# #

32# ---------------------------------------------------------------------------- #

35# ---------------------------------------------------------------------------- #

36# Imports ####

37# ---------------------------------------------------------------------------- #

40# ## Python StdLib Imports ----

41from typing import Literal, Union, overload

43# ## Python Third Party Imports ----

44import numpy as np

45import pandas as pd

46from numpy.typing import ArrayLike, NDArray

47from statsmodels.regression.linear_model import (

48 RegressionResults,

49 RegressionResultsWrapper,

50)

51from statsmodels.stats.diagnostic import ResultsStore

52from statsmodels.tsa.stattools import ArrayLike1D

53from typeguard import typechecked

55# ## Local First Party Imports ----

56from ts_stat_tests.correlation.algorithms import (

57 acf as _acf,

58 bglm as _bglm,

59 ccf as _ccf,

60 lb as _lb,

61 lm as _lm,

62 pacf as _pacf,

63)

64from ts_stat_tests.utils.errors import generate_error_message

67# ---------------------------------------------------------------------------- #

68# Exports ####

69# ---------------------------------------------------------------------------- #

72__all__: list[str] = ["correlation", "is_correlated"]

75# ---------------------------------------------------------------------------- #

76# #

77# Tests ####

78# #

79# ---------------------------------------------------------------------------- #

# Typing-only overloads for `correlation()`. Each stub ties one family of
# `algorithm` aliases to the return type of the implementation it selects.
# They have no runtime effect; the decorated implementation does the work.


# Autocorrelation (ACF) aliases. `algorithm` carries a default here because
# the implementation defaults to "acf"; without it, type checkers reject the
# otherwise valid call `correlation(x)`.
@overload
def correlation(
    x: ArrayLike,
    algorithm: Literal["acf", "auto", "ac"] = ...,
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[NDArray[np.float64], tuple[NDArray[np.float64], ...]]: ...


# Partial autocorrelation (PACF) aliases.
@overload
def correlation(
    x: ArrayLike1D,
    algorithm: Literal["pacf", "partial", "pc"],
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[NDArray[np.float64], tuple[NDArray[np.float64], ...]]: ...


# Cross-correlation (CCF) aliases.
@overload
def correlation(
    x: ArrayLike,
    algorithm: Literal["ccf", "cross", "cross-correlation", "cc"],
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[NDArray[np.float64], tuple[NDArray[np.float64], ...]]: ...


# Ljung-Box aliases: the result is a DataFrame.
@overload
def correlation(
    x: ArrayLike,
    algorithm: Literal["lb", "alb", "acorr_ljungbox", "acor_lb", "a_lb", "ljungbox"],
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> pd.DataFrame: ...


# Lagrange Multiplier aliases: a 4-tuple of floats, or a 5-tuple whose last
# element is a `ResultsStore`.
@overload
def correlation(
    x: ArrayLike,
    algorithm: Literal["lm", "alm", "acorr_lm", "a_lm"],
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[
    tuple[float, float, float, float],
    tuple[float, float, float, float, ResultsStore],
]: ...


# Breusch-Godfrey aliases: `x` is a fitted regression result rather than raw
# data; the return shape matches the Lagrange Multiplier overload.
@overload
def correlation(
    x: Union[RegressionResults, RegressionResultsWrapper],
    algorithm: Literal["bglm", "breusch_godfrey", "bg"],
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[
    tuple[float, float, float, float],
    tuple[float, float, float, float, ResultsStore],
]: ...

@typechecked
def correlation(
    x: Union[ArrayLike, ArrayLike1D, RegressionResults, RegressionResultsWrapper],
    algorithm: str = "acf",
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> Union[
    NDArray[np.float64],
    tuple[NDArray[np.float64], ...],
    pd.DataFrame,
    tuple[float, float, float, float],
    tuple[float, float, float, float, ResultsStore],
]:
    """
    !!! note "Summary"
        Single entry point that dispatches to one of the correlation measures or tests.

    ???+ abstract "Details"
        The `algorithm` argument is looked up in a table of aliases, and the call is forwarded, together with any extra keyword arguments, to the matching implementation imported from `ts_stat_tests.correlation.algorithms`. The value of `x` is handed to that implementation as `x` for the ACF, PACF, CCF and Ljung-Box algorithms, as `resid` for the Lagrange Multiplier test, and as `res` for the Breusch-Godfrey test.

        The supported algorithms are:

        - **Autocorrelation Function (ACF)**: Measures the correlation of a signal with a delayed copy of itself.
        - **Partial Autocorrelation Function (PACF)**: Measures the correlation between a signal and its lagged values after removing the effects of intermediate lags.
        - **Cross-Correlation Function (CCF)**: Measures the correlation between two signals at different lags. Requires a non-`None` `y` keyword argument.
        - **Ljung-Box Test**: Tests for the presence of autocorrelation in the residuals of a model.
        - **Lagrange Multiplier (LM) Test**: A generic test for autocorrelation, often used for ARCH effects.
        - **Breusch-Godfrey Test**: A more general version of the LM test for serial correlation in residuals.

    Params:
        x (Union[ArrayLike, ArrayLike1D, RegressionResults, RegressionResultsWrapper]):
            The input time series data or regression results.
        algorithm (str):
            The correlation algorithm to use. Default: `"acf"`. Options include:
            - "acf", "auto", "ac": Autocorrelation Function
            - "pacf", "partial", "pc": Partial Autocorrelation Function
            - "ccf", "cross", "cross-correlation", "cc": Cross-Correlation Function
            - "lb", "alb", "acorr_ljungbox", "acor_lb", "a_lb", "ljungbox": Ljung-Box Test
            - "lm", "alm", "acorr_lm", "a_lm": Lagrange Multiplier Test
            - "bglm", "breusch_godfrey", "bg": Breusch-Godfrey Test
        kwargs (Union[float, int, str, bool, ArrayLike, None]):
            Additional keyword arguments, passed through unchanged to the chosen algorithm.

    Raises:
        (ValueError):
            If `algorithm` is not one of the recognised aliases.
        (ValueError):
            If a cross-correlation alias is requested and `y` is missing from `kwargs` or is `None`.

    Returns:
        (Union[NDArray[np.float64], tuple[NDArray[np.float64], ...], pd.DataFrame, tuple[float, float, float, float], tuple[float, float, float, float, ResultsStore]]):
            Whatever the selected algorithm returns.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> from ts_stat_tests.correlation.tests import correlation
        >>> from ts_stat_tests.utils.data import data_normal
        >>> normal = data_normal

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Autocorrelation (ACF)"}
        >>> res = correlation(normal, algorithm="acf", nlags=10)
        >>> print(f"Lag 1 ACF: {res[1]:.4f}")
        Lag 1 ACF: 0.0236

        ```

        ```pycon {.py .python linenums="1" title="Example 2: Ljung-Box test"}
        >>> res = correlation(normal, algorithm="lb", lags=[5])
        >>> print(res)
            lb_stat  lb_pvalue
        5  7.882362   0.162839

        ```

    ??? tip "See Also"
        - [`ts_stat_tests.correlation.algorithms.acf`][ts_stat_tests.correlation.algorithms.acf]: Autocorrelation Function algorithm.
        - [`ts_stat_tests.correlation.algorithms.pacf`][ts_stat_tests.correlation.algorithms.pacf]: Partial Autocorrelation Function algorithm.
        - [`ts_stat_tests.correlation.algorithms.ccf`][ts_stat_tests.correlation.algorithms.ccf]: Cross-Correlation Function algorithm.
        - [`ts_stat_tests.correlation.algorithms.lb`][ts_stat_tests.correlation.algorithms.lb]: Ljung-Box Test algorithm.
        - [`ts_stat_tests.correlation.algorithms.lm`][ts_stat_tests.correlation.algorithms.lm]: Lagrange Multiplier Test algorithm.
        - [`ts_stat_tests.correlation.algorithms.bglm`][ts_stat_tests.correlation.algorithms.bglm]: Breusch-Godfrey Test algorithm.
    """

    # Alias table; also handed to the error message builder, so its contents
    # and ordering are kept exactly as they were.
    options: dict[str, tuple[str, ...]] = {
        "acf": ("acf", "auto", "ac"),
        "pacf": ("pacf", "partial", "pc"),
        "ccf": ("ccf", "cross", "cross-correlation", "cc"),
        "lb": ("alb", "acorr_ljungbox", "acor_lb", "a_lb", "lb", "ljungbox"),
        "lm": ("alm", "acorr_lm", "a_lm", "lm"),
        "bglm": ("bglm", "breusch_godfrey", "bg"),
    }

    # Map the requested alias back to the key of the family that lists it.
    # The alias tuples are disjoint, so at most one family can match.
    family = next(
        (key for key, aliases in options.items() if algorithm in aliases),
        None,
    )

    if family is None:
        raise ValueError(
            generate_error_message(
                parameter_name="algorithm",
                value_parsed=algorithm,
                options=options,
            )
        )

    # Cross-correlation needs a second series; reject the call before dispatching.
    if family == "ccf" and kwargs.get("y") is None:
        raise ValueError("The 'ccf' algorithm requires a 'y' parameter.")

    # Each backend paired with the keyword name under which it receives `x`.
    backends = {
        "acf": (_acf, "x"),
        "pacf": (_pacf, "x"),
        "ccf": (_ccf, "x"),
        "lb": (_lb, "x"),
        "lm": (_lm, "resid"),
        "bglm": (_bglm, "res"),
    }
    backend, data_keyword = backends[family]
    return backend(**{data_keyword: x}, **kwargs)  # type: ignore

243

244

@typechecked
def is_correlated(
    x: Union[ArrayLike, ArrayLike1D, RegressionResults, RegressionResultsWrapper],
    algorithm: str = "lb",
    alpha: float = 0.05,
    **kwargs: Union[float, int, str, bool, ArrayLike, None],
) -> dict[str, Union[str, float, bool, None]]:
    """
    !!! note "Summary"
        Test whether a given data set is `correlated` or not.

    ???+ abstract "Details"
        This function runs one of the supported tests through [`correlation()`][ts_stat_tests.correlation.tests.correlation] and compares the resulting p-value with `alpha`. By default, it uses the Ljung-Box test.

        The `algorithm` is validated before anything is computed, so an algorithm that cannot be turned into a boolean answer (for example `"acf"`) is rejected immediately instead of being evaluated first.

        - **Ljung-Box (`lb`)**: Tests the null hypothesis that the data are independently distributed (i.e. no autocorrelation). The smallest p-value across the returned lags is compared with `alpha`; if it is below `alpha`, the series is considered `correlated`. The reported statistic is the one belonging to that lag.
        - **LM Test (`lm`)**: Tests for serial correlation. The first element of the result is reported as the statistic and the second as the p-value; if the p-value is less than `alpha`, the series is considered `correlated`.
        - **Breusch-Godfrey (`bglm`)**: Tests for serial correlation in residuals. The result is interpreted in the same way as for the LM test.

    Params:
        x (Union[ArrayLike, ArrayLike1D, RegressionResults, RegressionResultsWrapper]):
            The input time series data or regression results.
        algorithm (str):
            The correlation algorithm to use. Options include:
            - `"lb"`, `"alb"`, `"acorr_ljungbox"`, `"acor_lb"`, `"a_lb"`, `"ljungbox"`: Ljung-Box Test (default)
            - `"lm"`, `"alm"`, `"acorr_lm"`, `"a_lm"`: Lagrange Multiplier Test
            - `"bglm"`, `"breusch_godfrey"`, `"bg"`: Breusch-Godfrey Test
        alpha (float, optional):
            The significance level for the test. Default: `0.05`.
        kwargs (Union[float, int, str, bool, ArrayLike, None]):
            Additional arguments to pass to the underlying algorithm.

    Raises:
        (ValueError):
            If `algorithm` is not one of the aliases listed above. This is raised before the underlying test is run.

    Returns:
        (dict[str, Union[str, float, bool, None]]):
            A dictionary containing:
            - `"result"` (bool): `True` if the series is significantly correlated.
            - `"statistic"` (float): The test statistic.
            - `"pvalue"` (float): The p-value of the test.
            - `"alpha"` (float): The significance level used.
            - `"algorithm"` (str): The algorithm name, exactly as it was passed in.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> from ts_stat_tests.correlation.tests import is_correlated
        >>> from ts_stat_tests.utils.data import data_normal
        >>> normal = data_normal

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Ljung-Box test on random data"}
        >>> res = is_correlated(normal, algorithm="lb", lags=[5])
        >>> res["result"]
        False
        >>> print(f"p-value: {res['pvalue']:.4f}")
        p-value: 0.1628

        ```

        ```pycon {.py .python linenums="1" title="Example 2: LM test"}
        >>> res = is_correlated(normal, algorithm="lm", nlags=5)
        >>> res["result"]
        False

        ```

    ??? tip "See Also"
        - [`correlation()`][ts_stat_tests.correlation.tests.correlation]: Dispatcher for correlation measures and tests.
        - [`ts_stat_tests.correlation.algorithms.lb`][ts_stat_tests.correlation.algorithms.lb]: Ljung-Box Test algorithm.
        - [`ts_stat_tests.correlation.algorithms.lm`][ts_stat_tests.correlation.algorithms.lm]: Lagrange Multiplier Test algorithm.
        - [`ts_stat_tests.correlation.algorithms.bglm`][ts_stat_tests.correlation.algorithms.bglm]: Breusch-Godfrey Test algorithm.
    """
    options: dict[str, tuple[str, ...]] = {
        "lb": ("alb", "acorr_ljungbox", "acor_lb", "a_lb", "lb", "ljungbox"),
        "lm": ("alm", "acorr_lm", "a_lm", "lm"),
        "bglm": ("bglm", "breusch_godfrey", "bg"),
    }

    uses_ljung_box: bool = algorithm in options["lb"]
    uses_lm_tuple: bool = algorithm in options["lm"] or algorithm in options["bglm"]

    # Fail fast: reject algorithms that cannot be reduced to a p-value check
    # *before* dispatching, so no work is wasted and the caller is not shown
    # an unrelated error from an algorithm that is unsupported here anyway.
    if not (uses_ljung_box or uses_lm_tuple):
        raise ValueError(
            f"Algorithm '{algorithm}' is not supported for 'is_correlated'. "
            f"Supported algorithms for boolean check are: 'lb', 'lm', 'bglm'."
        )

    res = correlation(x=x, algorithm=algorithm, **kwargs)  # type: ignore

    if uses_ljung_box:
        # `res` is a DataFrame with one row per tested lag. If any lag shows
        # correlation the series counts as correlated, so use the smallest
        # p-value and report the statistic of that same lag.
        pval = float(res["lb_pvalue"].min())
        idx = res["lb_pvalue"].idxmin()
        stat = float(res.loc[idx, "lb_stat"])
    else:
        # `res` is a tuple starting with (lm, lmpval, fval, fpval).
        stat = float(res[0])
        pval = float(res[1])

    return {
        "result": bool(pval < alpha),
        "statistic": stat,
        "pvalue": pval,
        "alpha": alpha,
        "algorithm": algorithm,
    }

Coverage for src / ts_stat_tests / correlation / tests.py: 100%

53 statements