Coverage for src / ts_stat_tests / linearity / algorithms.py: 100%

25 statements  

« prev     ^ index     » next       coverage.py v7.13.2, created at 2026-02-01 09:48 +0000

1# ============================================================================ # 

2# # 

3# Title: Linearity Algorithms # 

4# Purpose: Implementation of linearity test algorithms using statsmodels. # 

5# # 

6# ============================================================================ # 

7 

8 

9# ---------------------------------------------------------------------------- # 

10# # 

11# Overview #### 

12# # 

13# ---------------------------------------------------------------------------- # 

14 

15 

16# ---------------------------------------------------------------------------- # 

17# Description #### 

18# ---------------------------------------------------------------------------- # 

19 

20 

21""" 

22!!! note "Summary" 

23 This module provides implementations of various linearity test algorithms using the `statsmodels` library. 

24""" 

25 

26 

27# ---------------------------------------------------------------------------- # 

28# # 

29# Setup #### 

30# # 

31# ---------------------------------------------------------------------------- # 

32 

33 

34# ---------------------------------------------------------------------------- # 

35# Imports #### 

36# ---------------------------------------------------------------------------- # 

37 

38 

39# ## Python StdLib Imports ---- 

40from typing import Callable, Literal, Optional, Union 

41 

42# ## Python Third Party Imports ---- 

43import numpy as np 

44from numpy.typing import ArrayLike, NDArray 

45from statsmodels.regression.linear_model import ( 

46 RegressionResults, 

47 RegressionResultsWrapper, 

48) 

49from statsmodels.stats.api import ( 

50 linear_harvey_collier, 

51 linear_lm, 

52 linear_rainbow, 

53 linear_reset, 

54) 

55from statsmodels.stats.contrast import ContrastResults 

56from typeguard import typechecked 

57 

58 

59# ---------------------------------------------------------------------------- # 

60# Exports #### 

61# ---------------------------------------------------------------------------- # 

62 

63 

# Public API of this module: the four linearity-test wrappers defined in this file.
__all__: list[str] = ["hc", "lm", "rb", "rr"]

65 

66 

67## --------------------------------------------------------------------------- # 

68## Constants #### 

69## --------------------------------------------------------------------------- # 

70 

71 

# Values accepted by the `test_type` parameter of `rr`.
VALID_RR_TEST_TYPE_OPTIONS = Literal["fitted", "exog", "princomp"]

# Values accepted by the `cov_type` parameter of `rr`.
VALID_RR_COV_TYPE_OPTIONS = Literal["nonrobust", "HC0", "HC1", "HC2", "HC3", "HAC"]

74 

75 

76# ---------------------------------------------------------------------------- # 

77# # 

78# Algorithms #### 

79# # 

80# ---------------------------------------------------------------------------- # 

81 

82 

@typechecked
def hc(
    res: Union[RegressionResults, RegressionResultsWrapper],
    order_by: Optional[ArrayLike] = None,
    skip: Optional[int] = None,
) -> tuple[float, float]:
    r"""
    !!! note "Summary"
        The Harvey-Collier test is a statistical test used to determine whether a fitted regression is adequately described by a linear specification.

    ???+ abstract "Details"
        The Harvey-Collier test is based on a recursive residuals analysis. The test statistic follows a t-distribution under the null hypothesis of linearity.

        This function is a thin wrapper around `statsmodels`' `linear_harvey_collier`; all arguments are forwarded unchanged and the result is reduced to a pair of plain Python floats.

    Params:
        res (Union[RegressionResults, RegressionResultsWrapper]):
            The results of a linear regression model from `statsmodels`.
        order_by (Optional[ArrayLike]):
            Variable(s) to order by. If `None`, the original order is used.
            Default: `None`
        skip (Optional[int]):
            The number of observations to skip at the beginning of the series.
            Default: `None`

    Returns:
        (tuple[float, float]):
            - `statistic` (float): The t-statistic of the test.
            - `pvalue` (float): The p-value associated with the t-statistic.

            Either element is `nan` when the underlying result object does not expose the corresponding attribute.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> import statsmodels.api as sm
        >>> from ts_stat_tests.linearity.algorithms import hc
        >>> from ts_stat_tests.utils.data import data_line, data_random
        >>> X = sm.add_constant(data_line)
        >>> y = 3 + 2 * data_line + 2 * data_line**2 + data_random
        >>> res = sm.OLS(y, X).fit()

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Basic Usage"}
        >>> hc_stat, hc_pval = hc(res)
        >>> isinstance(hc_stat, float) and isinstance(hc_pval, float)
        True

        ```

    ??? question "References"
        - Harvey, A.C. and Collier, P. (1977). "Testing for Functional Form in Regression with Application to an Agricultural Production Function." Journal of Econometrics, 6(1), 103-119.
    """
    res_hc = linear_harvey_collier(res=res, order_by=order_by, skip=skip)
    # Read the fields via getattr with a NaN fallback so that a result object
    # lacking either attribute produces NaN instead of raising AttributeError.
    statistic = float(getattr(res_hc, "statistic", np.nan))
    pvalue = float(getattr(res_hc, "pvalue", np.nan))
    return statistic, pvalue

143 

144 

@typechecked
def lm(
    resid: NDArray[np.float64], exog: NDArray[np.float64], func: Optional[Callable] = None
) -> tuple[float, float, float, float]:
    r"""
    !!! note "Summary"
        Lagrange Multiplier test for functional form / linearity.

    ???+ abstract "Details"
        This test checks whether the linear specification is appropriate for the data. It is a general test for functional form misspecification.

        This function is a thin wrapper around `statsmodels`' `linear_lm`; the three-element result is flattened into four plain Python floats.

    Params:
        resid (NDArray[np.float64]):
            The residuals from a linear regression.
        exog (NDArray[np.float64]):
            The exogenous variables (predictors) used in the regression.
        func (Optional[Callable]):
            A function that takes `exog` and returns a transformed version of it to test against.
            Default: `None`

    Returns:
        (tuple[float, float, float, float]):
            - `lm` (float): Lagrange multiplier statistic.
            - `lmpval` (float): p-value for LM statistic.
            - `fval` (float): F-statistic.
            - `fpval` (float): p-value for F-statistic.

            `fval` and `fpval` are `nan` when the F-test result object does not expose `fvalue` / `pvalue`.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> import statsmodels.api as sm
        >>> from ts_stat_tests.linearity.algorithms import lm
        >>> from ts_stat_tests.utils.data import data_random, data_line
        >>> exog = sm.add_constant(data_line.reshape(-1, 1))
        >>> y = 3 + 2 * data_line + 2 * data_line**2 + data_random
        >>> res = sm.OLS(y, exog).fit()
        >>> resid = res.resid

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Basic Usage"}
        >>> lm_stat, lm_pval, f_stat, f_pval = lm(resid, exog)
        >>> print(f"LM Statistic: {lm_stat:.2f}")
        LM Statistic: 1000.00
        >>> print(f"LM p-value: {lm_pval:.4f}")
        LM p-value: 0.0000

        ```
    """
    res_lm = linear_lm(resid=resid, exog=exog, func=func)
    # Elements 0 and 1 are the LM statistic and its p-value; element 2 is the
    # F-test result object, read via getattr so a missing field becomes NaN.
    ftest = res_lm[2]
    return (
        float(res_lm[0]),
        float(res_lm[1]),
        float(getattr(ftest, "fvalue", np.nan)),
        float(getattr(ftest, "pvalue", np.nan)),
    )

201 

202 

@typechecked
def rb(
    res: Union[RegressionResults, RegressionResultsWrapper],
    frac: float = 0.5,
    order_by: Optional[Union[ArrayLike, str, list[str]]] = None,
    use_distance: bool = False,
    center: Optional[Union[float, int]] = None,
) -> tuple[float, float]:
    r"""
    !!! note "Summary"
        The Rainbow test for linearity.

    ???+ abstract "Details"
        The Rainbow test is a test for linearity that is based on the idea that if a relationship is non-linear, it is more likely to be linear in a subset of the data than in the entire dataset.

        This function is a thin wrapper around `statsmodels`' `linear_rainbow`; all arguments are forwarded unchanged and the first two elements of the result are returned as plain Python floats.

    Params:
        res (Union[RegressionResults, RegressionResultsWrapper]):
            The results of a linear regression model from `statsmodels`.
        frac (float):
            The fraction of the data to use for the subset.
            Default: `0.5`
        order_by (Optional[Union[ArrayLike, str, list[str]]]):
            Variable(s) to order by. If `None`, the original order is used.
            Default: `None`
        use_distance (bool):
            Whether to use distance from the center for ordering.
            Default: `False`
        center (Optional[Union[float, int]]):
            The center to use for distance calculation.
            Default: `None`

    Returns:
        (tuple[float, float]):
            - `fstat` (float): The F-statistic of the test.
            - `pvalue` (float): The p-value associated with the F-statistic.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> import statsmodels.api as sm
        >>> from ts_stat_tests.linearity.algorithms import rb
        >>> from ts_stat_tests.utils.data import data_line, data_random
        >>> X = sm.add_constant(data_line)
        >>> y = 3 + 2 * data_line + 2 * data_line**2 + data_random
        >>> res = sm.OLS(y, X).fit()

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Basic Usage"}
        >>> rb_stat, rb_pval = rb(res)
        >>> print(f"Rainbow F-Statistic: {rb_stat:.2f}")
        Rainbow F-Statistic: 30.88
        >>> print(f"p-value: {rb_pval:.4e}")
        p-value: 1.8319e-230

        ```

    ??? question "References"
        - Utts, J.M. (1982). "The Rainbow Test for Linearity." Biometrika, 69(2), 319-326.
    """
    res_rb = linear_rainbow(
        res=res,
        frac=frac,
        order_by=order_by,
        use_distance=use_distance,
        center=center,
    )
    # Element 0 is the F-statistic, element 1 its p-value.
    return float(res_rb[0]), float(res_rb[1])

264 

265 

@typechecked
def rr(
    res: Union[RegressionResults, RegressionResultsWrapper],
    power: Union[int, list[int]] = 3,
    test_type: VALID_RR_TEST_TYPE_OPTIONS = "fitted",
    use_f: bool = False,
    cov_type: VALID_RR_COV_TYPE_OPTIONS = "nonrobust",
    *,
    cov_kwargs: Optional[dict] = None,
) -> ContrastResults:
    r"""
    !!! note "Summary"
        Ramsey's RESET (Regression Specification Error Test) for linearity.

    ???+ abstract "Details"
        RESET test for functional form misspecification. The test is based on the idea that if the model is correctly specified, then powers of the fitted values (or other variables) should not have any explanatory power when added to the model.

        This function is a thin wrapper around `statsmodels`' `linear_reset`; all arguments are forwarded unchanged and its result object is returned as-is.

    Params:
        res (Union[RegressionResults, RegressionResultsWrapper]):
            The results of a linear regression model from `statsmodels`.
        power (Union[int, list[int]]):
            The powers of the fitted values or exogenous variables to include in the auxiliary regression.
            Default: `3`
        test_type (VALID_RR_TEST_TYPE_OPTIONS):
            The type of test to perform. Options are `"fitted"`, `"exog"`, or `"princomp"`.
            Default: `"fitted"`
        use_f (bool):
            Whether to use an F-test or a Chi-squared test.
            Default: `False`
        cov_type (VALID_RR_COV_TYPE_OPTIONS):
            The type of covariance matrix to use in the test.
            Default: `"nonrobust"`
        cov_kwargs (Optional[dict]):
            Optional keyword arguments for the covariance matrix calculation. Keyword-only.
            Default: `None`

    Returns:
        (ContrastResults):
            The results of the RESET test.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> import statsmodels.api as sm
        >>> from ts_stat_tests.linearity.algorithms import rr
        >>> from ts_stat_tests.utils.data import data_line, data_random
        >>> X = sm.add_constant(data_line)
        >>> y = 3 + 2 * data_line + 2 * data_line**2 + data_random
        >>> res = sm.OLS(y, X).fit()

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Basic Usage"}
        >>> rr_res = rr(res)
        >>> print(f"RESET Test Statistic: {rr_res.statistic:.2f}")
        RESET Test Statistic: 225070.65

        ```

    ??? question "References"
        - Ramsey, J.B. (1969). "Tests for Specification Errors in Classical Linear Least-squares Regression Analysis." Journal of the Royal Statistical Society, Series B, 31(2), 350-371.
    """
    return linear_reset(
        res=res,
        power=power,  # type: ignore[arg-type]
        test_type=test_type,
        use_f=use_f,
        cov_type=cov_type,
        cov_kwargs=cov_kwargs,
    )