Coverage for src/ts_stat_tests/linearity/algorithms.py: 100%

1# ============================================================================ #

2# #

3# Title: Linearity Algorithms #

4# Purpose: Implementation of linearity test algorithms using statsmodels. #

5# #

6# ============================================================================ #

9# ---------------------------------------------------------------------------- #

10# #

11# Overview ####

12# #

13# ---------------------------------------------------------------------------- #

16# ---------------------------------------------------------------------------- #

17# Description ####

18# ---------------------------------------------------------------------------- #

21"""

22!!! note "Summary"

23 This module provides implementations of various linearity test algorithms using the `statsmodels` library.

24"""

27# ---------------------------------------------------------------------------- #

28# #

29# Setup ####

30# #

31# ---------------------------------------------------------------------------- #

34# ---------------------------------------------------------------------------- #

35# Imports ####

36# ---------------------------------------------------------------------------- #

39# ## Python StdLib Imports ----

40from typing import Callable, Literal, Optional, Union

42# ## Python Third Party Imports ----

43import numpy as np

44from numpy.typing import ArrayLike, NDArray

45from statsmodels.regression.linear_model import (

46 RegressionResults,

47 RegressionResultsWrapper,

48)

49from statsmodels.stats.api import (

50 linear_harvey_collier,

51 linear_lm,

52 linear_rainbow,

53 linear_reset,

54)

55from statsmodels.stats.contrast import ContrastResults

56from typeguard import typechecked

59# ---------------------------------------------------------------------------- #

60# Exports ####

61# ---------------------------------------------------------------------------- #

64__all__: list[str] = ["hc", "lm", "rb", "rr"]

67## --------------------------------------------------------------------------- #

68## Constants ####

69## --------------------------------------------------------------------------- #

# Accepted values for the `test_type` argument of `rr()`.
VALID_RR_TEST_TYPE_OPTIONS = Literal[
    "fitted",
    "exog",
    "princomp",
]

# Accepted values for the `cov_type` argument of `rr()`.
VALID_RR_COV_TYPE_OPTIONS = Literal[
    "nonrobust",
    "HC0",
    "HC1",
    "HC2",
    "HC3",
    "HAC",
]

76# ---------------------------------------------------------------------------- #

77# #

78# Algorithms ####

79# #

80# ---------------------------------------------------------------------------- #

@typechecked
def hc(
    res: Union[RegressionResults, RegressionResultsWrapper],
    order_by: Optional[ArrayLike] = None,
    skip: Optional[int] = None,
) -> tuple[float, float]:
    r"""
    !!! note "Summary"
        The Harvey-Collier test checks whether a fitted regression is correctly specified as linear. In time series forecasting, it can be used to evaluate whether a linear specification is adequate for the modelled series.

    ???+ abstract "Details"
        The Harvey-Collier test is based on a recursive residuals analysis. The test statistic follows a t-distribution under the null hypothesis of linearity.

        This function is a thin wrapper around `statsmodels.stats.api.linear_harvey_collier`: all arguments are forwarded unchanged, and the `statistic` and `pvalue` attributes of the returned object are converted to plain Python floats. If either attribute is missing from the returned object, `nan` is returned in its place.

    Params:
        res (Union[RegressionResults, RegressionResultsWrapper]):
            The results of a linear regression model from `statsmodels`.
        order_by (Optional[ArrayLike]):
            Variable(s) to order by. If `None`, the original order is used.
            Default: `None`
        skip (Optional[int]):
            The number of observations to skip at the beginning of the series. Forwarded as-is to `statsmodels`, which chooses its own default when `None` is given.
            Default: `None`

    Returns:
        (tuple[float, float]):
            - `statistic` (float): The t-statistic of the test.
            - `pvalue` (float): The p-value associated with the t-statistic.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> import statsmodels.api as sm
        >>> from ts_stat_tests.linearity.algorithms import hc
        >>> from ts_stat_tests.utils.data import data_line, data_random
        >>> X = sm.add_constant(data_line)
        >>> y = 3 + 2 * data_line + 2 * data_line**2 + data_random
        >>> res = sm.OLS(y, X).fit()

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Basic Usage"}
        >>> hc_stat, hc_pval = hc(res)
        >>> isinstance(hc_stat, float) and isinstance(hc_pval, float)
        True

        ```

    ??? question "References"
        - Harvey, A.C. and Collier, P. (1977). "Testing for Functional Form in Regression with Application to an Agricultural Production Function." Journal of Econometrics, 6(1), 103-119.
    """
    res_hc = linear_harvey_collier(res=res, order_by=order_by, skip=skip)
    # `getattr` with a NaN fallback keeps the declared return type (two floats)
    # even if the object returned by statsmodels lacks one of the attributes.
    statistic = float(getattr(res_hc, "statistic", np.nan))
    pvalue = float(getattr(res_hc, "pvalue", np.nan))
    return statistic, pvalue

143

144

@typechecked
def lm(
    resid: NDArray[np.float64],
    exog: NDArray[np.float64],
    func: Optional[Callable] = None,
) -> tuple[float, float, float, float]:
    r"""
    !!! note "Summary"
        Lagrange Multiplier test for linearity (functional form) of a regression.

    ???+ abstract "Details"
        A general test for functional form misspecification: it checks whether a linear specification is appropriate for the data.

        The computation is delegated to `statsmodels.stats.api.linear_lm`. The first two elements of its result are converted to floats, and the `fvalue` and `pvalue` attributes of the third element supply the F-test figures (`nan` is substituted if an attribute is absent).

    Params:
        resid (NDArray[np.float64]):
            The residuals from a linear regression.
        exog (NDArray[np.float64]):
            The exogenous variables (predictors) used in the regression.
        func (Optional[Callable]):
            A function that takes `exog` and returns a transformed version of it to test against.
            Default: `None`

    Returns:
        (tuple[float, float, float, float]):
            - `lm` (float): Lagrange multiplier statistic.
            - `lmpval` (float): p-value for LM statistic.
            - `fval` (float): F-statistic.
            - `fpval` (float): p-value for F-statistic.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> import statsmodels.api as sm
        >>> from ts_stat_tests.linearity.algorithms import lm
        >>> from ts_stat_tests.utils.data import data_random, data_line
        >>> exog = sm.add_constant(data_line.reshape(-1, 1))
        >>> y = 3 + 2 * data_line + 2 * data_line**2 + data_random
        >>> res = sm.OLS(y, exog).fit()
        >>> resid = res.resid

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Basic Usage"}
        >>> lm_stat, lm_pval, f_stat, f_pval = lm(resid, exog)
        >>> print(f"LM Statistic: {lm_stat:.2f}")
        LM Statistic: 1000.00
        >>> print(f"LM p-value: {lm_pval:.4f}")
        LM p-value: 0.0000

        ```
    """
    outcome = linear_lm(resid=resid, exog=exog, func=func)

    # Positions 0 and 1 hold the LM statistic and its p-value.
    lm_value = float(outcome[0])
    lm_pvalue = float(outcome[1])

    # Position 2 is the F-test result object; read its figures defensively.
    f_test = outcome[2]
    f_value = float(getattr(f_test, "fvalue", np.nan))
    f_pvalue = float(getattr(f_test, "pvalue", np.nan))

    return lm_value, lm_pvalue, f_value, f_pvalue

201

202

@typechecked
def rb(
    res: Union[RegressionResults, RegressionResultsWrapper],
    frac: float = 0.5,
    order_by: Optional[Union[ArrayLike, str, list[str]]] = None,
    use_distance: bool = False,
    center: Optional[Union[float, int]] = None,
) -> tuple[float, float]:
    r"""
    !!! note "Summary"
        Rainbow test for linearity of a fitted regression.

    ???+ abstract "Details"
        The Rainbow test builds on the idea that a non-linear relationship is more likely to look linear on a subset of the data than on the entire dataset.

        The computation is delegated to `statsmodels.stats.api.linear_rainbow`; the first two elements of its result are returned as plain Python floats.

    Params:
        res (Union[RegressionResults, RegressionResultsWrapper]):
            The results of a linear regression model from `statsmodels`.
        frac (float):
            The fraction of the data to use for the subset.
            Default: `0.5`
        order_by (Optional[Union[ArrayLike, str, list[str]]]):
            Variable(s) to order by. If `None`, the original order is used.
            Default: `None`
        use_distance (bool):
            Whether to use distance from the center for ordering.
            Default: `False`
        center (Optional[Union[float, int]]):
            The center to use for distance calculation.
            Default: `None`

    Returns:
        (tuple[float, float]):
            - `fstat` (float): The F-statistic of the test.
            - `pvalue` (float): The p-value associated with the F-statistic.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> import statsmodels.api as sm
        >>> from ts_stat_tests.linearity.algorithms import rb
        >>> from ts_stat_tests.utils.data import data_line, data_random
        >>> X = sm.add_constant(data_line)
        >>> y = 3 + 2 * data_line + 2 * data_line**2 + data_random
        >>> res = sm.OLS(y, X).fit()

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Basic Usage"}
        >>> rb_stat, rb_pval = rb(res)
        >>> print(f"Rainbow F-Statistic: {rb_stat:.2f}")
        Rainbow F-Statistic: 30.88
        >>> print(f"p-value: {rb_pval:.4e}")
        p-value: 1.8319e-230

        ```

    ??? question "References"
        - Utts, J.M. (1982). "The Rainbow Test for Linearity." Biometrika, 69(2), 319-326.
    """
    rainbow = linear_rainbow(
        res=res,
        frac=frac,
        order_by=order_by,
        use_distance=use_distance,
        center=center,
    )
    # Element 0 is the F-statistic, element 1 its p-value.
    f_stat = float(rainbow[0])
    p_value = float(rainbow[1])
    return f_stat, p_value

264

265

@typechecked
def rr(
    res: Union[RegressionResults, RegressionResultsWrapper],
    power: Union[int, list[int]] = 3,
    test_type: VALID_RR_TEST_TYPE_OPTIONS = "fitted",
    use_f: bool = False,
    cov_type: VALID_RR_COV_TYPE_OPTIONS = "nonrobust",
    *,
    cov_kwargs: Optional[dict] = None,
) -> ContrastResults:
    r"""
    !!! note "Summary"
        Ramsey's RESET (Regression Specification Error Test) for linearity.

    ???+ abstract "Details"
        RESET is a test for functional form misspecification. If the model is correctly specified, powers of the fitted values (or other variables) should not have any explanatory power when added to the model.

        The computation is delegated to `statsmodels.stats.api.linear_reset`, and its result object is returned unmodified.

    Params:
        res (Union[RegressionResults, RegressionResultsWrapper]):
            The results of a linear regression model from `statsmodels`.
        power (Union[int, list[int]]):
            The powers of the fitted values or exogenous variables to include in the auxiliary regression.
            Default: `3`
        test_type (VALID_RR_TEST_TYPE_OPTIONS):
            The type of test to perform. Options are `"fitted"`, `"exog"`, or `"princomp"`.
            Default: `"fitted"`
        use_f (bool):
            If `True`, an F-test is used; otherwise a Chi-squared test is used.
            Default: `False`
        cov_type (VALID_RR_COV_TYPE_OPTIONS):
            The type of covariance matrix to use in the test.
            Default: `"nonrobust"`
        cov_kwargs (Optional[dict]):
            Optional keyword arguments for the covariance matrix calculation. Keyword-only.
            Default: `None`

    Returns:
        (ContrastResults):
            The results of the RESET test.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> import statsmodels.api as sm
        >>> from ts_stat_tests.linearity.algorithms import rr
        >>> from ts_stat_tests.utils.data import data_line, data_random
        >>> X = sm.add_constant(data_line)
        >>> y = 3 + 2 * data_line + 2 * data_line**2 + data_random
        >>> res = sm.OLS(y, X).fit()

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Basic Usage"}
        >>> rr_res = rr(res)
        >>> print(f"RESET Test Statistic: {rr_res.statistic:.2f}")
        RESET Test Statistic: 225070.65

        ```

    ??? question "References"
        - Ramsey, J.B. (1969). "Tests for Specification Errors in Classical Linear Least-squares Regression Analysis." Journal of the Royal Statistical Society, Series B, 31(2), 350-371.
    """
    reset_outcome = linear_reset(
        res=res,
        power=power,  # type: ignore[arg-type]
        test_type=test_type,
        use_f=use_f,
        cov_type=cov_type,
        cov_kwargs=cov_kwargs,
    )
    return reset_outcome

Coverage for src / ts_stat_tests / linearity / algorithms.py: 100%

25 statements