Coverage for src / ts_stat_tests / normality / tests.py: 100%


# ============================================================================ #
#                                                                              #
#     Title: Normality Tests                                                   #
#     Purpose: Convenience functions for normality algorithms.                 #
#                                                                              #
# ============================================================================ #


# ---------------------------------------------------------------------------- #
#                                                                              #
#     Overview                                                              ####
#                                                                              #
# ---------------------------------------------------------------------------- #


# ---------------------------------------------------------------------------- #
#     Description                                                           ####
# ---------------------------------------------------------------------------- #


"""
!!! note "Summary"
    This module contains convenience functions and tests for normality measures, allowing for easy access to different normality algorithms.
"""


# ---------------------------------------------------------------------------- #
#                                                                              #
#     Setup                                                                 ####
#                                                                              #
# ---------------------------------------------------------------------------- #


# ---------------------------------------------------------------------------- #
#     Imports                                                               ####
# ---------------------------------------------------------------------------- #


# ## Python StdLib Imports ----
from typing import Any, Union

# ## Python Third Party Imports ----
import numpy as np
from numpy.typing import ArrayLike
from scipy.stats._morestats import AndersonResult, ShapiroResult
from scipy.stats._stats_py import NormaltestResult
from typeguard import typechecked

# ## Local First Party Imports ----
from ts_stat_tests.normality.algorithms import (
    VALID_AD_DIST_OPTIONS,
    VALID_DP_NAN_POLICY_OPTIONS,
    ad as _ad,
    dp as _dp,
    jb as _jb,
    ob as _ob,
    sw as _sw,
)
from ts_stat_tests.utils.errors import generate_error_message


# ---------------------------------------------------------------------------- #
#     Exports                                                               ####
# ---------------------------------------------------------------------------- #


__all__: list[str] = ["normality", "is_normal"]


# ---------------------------------------------------------------------------- #
#                                                                              #
#     Tests                                                                 ####
#                                                                              #
# ---------------------------------------------------------------------------- #


@typechecked
def normality(
    x: ArrayLike,
    algorithm: str = "dp",
    axis: int = 0,
    nan_policy: VALID_DP_NAN_POLICY_OPTIONS = "propagate",
    dist: VALID_AD_DIST_OPTIONS = "norm",
) -> Union[tuple[float, ...], NormaltestResult, ShapiroResult, AndersonResult]:
    """
    !!! note "Summary"
        Perform a normality test on the given data.

    ???+ abstract "Details"
        This function is a convenience wrapper around the five underlying algorithms:<br>
        - [`jb()`][ts_stat_tests.normality.algorithms.jb]<br>
        - [`ob()`][ts_stat_tests.normality.algorithms.ob]<br>
        - [`sw()`][ts_stat_tests.normality.algorithms.sw]<br>
        - [`dp()`][ts_stat_tests.normality.algorithms.dp]<br>
        - [`ad()`][ts_stat_tests.normality.algorithms.ad]

    Params:
        x (ArrayLike):
            The data to be checked. Should be a `1-D` or `N-D` data array.
        algorithm (str):
            Which normality algorithm to use.<br>
            - `jb()`: `["jb", "jarque", "jarque-bera"]`<br>
            - `ob()`: `["ob", "omni", "omnibus"]`<br>
            - `sw()`: `["sw", "shapiro", "shapiro-wilk"]`<br>
            - `dp()`: `["dp", "dagostino", "dagostino-pearson"]`<br>
            - `ad()`: `["ad", "anderson", "anderson-darling"]`<br>
            Default: `"dp"`
        axis (int):
            Axis along which to compute the test.
            Default: `0`
        nan_policy (VALID_DP_NAN_POLICY_OPTIONS):
            Defines how to handle when input contains `NaN`.<br>
            - `propagate`: returns `NaN`<br>
            - `raise`: throws an error<br>
            - `omit`: performs the calculations ignoring `NaN` values<br>
            Default: `"propagate"`
        dist (VALID_AD_DIST_OPTIONS):
            The type of distribution to test against.<br>
            Only relevant when `algorithm=anderson`.<br>
            Default: `"norm"`

    Raises:
        (ValueError):
            When the given value for `algorithm` is not valid.

    Returns:
        (Union[tuple[float, ...], NormaltestResult, ShapiroResult, AndersonResult]):
            If `algorithm` is not `"ad"`, the result contains `(statistic, pvalue)`.
            If `algorithm` is `"ad"`, the result contains `(statistic, critical_values, significance_level)`.

    !!! success "Credit"
        Calculations are performed by `scipy.stats` and `statsmodels.stats`.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> from ts_stat_tests.normality.tests import normality
        >>> from ts_stat_tests.utils.data import data_normal
        >>> normal = data_normal

        ```

        ```pycon {.py .python linenums="1" title="Example 1: D'Agostino-Pearson test"}
        >>> stat, pvalue = normality(normal, algorithm="dp")
        >>> print(f"DP statistic: {stat:.4f}")
        DP statistic: 1.3537
        >>> print(f"p-value: {pvalue:.4f}")
        p-value: 0.5082

        ```

        ```pycon {.py .python linenums="1" title="Example 2: Jarque-Bera test"}
        >>> stat, pvalue = normality(normal, algorithm="jb")
        >>> print(f"JB statistic: {stat:.4f}")
        JB statistic: 1.4168
        >>> print(f"p-value: {pvalue:.4f}")
        p-value: 0.4924

        ```
    """
    options: dict[str, tuple[str, ...]] = {
        "jb": ("jb", "jarque", "jarque-bera"),
        "ob": ("ob", "omni", "omnibus"),
        "sw": ("sw", "shapiro", "shapiro-wilk"),
        "dp": ("dp", "dagostino", "dagostino-pearson"),
        "ad": ("ad", "anderson", "anderson-darling"),
    }
    if algorithm in options["jb"]:
        res_jb = _jb(x=x, axis=axis)
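        # The underlying `jb()` may report more than two values (for example, a
        # statsmodels-backed Jarque-Bera test also returns skew and kurtosis),
        # so only the first two elements (statistic, p-value) are returned below.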

        return (res_jb[0], res_jb[1])
    if algorithm in options["ob"]:
        return _ob(x=x, axis=axis)
    if algorithm in options["sw"]:
        return _sw(x=x)
    if algorithm in options["dp"]:
        return _dp(x=x, axis=axis, nan_policy=nan_policy)
    if algorithm in options["ad"]:
        return _ad(x=x, dist=dist)

    raise ValueError(
        generate_error_message(
            parameter_name="algorithm",
            value_parsed=algorithm,
            options=options,
        )
    )


@typechecked
def is_normal(
    x: ArrayLike,
    algorithm: str = "dp",
    alpha: float = 0.05,
    axis: int = 0,
    nan_policy: VALID_DP_NAN_POLICY_OPTIONS = "propagate",
    dist: VALID_AD_DIST_OPTIONS = "norm",
) -> dict[str, Union[str, float, bool, None]]:
    """
    !!! note "Summary"
        Test whether a given data set is normally distributed.

    ???+ abstract "Details"
        This function implements the given algorithm (defined in the parameter `algorithm`), and returns a dictionary containing the relevant data:
        ```python
        {
            "result": ...,  # The result of the test. Will be `True` if `p-value >= alpha`, and `False` otherwise
            "statistic": ...,  # The test statistic
            "p_value": ...,  # The p-value of the test (if applicable)
            "alpha": ...,  # The significance level used
        }
        ```

    Params:
        x (ArrayLike):
            The data to be checked. Should be a `1-D` or `N-D` data array.
        algorithm (str):
            Which normality algorithm to use.<br>
            - `jb()`: `["jb", "jarque", "jarque-bera"]`<br>
            - `ob()`: `["ob", "omni", "omnibus"]`<br>
            - `sw()`: `["sw", "shapiro", "shapiro-wilk"]`<br>
            - `dp()`: `["dp", "dagostino", "dagostino-pearson"]`<br>
            - `ad()`: `["ad", "anderson", "anderson-darling"]`<br>
            Default: `"dp"`
        alpha (float):
            Significance level.
            Default: `0.05`
        axis (int):
            Axis along which to compute the test.
            Default: `0`
        nan_policy (VALID_DP_NAN_POLICY_OPTIONS):
            Defines how to handle when input contains `NaN`.<br>
            - `propagate`: returns `NaN`<br>
            - `raise`: throws an error<br>
            - `omit`: performs the calculations ignoring `NaN` values<br>
            Default: `"propagate"`
        dist (VALID_AD_DIST_OPTIONS):
            The type of distribution to test against.<br>
            Only relevant when `algorithm=anderson`.<br>
            Default: `"norm"`

    Returns:
        (dict[str, Union[str, float, bool, None]]):
            A dictionary containing:
            - `"result"` (bool): Indicator of whether the series is normal.
            - `"statistic"` (float): The test statistic.
            - `"p_value"` (float): The p-value of the test (if applicable).
            - `"alpha"` (float): The significance level used.
            For `"ad"`, the dictionary instead contains `"critical_value"` and `"significance_level"` in place of `"p_value"`.

    !!! success "Credit"
        Calculations are performed by `scipy.stats` and `statsmodels.stats`.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> from ts_stat_tests.normality.tests import is_normal
        >>> from ts_stat_tests.utils.data import data_normal, data_random
        >>> normal = data_normal
        >>> random = data_random

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Test normal data"}
        >>> res = is_normal(normal, algorithm="dp")
        >>> res["result"]
        True
        >>> print(f"p-value: {res['p_value']:.4f}")
        p-value: 0.5082

        ```

        ```pycon {.py .python linenums="1" title="Example 2: Test non-normal (random) data"}
        >>> res = is_normal(random, algorithm="sw")
        >>> res["result"]
        False

        ```
    """
    res: Any = normality(x=x, algorithm=algorithm, axis=axis, nan_policy=nan_policy, dist=dist)
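    # `normality()` returns a different result type depending on `algorithm`: an
    # AndersonResult-style tuple for "ad", otherwise a (statistic, pvalue)-style
    # result, which is why the two branches below unpack it differently.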

    if algorithm in ("ad", "anderson", "anderson-darling"):
        # `res` is an AndersonResult(statistic, critical_values, significance_level), with
        # `fit_result` attached as an extra attribute; indexing only gives the first 3 elements.
        res_list: list[Any] = list(res) if isinstance(res, (tuple, list)) else []
        if len(res_list) >= 3:
            stat, crit, sig = res_list[0], res_list[1], res_list[2]

            # `sig` is something like [15., 10., 5., 2.5, 1.] (percentages), whereas
            # `alpha` is something like 0.05 (i.e. 5%), hence the `alpha * 100` below.
            sig_arr = np.asarray(sig)
            crit_arr = np.asarray(crit)
            idx = np.argmin(np.abs(sig_arr - (alpha * 100)))
            critical_value = crit_arr[idx]
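            # Anderson-Darling decision rule: fail to reject normality when the test
            # statistic is below the critical value at the chosen significance level.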

            is_norm = stat < critical_value
            return {
                "result": bool(is_norm),
                "statistic": float(stat),
                "critical_value": float(critical_value),
                "significance_level": float(sig_arr[idx]),
                "alpha": float(alpha),
            }
        # Fallback for an unexpected return format
        return {
            "result": False,
            "statistic": 0.0,
            "alpha": float(alpha),
        }

    # The other algorithms return (statistic, pvalue), or an object with those attributes
    p_val: Union[float, None] = None
    stat_val: Union[float, None] = None

    # Use getattr to avoid type checker attribute issues
    p_val_attr = getattr(res, "pvalue", None)
    stat_val_attr = getattr(res, "statistic", None)

    if p_val_attr is not None and stat_val_attr is not None:
        p_val = float(p_val_attr)
        stat_val = float(stat_val_attr)
    elif isinstance(res, (tuple, list)) and len(res) >= 2:
        res_tuple: Any = res
        stat_val = float(res_tuple[0])
        p_val = float(res_tuple[1])
    else:
        # Fallback: handle a bare numeric statistic with no p-value
        if isinstance(res, (float, int)):
            stat_val = float(res)
            p_val = None
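    # Decision rule for p-value based tests: fail to reject normality when p >= alpha;
    # if no p-value is available, conservatively report the data as not normal.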

    is_norm_val = p_val >= alpha if p_val is not None else False

    return {
        "result": bool(is_norm_val),
        "statistic": stat_val,
        "p_value": p_val,
        "alpha": float(alpha),
    }