Coverage for src / ts_stat_tests / normality / tests.py: 100%


# ============================================================================ #
#                                                                              #
#     Title: Normality Tests                                                   #
#     Purpose: Convenience functions for normality algorithms.                 #
#                                                                              #
# ============================================================================ #


# ---------------------------------------------------------------------------- #
#                                                                              #
#     Overview                                                              ####
#                                                                              #
# ---------------------------------------------------------------------------- #


# ---------------------------------------------------------------------------- #
#     Description                                                           ####
# ---------------------------------------------------------------------------- #


"""
!!! note "Summary"
    This module contains convenience functions and tests for normality measures, allowing for easy access to different normality algorithms.
"""


# ---------------------------------------------------------------------------- #
#                                                                              #
#     Setup                                                                 ####
#                                                                              #
# ---------------------------------------------------------------------------- #


# ---------------------------------------------------------------------------- #
#     Imports                                                               ####
# ---------------------------------------------------------------------------- #


# ## Python StdLib Imports ----
from typing import Any, Union

# ## Python Third Party Imports ----
import numpy as np
from numpy.typing import ArrayLike
from scipy.stats._morestats import AndersonResult, ShapiroResult
from scipy.stats._stats_py import NormaltestResult
from typeguard import typechecked

# ## Local First Party Imports ----
from ts_stat_tests.normality.algorithms import (
    VALID_AD_DIST_OPTIONS,
    VALID_DP_NAN_POLICY_OPTIONS,
    ad as _ad,
    dp as _dp,
    jb as _jb,
    ob as _ob,
    sw as _sw,
)
from ts_stat_tests.utils.errors import generate_error_message


# ---------------------------------------------------------------------------- #
#     Exports                                                               ####
# ---------------------------------------------------------------------------- #


__all__: list[str] = ["normality", "is_normal"]


# ---------------------------------------------------------------------------- #
#                                                                              #
#     Tests                                                                 ####
#                                                                              #
# ---------------------------------------------------------------------------- #


@typechecked
def normality(
    x: ArrayLike,
    algorithm: str = "dp",
    axis: int = 0,
    nan_policy: VALID_DP_NAN_POLICY_OPTIONS = "propagate",
    dist: VALID_AD_DIST_OPTIONS = "norm",
) -> Union[tuple[float, ...], NormaltestResult, ShapiroResult, AndersonResult]:
    """
    !!! note "Summary"
        Perform a normality test on the given data.

    ???+ abstract "Details"
        This function is a convenience wrapper around the five underlying algorithms:<br>
        - [`jb()`][ts_stat_tests.normality.algorithms.jb]<br>
        - [`ob()`][ts_stat_tests.normality.algorithms.ob]<br>
        - [`sw()`][ts_stat_tests.normality.algorithms.sw]<br>
        - [`dp()`][ts_stat_tests.normality.algorithms.dp]<br>
        - [`ad()`][ts_stat_tests.normality.algorithms.ad]

    Params:
        x (ArrayLike):
            The data to be checked. Should be a `1-D` or `N-D` data array.
        algorithm (str):
            Which normality algorithm to use.<br>
            - `jb()`: `["jb", "jarque", "jarque-bera"]`<br>
            - `ob()`: `["ob", "omni", "omnibus"]`<br>
            - `sw()`: `["sw", "shapiro", "shapiro-wilk"]`<br>
            - `dp()`: `["dp", "dagostino", "dagostino-pearson"]`<br>
            - `ad()`: `["ad", "anderson", "anderson-darling"]`<br>
            Default: `"dp"`
        axis (int):
            Axis along which to compute the test.
            Default: `0`
        nan_policy (VALID_DP_NAN_POLICY_OPTIONS):
            Defines how to handle when input contains `NaN`.<br>
            - `propagate`: returns `NaN`<br>
            - `raise`: throws an error<br>
            - `omit`: performs the calculations ignoring `NaN` values<br>
            Default: `"propagate"`
        dist (VALID_AD_DIST_OPTIONS):
            The type of distribution to test against.<br>
            Only relevant when `algorithm=anderson`.<br>
            Default: `"norm"`

    Raises:
        (ValueError):
            When the given value for `algorithm` is not valid.

    Returns:
        (Union[tuple[float, ...], NormaltestResult, ShapiroResult, AndersonResult]):
            If `algorithm` is not `"ad"`, the result contains `(statistic, pvalue)`.
            If `algorithm` is `"ad"`, the result contains `(statistic, critical_values, significance_level)`.

    !!! success "Credit"
        Calculations are performed by `scipy.stats` and `statsmodels.stats`.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> from ts_stat_tests.normality.tests import normality
        >>> from ts_stat_tests.utils.data import data_normal
        >>> normal = data_normal

        ```

        ```pycon {.py .python linenums="1" title="Example 1: D'Agostino-Pearson test"}
        >>> stat, pvalue = normality(normal, algorithm="dp")
        >>> print(f"DP statistic: {stat:.4f}")
        DP statistic: 1.3537
        >>> print(f"p-value: {pvalue:.4f}")
        p-value: 0.5082

        ```

        ```pycon {.py .python linenums="1" title="Example 2: Jarque-Bera test"}
        >>> stat, pvalue = normality(normal, algorithm="jb")
        >>> print(f"JB statistic: {stat:.4f}")
        JB statistic: 1.4168
        >>> print(f"p-value: {pvalue:.4f}")
        p-value: 0.4924

        ```
    """
    options: dict[str, tuple[str, ...]] = {
        "jb": ("jb", "jarque", "jarque-bera"),
        "ob": ("ob", "omni", "omnibus"),
        "sw": ("sw", "shapiro", "shapiro-wilk"),
        "dp": ("dp", "dagostino", "dagostino-pearson"),
        "ad": ("ad", "anderson", "anderson-darling"),
    }
    if algorithm in options["jb"]:
        res_jb = _jb(x=x, axis=axis)
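        # The underlying `jb()` may report more than two values (for example, a
        # statsmodels-backed Jarque-Bera test also returns skew and kurtosis),
        # so only the first two elements (statistic, p-value) are returned below.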

        return (res_jb[0], res_jb[1])
    if algorithm in options["ob"]:
        return _ob(x=x, axis=axis)
    if algorithm in options["sw"]:
        return _sw(x=x)
    if algorithm in options["dp"]:
        return _dp(x=x, axis=axis, nan_policy=nan_policy)
    if algorithm in options["ad"]:
        return _ad(x=x, dist=dist)

    raise ValueError(
        generate_error_message(
            parameter_name="algorithm",
            value_parsed=algorithm,
            options=options,
        )
    )


@typechecked
def is_normal(
    x: ArrayLike,
    algorithm: str = "dp",
    alpha: float = 0.05,
    axis: int = 0,
    nan_policy: VALID_DP_NAN_POLICY_OPTIONS = "propagate",
    dist: VALID_AD_DIST_OPTIONS = "norm",
) -> dict[str, Union[str, float, bool, None]]:
    """
    !!! note "Summary"
        Test whether a given data set is normally distributed.

    ???+ abstract "Details"
        This function implements the given algorithm (defined in the parameter `algorithm`), and returns a dictionary containing the relevant data:
        ```python
        {
            "result": ...,  # The result of the test. Will be `True` if `p-value >= alpha`, and `False` otherwise
            "statistic": ...,  # The test statistic
            "p_value": ...,  # The p-value of the test (if applicable)
            "alpha": ...,  # The significance level used
        }
        ```

    Params:
        x (ArrayLike):
            The data to be checked. Should be a `1-D` or `N-D` data array.
        algorithm (str):
            Which normality algorithm to use.<br>
            - `jb()`: `["jb", "jarque", "jarque-bera"]`<br>
            - `ob()`: `["ob", "omni", "omnibus"]`<br>
            - `sw()`: `["sw", "shapiro", "shapiro-wilk"]`<br>
            - `dp()`: `["dp", "dagostino", "dagostino-pearson"]`<br>
            - `ad()`: `["ad", "anderson", "anderson-darling"]`<br>
            Default: `"dp"`
        alpha (float):
            Significance level.
            Default: `0.05`
        axis (int):
            Axis along which to compute the test.
            Default: `0`
        nan_policy (VALID_DP_NAN_POLICY_OPTIONS):
            Defines how to handle when input contains `NaN`.<br>
            - `propagate`: returns `NaN`<br>
            - `raise`: throws an error<br>
            - `omit`: performs the calculations ignoring `NaN` values<br>
            Default: `"propagate"`
        dist (VALID_AD_DIST_OPTIONS):
            The type of distribution to test against.<br>
            Only relevant when `algorithm=anderson`.<br>
            Default: `"norm"`

    Returns:
        (dict[str, Union[str, float, bool, None]]):
            A dictionary containing:
            - `"result"` (bool): Indicator of whether the series is normal.
            - `"statistic"` (float): The test statistic.
            - `"p_value"` (float): The p-value of the test (if applicable).
            - `"alpha"` (float): The significance level used.
            For `"ad"`, the dictionary instead contains `"critical_value"` and `"significance_level"` in place of `"p_value"`.

    !!! success "Credit"
        Calculations are performed by `scipy.stats` and `statsmodels.stats`.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> from ts_stat_tests.normality.tests import is_normal
        >>> from ts_stat_tests.utils.data import data_normal, data_random
        >>> normal = data_normal
        >>> random = data_random

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Test normal data"}
        >>> res = is_normal(normal, algorithm="dp")
        >>> res["result"]
        True
        >>> print(f"p-value: {res['p_value']:.4f}")
        p-value: 0.5082

        ```

        ```pycon {.py .python linenums="1" title="Example 2: Test non-normal (random) data"}
        >>> res = is_normal(random, algorithm="sw")
        >>> res["result"]
        False

        ```
    """
    res: Any = normality(x=x, algorithm=algorithm, axis=axis, nan_policy=nan_policy, dist=dist)
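    # `normality()` returns a different result type depending on `algorithm`: an
    # AndersonResult-style tuple for "ad", otherwise a (statistic, pvalue)-style
    # result, which is why the two branches below unpack it differently.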

    if algorithm in ("ad", "anderson", "anderson-darling"):
        # `res` is an AndersonResult(statistic, critical_values, significance_level), with
        # `fit_result` attached as an extra attribute; indexing only gives the first 3 elements.
        res_list: list[Any] = list(res) if isinstance(res, (tuple, list)) else []
        if len(res_list) >= 3:
            stat, crit, sig = res_list[0], res_list[1], res_list[2]

            # `sig` is something like [15., 10., 5., 2.5, 1.] (percentages), whereas
            # `alpha` is something like 0.05 (i.e. 5%), hence the `alpha * 100` below.
            sig_arr = np.asarray(sig)
            crit_arr = np.asarray(crit)
            idx = np.argmin(np.abs(sig_arr - (alpha * 100)))
            critical_value = crit_arr[idx]
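            # Anderson-Darling decision rule: fail to reject normality when the test
            # statistic is below the critical value at the chosen significance level.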

            is_norm = stat < critical_value
            return {
                "result": bool(is_norm),
                "statistic": float(stat),
                "critical_value": float(critical_value),
                "significance_level": float(sig_arr[idx]),
                "alpha": float(alpha),
            }
        # Fallback for an unexpected return format
        return {
            "result": False,
            "statistic": 0.0,
            "alpha": float(alpha),
        }

    # The other algorithms return (statistic, pvalue), or an object with those attributes
    p_val: Union[float, None] = None
    stat_val: Union[float, None] = None

    # Use getattr to avoid type checker attribute issues
    p_val_attr = getattr(res, "pvalue", None)
    stat_val_attr = getattr(res, "statistic", None)

    if p_val_attr is not None and stat_val_attr is not None:
        p_val = float(p_val_attr)
        stat_val = float(stat_val_attr)
    elif isinstance(res, (tuple, list)) and len(res) >= 2:
        res_tuple: Any = res
        stat_val = float(res_tuple[0])
        p_val = float(res_tuple[1])
    else:
        # Fallback: handle a bare numeric statistic with no p-value
        if isinstance(res, (float, int)):
            stat_val = float(res)
            p_val = None
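    # Decision rule for p-value based tests: fail to reject normality when p >= alpha;
    # if no p-value is available, conservatively report the data as not normal.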

    is_norm_val = p_val >= alpha if p_val is not None else False

    return {
        "result": bool(is_norm_val),
        "statistic": stat_val,
        "p_value": p_val,
        "alpha": float(alpha),
    }