Coverage for src/ts_stat_tests/normality/tests.py: 100%
59 statements
coverage.py v7.13.2, created at 2026-02-01 09:48 +0000
# ============================================================================ #
#                                                                              #
#     Title: Normality Tests                                                   #
#     Purpose: Convenience functions for normality algorithms.                 #
#                                                                              #
# ============================================================================ #


# ---------------------------------------------------------------------------- #
#                                                                              #
#     Overview                                                              ####
#                                                                              #
# ---------------------------------------------------------------------------- #


# ---------------------------------------------------------------------------- #
#     Description                                                           ####
# ---------------------------------------------------------------------------- #

"""
!!! note "Summary"
    This module contains convenience functions and tests for normality measures, allowing for easy access to different normality algorithms.
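
???+ example "Quick start"
    A minimal sketch of the typical entry point, mirroring the function-level examples below (`data_normal` is the packaged sample series used in those examples):

    ```pycon {.py .python linenums="1" title="Quick start"}
    >>> from ts_stat_tests.normality.tests import is_normal
    >>> from ts_stat_tests.utils.data import data_normal
    >>> is_normal(data_normal)["result"]
    True

    ```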
"""


# ---------------------------------------------------------------------------- #
#                                                                              #
#     Setup                                                                 ####
#                                                                              #
# ---------------------------------------------------------------------------- #


# ---------------------------------------------------------------------------- #
#     Imports                                                               ####
# ---------------------------------------------------------------------------- #


# ## Python StdLib Imports ----
from typing import Any, Union

# ## Python Third Party Imports ----
import numpy as np
from numpy.typing import ArrayLike
from scipy.stats._morestats import AndersonResult, ShapiroResult
from scipy.stats._stats_py import NormaltestResult
from typeguard import typechecked

# ## Local First Party Imports ----
from ts_stat_tests.normality.algorithms import (
    VALID_AD_DIST_OPTIONS,
    VALID_DP_NAN_POLICY_OPTIONS,
    ad as _ad,
    dp as _dp,
    jb as _jb,
    ob as _ob,
    sw as _sw,
)
from ts_stat_tests.utils.errors import generate_error_message


# ---------------------------------------------------------------------------- #
#     Exports                                                               ####
# ---------------------------------------------------------------------------- #


__all__: list[str] = ["normality", "is_normal"]


# ---------------------------------------------------------------------------- #
#                                                                              #
#     Tests                                                                 ####
#                                                                              #
# ---------------------------------------------------------------------------- #


@typechecked
def normality(
    x: ArrayLike,
    algorithm: str = "dp",
    axis: int = 0,
    nan_policy: VALID_DP_NAN_POLICY_OPTIONS = "propagate",
    dist: VALID_AD_DIST_OPTIONS = "norm",
) -> Union[tuple[float, ...], NormaltestResult, ShapiroResult, AndersonResult]:
    """
    !!! note "Summary"
        Perform a normality test on the given data.

    ???+ abstract "Details"
        This function is a convenience wrapper around the five underlying algorithms:<br>
        - [`jb()`][ts_stat_tests.normality.algorithms.jb]<br>
        - [`ob()`][ts_stat_tests.normality.algorithms.ob]<br>
        - [`sw()`][ts_stat_tests.normality.algorithms.sw]<br>
        - [`dp()`][ts_stat_tests.normality.algorithms.dp]<br>
        - [`ad()`][ts_stat_tests.normality.algorithms.ad]

    Params:
        x (ArrayLike):
            The data to be checked. Should be a `1-D` or `N-D` data array.
        algorithm (str):
            Which normality algorithm to use.<br>
            - `jb()`: `["jb", "jarque", "jarque-bera"]`<br>
            - `ob()`: `["ob", "omni", "omnibus"]`<br>
            - `sw()`: `["sw", "shapiro", "shapiro-wilk"]`<br>
            - `dp()`: `["dp", "dagostino", "dagostino-pearson"]`<br>
            - `ad()`: `["ad", "anderson", "anderson-darling"]`<br>
            Default: `"dp"`
        axis (int):
            Axis along which to compute the test.
            Default: `0`
        nan_policy (VALID_DP_NAN_POLICY_OPTIONS):
            Defines how to handle when input contains `NaN`.<br>
            - `propagate`: returns `NaN`<br>
            - `raise`: throws an error<br>
            - `omit`: performs the calculations ignoring `NaN` values<br>
            Default: `"propagate"`
        dist (VALID_AD_DIST_OPTIONS):
            The type of distribution to test against.<br>
            Only relevant when `algorithm=anderson`.<br>
            Default: `"norm"`

    Raises:
        (ValueError):
            When the given value for `algorithm` is not valid.

    Returns:
        (Union[tuple[float, ...], NormaltestResult, ShapiroResult, AndersonResult]):
            If `algorithm` is not `"ad"`, the result contains `(statistic, pvalue)`.
            If `algorithm` is `"ad"`, the result contains `(statistic, critical_values, significance_level)`.

    !!! success "Credit"
        Calculations are performed by `scipy.stats` and `statsmodels.stats`.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> from ts_stat_tests.normality.tests import normality
        >>> from ts_stat_tests.utils.data import data_normal
        >>> normal = data_normal

        ```

        ```pycon {.py .python linenums="1" title="Example 1: D'Agostino-Pearson test"}
        >>> stat, pvalue = normality(normal, algorithm="dp")
        >>> print(f"DP statistic: {stat:.4f}")
        DP statistic: 1.3537
        >>> print(f"p-value: {pvalue:.4f}")
        p-value: 0.5082

        ```

        ```pycon {.py .python linenums="1" title="Example 2: Jarque-Bera test"}
        >>> stat, pvalue = normality(normal, algorithm="jb")
        >>> print(f"JB statistic: {stat:.4f}")
        JB statistic: 1.4168
        >>> print(f"p-value: {pvalue:.4f}")
        p-value: 0.4924

        ```
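
        The Anderson-Darling option returns critical values instead of a p-value. The following is a minimal sketch of how that result can be unpacked (variable names are illustrative; the computed numbers depend on the data, so no output is shown):

        ```pycon {.py .python linenums="1" title="Example 3: Anderson-Darling test"}
        >>> stat, critical_values, significance_levels = normality(normal, algorithm="ad")
        >>> consistent_with_normal = stat < critical_values[2]  # index 2 is the 5% significance level

        ```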
    """
    options: dict[str, tuple[str, ...]] = {
        "jb": ("jb", "jarque", "jarque-bera"),
        "ob": ("ob", "omni", "omnibus"),
        "sw": ("sw", "shapiro", "shapiro-wilk"),
        "dp": ("dp", "dagostino", "dagostino-pearson"),
        "ad": ("ad", "anderson", "anderson-darling"),
    }
    if algorithm in options["jb"]:
        res_jb = _jb(x=x, axis=axis)
        return (res_jb[0], res_jb[1])
    if algorithm in options["ob"]:
        return _ob(x=x, axis=axis)
    if algorithm in options["sw"]:
        return _sw(x=x)
    if algorithm in options["dp"]:
        return _dp(x=x, axis=axis, nan_policy=nan_policy)
    if algorithm in options["ad"]:
        return _ad(x=x, dist=dist)

    raise ValueError(
        generate_error_message(
            parameter_name="algorithm",
            value_parsed=algorithm,
            options=options,
        )
    )


@typechecked
def is_normal(
    x: ArrayLike,
    algorithm: str = "dp",
    alpha: float = 0.05,
    axis: int = 0,
    nan_policy: VALID_DP_NAN_POLICY_OPTIONS = "propagate",
    dist: VALID_AD_DIST_OPTIONS = "norm",
) -> dict[str, Union[str, float, bool, None]]:
    """
    !!! note "Summary"
        Test whether a given data set is `normal` or not.

    ???+ abstract "Details"
        This function implements the given algorithm (defined in the parameter `algorithm`), and returns a dictionary containing the relevant data:
        ```python
        {
            "result": ...,  # The result of the test. Will be `True` if `p-value >= alpha`, and `False` otherwise
            "statistic": ...,  # The test statistic
            "p_value": ...,  # The p-value of the test (if applicable)
            "alpha": ...,  # The significance level used
        }
        ```

    Params:
        x (ArrayLike):
            The data to be checked. Should be a `1-D` or `N-D` data array.
        algorithm (str):
            Which normality algorithm to use.<br>
            - `jb()`: `["jb", "jarque", "jarque-bera"]`<br>
            - `ob()`: `["ob", "omni", "omnibus"]`<br>
            - `sw()`: `["sw", "shapiro", "shapiro-wilk"]`<br>
            - `dp()`: `["dp", "dagostino", "dagostino-pearson"]`<br>
            - `ad()`: `["ad", "anderson", "anderson-darling"]`<br>
            Default: `"dp"`
        alpha (float):
            Significance level.
            Default: `0.05`
        axis (int):
            Axis along which to compute the test.
            Default: `0`
        nan_policy (VALID_DP_NAN_POLICY_OPTIONS):
            Defines how to handle when input contains `NaN`.<br>
            - `propagate`: returns `NaN`<br>
            - `raise`: throws an error<br>
            - `omit`: performs the calculations ignoring `NaN` values<br>
            Default: `"propagate"`
        dist (VALID_AD_DIST_OPTIONS):
            The type of distribution to test against.<br>
            Only relevant when `algorithm=anderson`.<br>
            Default: `"norm"`

    Returns:
        (dict[str, Union[str, float, bool, None]]):
            A dictionary containing:
            - `"result"` (bool): Indicator if the series is normal.
            - `"statistic"` (float): The test statistic.
            - `"p_value"` (float): The p-value of the test (if applicable).
            - `"alpha"` (float): The significance level used.
            For the Anderson-Darling aliases (`"ad"`, `"anderson"`, `"anderson-darling"`), the `"p_value"` key is replaced by `"critical_value"` and `"significance_level"`, and `"result"` is `True` when the statistic is below that critical value.

    !!! success "Credit"
        Calculations are performed by `scipy.stats` and `statsmodels.stats`.

    ???+ example "Examples"

        ```pycon {.py .python linenums="1" title="Setup"}
        >>> from ts_stat_tests.normality.tests import is_normal
        >>> from ts_stat_tests.utils.data import data_normal, data_random
        >>> normal = data_normal
        >>> random = data_random

        ```

        ```pycon {.py .python linenums="1" title="Example 1: Test normal data"}
        >>> res = is_normal(normal, algorithm="dp")
        >>> res["result"]
        True
        >>> print(f"p-value: {res['p_value']:.4f}")
        p-value: 0.5082

        ```

        ```pycon {.py .python linenums="1" title="Example 2: Test non-normal (random) data"}
        >>> res = is_normal(random, algorithm="sw")
        >>> res["result"]
        False

        ```
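
        For the Anderson-Darling aliases, the decision is made against a critical value rather than a p-value, so the returned dictionary carries a different set of keys. A small sketch of inspecting them (the key list comes from the implementation; the numeric values depend on the data):

        ```pycon {.py .python linenums="1" title="Example 3: Anderson-Darling test"}
        >>> res = is_normal(normal, algorithm="ad")
        >>> sorted(res.keys())
        ['alpha', 'critical_value', 'result', 'significance_level', 'statistic']

        ```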
    """
    res: Any = normality(x=x, algorithm=algorithm, axis=axis, nan_policy=nan_policy, dist=dist)

    if algorithm in ("ad", "anderson", "anderson-darling"):
        # res is AndersonResult(statistic, critical_values, significance_level, fit_result);
        # indexing only gives the first 3 elements
        res_list: list[Any] = list(res) if isinstance(res, (tuple, list)) else []
        if len(res_list) >= 3:
            v0: Any = res_list[0]
            v1: Any = res_list[1]
            v2: Any = res_list[2]
            stat = v0
            crit = v1
            sig = v2

            # sig is something like [15. , 10. , 5. , 2.5, 1. ]
            # alpha is something like 0.05 (which is 5%)
            sig_arr = np.asarray(sig)
            crit_arr = np.asarray(crit)
            idx = np.argmin(np.abs(sig_arr - (alpha * 100)))
            critical_value = crit_arr[idx]
            is_norm = stat < critical_value
            return {
                "result": bool(is_norm),
                "statistic": float(stat),
                "critical_value": float(critical_value),
                "significance_level": float(sig_arr[idx]),
                "alpha": float(alpha),
            }
        # Fallback for unexpected return format
        return {
            "result": False,
            "statistic": 0.0,
            "alpha": float(alpha),
        }

    # The other algorithms return (statistic, pvalue) or an equivalent result object
    p_val: Union[float, None] = None
    stat_val: Union[float, None] = None

    # Use getattr to avoid type checker attribute issues
    p_val_attr = getattr(res, "pvalue", None)
    stat_val_attr = getattr(res, "statistic", None)

    if p_val_attr is not None and stat_val_attr is not None:
        p_val = float(p_val_attr)
        stat_val = float(stat_val_attr)
    elif isinstance(res, (tuple, list)) and len(res) >= 2:
        res_tuple: Any = res
        stat_val = float(res_tuple[0])
        p_val = float(res_tuple[1])
    else:
        # Fallback
        if isinstance(res, (float, int)):
            stat_val = float(res)
            p_val = None

    is_norm_val = p_val >= alpha if p_val is not None else False

    return {
        "result": bool(is_norm_val),
        "statistic": stat_val,
        "p_value": p_val,
        "alpha": float(alpha),
    }