truthound-dashboard 1.3.1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. truthound_dashboard/api/alerts.py +258 -0
  2. truthound_dashboard/api/anomaly.py +1302 -0
  3. truthound_dashboard/api/cross_alerts.py +352 -0
  4. truthound_dashboard/api/deps.py +143 -0
  5. truthound_dashboard/api/drift_monitor.py +540 -0
  6. truthound_dashboard/api/lineage.py +1151 -0
  7. truthound_dashboard/api/maintenance.py +363 -0
  8. truthound_dashboard/api/middleware.py +373 -1
  9. truthound_dashboard/api/model_monitoring.py +805 -0
  10. truthound_dashboard/api/notifications_advanced.py +2452 -0
  11. truthound_dashboard/api/plugins.py +2096 -0
  12. truthound_dashboard/api/profile.py +211 -14
  13. truthound_dashboard/api/reports.py +853 -0
  14. truthound_dashboard/api/router.py +147 -0
  15. truthound_dashboard/api/rule_suggestions.py +310 -0
  16. truthound_dashboard/api/schema_evolution.py +231 -0
  17. truthound_dashboard/api/sources.py +47 -3
  18. truthound_dashboard/api/triggers.py +190 -0
  19. truthound_dashboard/api/validations.py +13 -0
  20. truthound_dashboard/api/validators.py +333 -4
  21. truthound_dashboard/api/versioning.py +309 -0
  22. truthound_dashboard/api/websocket.py +301 -0
  23. truthound_dashboard/core/__init__.py +27 -0
  24. truthound_dashboard/core/anomaly.py +1395 -0
  25. truthound_dashboard/core/anomaly_explainer.py +633 -0
  26. truthound_dashboard/core/cache.py +206 -0
  27. truthound_dashboard/core/cached_services.py +422 -0
  28. truthound_dashboard/core/charts.py +352 -0
  29. truthound_dashboard/core/connections.py +1069 -42
  30. truthound_dashboard/core/cross_alerts.py +837 -0
  31. truthound_dashboard/core/drift_monitor.py +1477 -0
  32. truthound_dashboard/core/drift_sampling.py +669 -0
  33. truthound_dashboard/core/i18n/__init__.py +42 -0
  34. truthound_dashboard/core/i18n/detector.py +173 -0
  35. truthound_dashboard/core/i18n/messages.py +564 -0
  36. truthound_dashboard/core/lineage.py +971 -0
  37. truthound_dashboard/core/maintenance.py +443 -5
  38. truthound_dashboard/core/model_monitoring.py +1043 -0
  39. truthound_dashboard/core/notifications/channels.py +1020 -1
  40. truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
  41. truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
  42. truthound_dashboard/core/notifications/deduplication/service.py +400 -0
  43. truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
  44. truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
  45. truthound_dashboard/core/notifications/dispatcher.py +43 -0
  46. truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
  47. truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
  48. truthound_dashboard/core/notifications/escalation/engine.py +429 -0
  49. truthound_dashboard/core/notifications/escalation/models.py +336 -0
  50. truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
  51. truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
  52. truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
  53. truthound_dashboard/core/notifications/events.py +49 -0
  54. truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
  55. truthound_dashboard/core/notifications/metrics/base.py +528 -0
  56. truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
  57. truthound_dashboard/core/notifications/routing/__init__.py +169 -0
  58. truthound_dashboard/core/notifications/routing/combinators.py +184 -0
  59. truthound_dashboard/core/notifications/routing/config.py +375 -0
  60. truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
  61. truthound_dashboard/core/notifications/routing/engine.py +382 -0
  62. truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
  63. truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
  64. truthound_dashboard/core/notifications/routing/rules.py +625 -0
  65. truthound_dashboard/core/notifications/routing/validator.py +678 -0
  66. truthound_dashboard/core/notifications/service.py +2 -0
  67. truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
  68. truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
  69. truthound_dashboard/core/notifications/throttling/builder.py +311 -0
  70. truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
  71. truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
  72. truthound_dashboard/core/openlineage.py +1028 -0
  73. truthound_dashboard/core/plugins/__init__.py +39 -0
  74. truthound_dashboard/core/plugins/docs/__init__.py +39 -0
  75. truthound_dashboard/core/plugins/docs/extractor.py +703 -0
  76. truthound_dashboard/core/plugins/docs/renderers.py +804 -0
  77. truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
  78. truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
  79. truthound_dashboard/core/plugins/hooks/manager.py +403 -0
  80. truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
  81. truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
  82. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
  83. truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
  84. truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
  85. truthound_dashboard/core/plugins/loader.py +504 -0
  86. truthound_dashboard/core/plugins/registry.py +810 -0
  87. truthound_dashboard/core/plugins/reporter_executor.py +588 -0
  88. truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
  89. truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
  90. truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
  91. truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
  92. truthound_dashboard/core/plugins/sandbox.py +617 -0
  93. truthound_dashboard/core/plugins/security/__init__.py +68 -0
  94. truthound_dashboard/core/plugins/security/analyzer.py +535 -0
  95. truthound_dashboard/core/plugins/security/policies.py +311 -0
  96. truthound_dashboard/core/plugins/security/protocols.py +296 -0
  97. truthound_dashboard/core/plugins/security/signing.py +842 -0
  98. truthound_dashboard/core/plugins/security.py +446 -0
  99. truthound_dashboard/core/plugins/validator_executor.py +401 -0
  100. truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
  101. truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
  102. truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
  103. truthound_dashboard/core/plugins/versioning/semver.py +266 -0
  104. truthound_dashboard/core/profile_comparison.py +601 -0
  105. truthound_dashboard/core/report_history.py +570 -0
  106. truthound_dashboard/core/reporters/__init__.py +57 -0
  107. truthound_dashboard/core/reporters/base.py +296 -0
  108. truthound_dashboard/core/reporters/csv_reporter.py +155 -0
  109. truthound_dashboard/core/reporters/html_reporter.py +598 -0
  110. truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
  111. truthound_dashboard/core/reporters/i18n/base.py +494 -0
  112. truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
  113. truthound_dashboard/core/reporters/json_reporter.py +160 -0
  114. truthound_dashboard/core/reporters/junit_reporter.py +233 -0
  115. truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
  116. truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
  117. truthound_dashboard/core/reporters/registry.py +272 -0
  118. truthound_dashboard/core/rule_generator.py +2088 -0
  119. truthound_dashboard/core/scheduler.py +822 -12
  120. truthound_dashboard/core/schema_evolution.py +858 -0
  121. truthound_dashboard/core/services.py +152 -9
  122. truthound_dashboard/core/statistics.py +718 -0
  123. truthound_dashboard/core/streaming_anomaly.py +883 -0
  124. truthound_dashboard/core/triggers/__init__.py +45 -0
  125. truthound_dashboard/core/triggers/base.py +226 -0
  126. truthound_dashboard/core/triggers/evaluators.py +609 -0
  127. truthound_dashboard/core/triggers/factory.py +363 -0
  128. truthound_dashboard/core/unified_alerts.py +870 -0
  129. truthound_dashboard/core/validation_limits.py +509 -0
  130. truthound_dashboard/core/versioning.py +709 -0
  131. truthound_dashboard/core/websocket/__init__.py +59 -0
  132. truthound_dashboard/core/websocket/manager.py +512 -0
  133. truthound_dashboard/core/websocket/messages.py +130 -0
  134. truthound_dashboard/db/__init__.py +30 -0
  135. truthound_dashboard/db/models.py +3375 -3
  136. truthound_dashboard/main.py +22 -0
  137. truthound_dashboard/schemas/__init__.py +396 -1
  138. truthound_dashboard/schemas/anomaly.py +1258 -0
  139. truthound_dashboard/schemas/base.py +4 -0
  140. truthound_dashboard/schemas/cross_alerts.py +334 -0
  141. truthound_dashboard/schemas/drift_monitor.py +890 -0
  142. truthound_dashboard/schemas/lineage.py +428 -0
  143. truthound_dashboard/schemas/maintenance.py +154 -0
  144. truthound_dashboard/schemas/model_monitoring.py +374 -0
  145. truthound_dashboard/schemas/notifications_advanced.py +1363 -0
  146. truthound_dashboard/schemas/openlineage.py +704 -0
  147. truthound_dashboard/schemas/plugins.py +1293 -0
  148. truthound_dashboard/schemas/profile.py +420 -34
  149. truthound_dashboard/schemas/profile_comparison.py +242 -0
  150. truthound_dashboard/schemas/reports.py +285 -0
  151. truthound_dashboard/schemas/rule_suggestion.py +434 -0
  152. truthound_dashboard/schemas/schema_evolution.py +164 -0
  153. truthound_dashboard/schemas/source.py +117 -2
  154. truthound_dashboard/schemas/triggers.py +511 -0
  155. truthound_dashboard/schemas/unified_alerts.py +223 -0
  156. truthound_dashboard/schemas/validation.py +25 -1
  157. truthound_dashboard/schemas/validators/__init__.py +11 -0
  158. truthound_dashboard/schemas/validators/base.py +151 -0
  159. truthound_dashboard/schemas/versioning.py +152 -0
  160. truthound_dashboard/static/index.html +2 -2
  161. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -22
  162. truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
  163. truthound_dashboard/static/assets/index-BZG20KuF.js +0 -586
  164. truthound_dashboard/static/assets/index-D_HyZ3pb.css +0 -1
  165. truthound_dashboard/static/assets/unmerged_dictionaries-CtpqQBm0.js +0 -1
  166. truthound_dashboard-1.3.1.dist-info/RECORD +0 -110
  167. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
  168. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
  169. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,718 @@
1
+ """Statistical analysis utilities.
2
+
3
+ This module provides statistical significance testing and analysis
4
+ utilities for profile comparison and data quality assessment.
5
+
6
+ Features:
7
+ - T-test for comparing means
8
+ - Mann-Whitney U test for non-parametric comparison
9
+ - Chi-square test for categorical data
10
+ - Effect size calculation (Cohen's d)
11
+ - Confidence interval estimation
12
+ - Trend significance detection
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import math
18
+ from dataclasses import dataclass
19
+ from enum import Enum
20
+ from typing import Sequence
21
+
22
+
23
class SignificanceLevel(str, Enum):
    """Statistical significance levels.

    Categories correspond to the conventional p-value cutoffs applied
    by ``interpret_p_value``: 0.10, 0.05, 0.01 and 0.001.
    """

    NOT_SIGNIFICANT = "not_significant"  # p >= 0.10
    MARGINALLY_SIGNIFICANT = "marginally_significant"  # p < 0.10
    SIGNIFICANT = "significant"  # p < 0.05
    HIGHLY_SIGNIFICANT = "highly_significant"  # p < 0.01
    VERY_HIGHLY_SIGNIFICANT = "very_highly_significant"  # p < 0.001
31
+
32
+
33
class EffectSize(str, Enum):
    """Cohen's d effect size interpretation.

    Bands follow Cohen's conventional thresholds; boundaries are applied
    as half-open intervals (e.g. d = 0.5 is MEDIUM, see
    ``interpret_effect_size``).
    """

    NEGLIGIBLE = "negligible"  # |d| < 0.2
    SMALL = "small"  # 0.2 <= |d| < 0.5
    MEDIUM = "medium"  # 0.5 <= |d| < 0.8
    LARGE = "large"  # |d| >= 0.8
40
+
41
+
42
@dataclass
class StatisticalTestResult:
    """Result of a statistical significance test.

    Uniform container returned by every test in this module so callers
    can handle outcomes generically.
    """

    # Human-readable name of the test that produced this result.
    test_name: str
    # Raw test statistic (t, U, chi-square, ... depending on the test).
    statistic: float
    # Two-tailed p-value; producing tests clamp it to [0, 1].
    p_value: float
    # Categorical interpretation of ``p_value``.
    significance_level: SignificanceLevel
    # Effect size (Cohen's d, rank-biserial r, Cramer's V, or slope);
    # None when the test does not compute one.
    effect_size: float | None = None
    # Categorical interpretation of ``effect_size``, when computed.
    effect_interpretation: EffectSize | None = None
    # Confidence interval for the difference in means, when available.
    confidence_interval: tuple[float, float] | None = None
    # Sizes of the two compared groups; second element may be 0 for
    # single-series tests (e.g. the trend test).
    sample_sizes: tuple[int, int] | None = None
    # True when p_value < the alpha supplied to the test.
    is_significant: bool = False
    # Human-readable summary of the outcome.
    interpretation: str = ""
56
+
57
+
58
+ def _compute_mean(values: Sequence[float]) -> float:
59
+ """Compute mean of values."""
60
+ if not values:
61
+ return 0.0
62
+ return sum(values) / len(values)
63
+
64
+
65
+ def _compute_variance(values: Sequence[float], ddof: int = 1) -> float:
66
+ """Compute variance with degrees of freedom adjustment."""
67
+ if len(values) <= ddof:
68
+ return 0.0
69
+ mean = _compute_mean(values)
70
+ squared_diffs = [(x - mean) ** 2 for x in values]
71
+ return sum(squared_diffs) / (len(values) - ddof)
72
+
73
+
74
def _compute_std(values: Sequence[float], ddof: int = 1) -> float:
    """Standard deviation: square root of the ddof-adjusted variance."""
    variance = _compute_variance(values, ddof)
    return math.sqrt(variance)
77
+
78
+
79
def _pooled_std(
    values1: Sequence[float],
    values2: Sequence[float],
) -> float:
    """Pooled standard deviation of two independent samples.

    Returns 0.0 when either group has fewer than two observations,
    since a sample variance is undefined in that case.
    """
    size_a = len(values1)
    size_b = len(values2)
    if size_a <= 1 or size_b <= 1:
        return 0.0

    # Weight each group's sample variance by its degrees of freedom,
    # then divide by the combined degrees of freedom.
    weighted = (size_a - 1) * _compute_variance(values1, ddof=1)
    weighted += (size_b - 1) * _compute_variance(values2, ddof=1)
    return math.sqrt(weighted / (size_a + size_b - 2))
96
+
97
+
98
def cohens_d(
    values1: Sequence[float],
    values2: Sequence[float],
) -> float:
    """Calculate Cohen's d effect size.

    The result is the absolute standardized difference between the two
    group means. 0.0 is returned when the pooled standard deviation is
    zero (constant data or groups too small to estimate spread).

    Args:
        values1: First group values.
        values2: Second group values.

    Returns:
        Cohen's d effect size (non-negative).
    """
    spread = _pooled_std(values1, values2)
    if spread == 0:
        return 0.0
    gap = _compute_mean(values1) - _compute_mean(values2)
    return abs(gap) / spread
119
+
120
+
121
def interpret_effect_size(d: float) -> EffectSize:
    """Interpret Cohen's d effect size.

    Args:
        d: Cohen's d value (sign is ignored).

    Returns:
        Effect size interpretation per Cohen's conventional bands.
    """
    magnitude = abs(d)
    # Ordered (exclusive upper bound, label) bands; first match wins.
    for bound, label in (
        (0.2, EffectSize.NEGLIGIBLE),
        (0.5, EffectSize.SMALL),
        (0.8, EffectSize.MEDIUM),
    ):
        if magnitude < bound:
            return label
    return EffectSize.LARGE
139
+
140
+
141
def interpret_p_value(p_value: float) -> SignificanceLevel:
    """Interpret p-value significance level.

    Args:
        p_value: P-value from statistical test.

    Returns:
        Significance level interpretation using the conventional
        0.10 / 0.05 / 0.01 / 0.001 cutoffs (inclusive lower bounds).
    """
    # Ordered (inclusive lower bound, label) pairs; the first threshold
    # the p-value meets or exceeds determines its category.
    bands = (
        (0.10, SignificanceLevel.NOT_SIGNIFICANT),
        (0.05, SignificanceLevel.MARGINALLY_SIGNIFICANT),
        (0.01, SignificanceLevel.SIGNIFICANT),
        (0.001, SignificanceLevel.HIGHLY_SIGNIFICANT),
    )
    for bound, label in bands:
        if p_value >= bound:
            return label
    return SignificanceLevel.VERY_HIGHLY_SIGNIFICANT
160
+
161
+
162
+ def _t_distribution_cdf(t: float, df: int) -> float:
163
+ """Approximate CDF of t-distribution using normal approximation.
164
+
165
+ For large df, t-distribution approaches normal.
166
+ This is a simplified implementation without scipy.
167
+ """
168
+ # Use normal approximation for large df
169
+ if df > 100:
170
+ # Approximate using normal distribution
171
+ return _normal_cdf(t)
172
+
173
+ # For smaller df, use a simple approximation
174
+ # This is not exact but provides reasonable estimates
175
+ x = t / math.sqrt(df)
176
+ a = 0.5 * (1 + x / math.sqrt(1 + x * x))
177
+ return a
178
+
179
+
180
+ def _normal_cdf(x: float) -> float:
181
+ """Approximate standard normal CDF using error function approximation."""
182
+ # Approximation of standard normal CDF
183
+ return 0.5 * (1 + math.erf(x / math.sqrt(2)))
184
+
185
+
186
def welch_t_test(
    values1: Sequence[float],
    values2: Sequence[float],
    alpha: float = 0.05,
) -> StatisticalTestResult:
    """Perform Welch's t-test for comparing two group means.

    Welch's t-test is more robust than Student's t-test when
    variances are unequal (heteroscedastic data).

    Args:
        values1: First group values.
        values2: Second group values.
        alpha: Significance level.

    Returns:
        Statistical test result with the t statistic, an approximate
        two-tailed p-value, Cohen's d, and a normal-approximation
        confidence interval for the difference in means.
    """
    n1, n2 = len(values1), len(values2)

    # A sample variance needs at least two observations per group.
    if n1 < 2 or n2 < 2:
        return StatisticalTestResult(
            test_name="Welch's t-test",
            statistic=0.0,
            p_value=1.0,
            significance_level=SignificanceLevel.NOT_SIGNIFICANT,
            is_significant=False,
            interpretation="Insufficient data for t-test (need at least 2 samples per group)",
            sample_sizes=(n1, n2),
        )

    mean1 = _compute_mean(values1)
    mean2 = _compute_mean(values2)

    # Squared standard errors of each group's mean (variance / n).
    # (The original named these se1/se2, which misleadingly suggested
    # un-squared standard errors.)
    sem1_sq = _compute_variance(values1) / n1
    sem2_sq = _compute_variance(values2) / n2
    se_diff = math.sqrt(sem1_sq + sem2_sq)

    # Both groups constant -> t statistic is undefined.
    if se_diff == 0:
        return StatisticalTestResult(
            test_name="Welch's t-test",
            statistic=0.0,
            p_value=1.0,
            significance_level=SignificanceLevel.NOT_SIGNIFICANT,
            is_significant=False,
            interpretation="Zero variance in both groups",
            sample_sizes=(n1, n2),
        )

    t_stat = (mean1 - mean2) / se_diff

    # Welch-Satterthwaite degrees of freedom. se_diff > 0 guarantees
    # the numerator is positive (the original also guarded the
    # unreachable sem1_sq + sem2_sq == 0 case); only the denominator
    # can degenerate, in which case fall back to the pooled df.
    df_num = (sem1_sq + sem2_sq) ** 2
    df_denom = (sem1_sq**2) / (n1 - 1) + (sem2_sq**2) / (n2 - 1)
    df = df_num / df_denom if df_denom != 0 else n1 + n2 - 2

    # Two-tailed p-value (approximation), clamped to [0, 1].
    p_value = 2 * (1 - _t_distribution_cdf(abs(t_stat), int(df)))
    p_value = max(0.0, min(1.0, p_value))

    # Effect size.
    d = cohens_d(values1, values2)
    effect_interp = interpret_effect_size(d)

    # Normal-approximation confidence interval for the mean difference;
    # z is matched to the common two-sided alpha levels.
    z = 1.96 if alpha == 0.05 else 2.576 if alpha == 0.01 else 1.645
    diff = mean1 - mean2
    ci_low = diff - z * se_diff
    ci_high = diff + z * se_diff

    sig_level = interpret_p_value(p_value)
    is_significant = p_value < alpha

    if is_significant:
        direction = "higher" if mean1 > mean2 else "lower"
        interp = (
            f"Statistically significant difference (p={p_value:.4f}). "
            f"Group 1 mean ({mean1:.2f}) is {direction} than Group 2 mean ({mean2:.2f}). "
            f"Effect size: {effect_interp.value} (d={d:.2f})"
        )
    else:
        interp = (
            f"No statistically significant difference (p={p_value:.4f}). "
            f"Group 1 mean: {mean1:.2f}, Group 2 mean: {mean2:.2f}"
        )

    return StatisticalTestResult(
        test_name="Welch's t-test",
        statistic=t_stat,
        p_value=p_value,
        significance_level=sig_level,
        effect_size=d,
        effect_interpretation=effect_interp,
        confidence_interval=(ci_low, ci_high),
        sample_sizes=(n1, n2),
        is_significant=is_significant,
        interpretation=interp,
    )
295
+
296
+
297
def mann_whitney_u_test(
    values1: Sequence[float],
    values2: Sequence[float],
    alpha: float = 0.05,
) -> StatisticalTestResult:
    """Perform Mann-Whitney U test (non-parametric).

    Non-parametric alternative to t-test that doesn't assume
    normal distribution. Tests whether one group tends to have
    larger values than the other.

    Args:
        values1: First group values.
        values2: Second group values.
        alpha: Significance level.

    Returns:
        Statistical test result (U statistic, tie-corrected normal
        approximation p-value, rank-biserial correlation effect size).
    """
    from statistics import median  # stdlib; used only for the summary text

    n1, n2 = len(values1), len(values2)

    if n1 < 1 or n2 < 1:
        return StatisticalTestResult(
            test_name="Mann-Whitney U test",
            statistic=0.0,
            p_value=1.0,
            significance_level=SignificanceLevel.NOT_SIGNIFICANT,
            is_significant=False,
            interpretation="Insufficient data for Mann-Whitney test",
            sample_sizes=(n1, n2),
        )

    # Combine both groups, remembering each value's origin, and sort.
    combined = [(v, 1) for v in values1] + [(v, 2) for v in values2]
    combined.sort(key=lambda pair: pair[0])

    # Assign 1-based ranks; tied values all receive the average of the
    # rank positions they span.
    ranks = [0.0] * len(combined)
    i = 0
    while i < len(combined):
        j = i
        while j < len(combined) and combined[j][0] == combined[i][0]:
            j += 1
        avg_rank = (i + j + 1) / 2  # Ranks are 1-based
        for k in range(i, j):
            ranks[k] = avg_rank
        i = j

    # Rank sum for group 1 and the U statistics.
    r1 = sum(rank for rank, (_, group) in zip(ranks, combined) if group == 1)
    u1 = r1 - n1 * (n1 + 1) / 2
    u2 = n1 * n2 - u1
    u = min(u1, u2)

    # Normal approximation: mean of U under the null hypothesis.
    mu = n1 * n2 / 2

    # Tie correction term for the variance of U.
    n = n1 + n2
    counts: dict[float, int] = {}
    for v, _ in combined:
        counts[v] = counts.get(v, 0) + 1
    tie_sum = sum(c**3 - c for c in counts.values() if c > 1)

    sigma = math.sqrt(
        (n1 * n2 / 12) * (n + 1 - tie_sum / (n * (n - 1)))
        if n > 1 else 0
    )

    if sigma == 0:
        z = 0
        p_value = 1.0
    else:
        z = (u - mu) / sigma
        p_value = 2 * (1 - _normal_cdf(abs(z)))
        p_value = max(0.0, min(1.0, p_value))

    # Effect size: rank-biserial correlation, r = 1 - (2*U)/(n1*n2).
    if n1 * n2 > 0:
        r = 1 - (2 * u) / (n1 * n2)
    else:
        r = 0.0

    # Interpret effect size (conventional bands for correlation r).
    abs_r = abs(r)
    if abs_r < 0.1:
        effect_interp = EffectSize.NEGLIGIBLE
    elif abs_r < 0.3:
        effect_interp = EffectSize.SMALL
    elif abs_r < 0.5:
        effect_interp = EffectSize.MEDIUM
    else:
        effect_interp = EffectSize.LARGE

    sig_level = interpret_p_value(p_value)
    is_significant = p_value < alpha

    if is_significant:
        # BUG FIX: previously reported sorted(vals)[n // 2], which is the
        # upper-middle element rather than the median for even-sized
        # groups; statistics.median averages the two middle values.
        median1 = median(values1)
        median2 = median(values2)
        direction = "tends to be higher" if u1 > u2 else "tends to be lower"
        interp = (
            f"Statistically significant difference (p={p_value:.4f}). "
            f"Group 1 (median={median1:.2f}) {direction} than Group 2 (median={median2:.2f}). "
            f"Effect size: {effect_interp.value} (r={r:.2f})"
        )
    else:
        interp = (
            f"No statistically significant difference (p={p_value:.4f}). "
            f"Distributions are similar."
        )

    return StatisticalTestResult(
        test_name="Mann-Whitney U test",
        statistic=u,
        p_value=p_value,
        significance_level=sig_level,
        effect_size=r,
        effect_interpretation=effect_interp,
        sample_sizes=(n1, n2),
        is_significant=is_significant,
        interpretation=interp,
    )
428
+
429
+
430
def chi_square_test(
    observed1: Sequence[int],
    observed2: Sequence[int],
    alpha: float = 0.05,
) -> StatisticalTestResult:
    """Perform chi-square test for comparing categorical distributions.

    Args:
        observed1: First group category counts.
        observed2: Second group category counts.
        alpha: Significance level.

    Returns:
        Statistical test result (chi-square statistic, Wilson-Hilferty
        approximate p-value, Cramer's V effect size).
    """

    def _inconclusive(reason: str) -> StatisticalTestResult:
        # Shared early-exit result for inputs the test cannot handle.
        return StatisticalTestResult(
            test_name="Chi-square test",
            statistic=0.0,
            p_value=1.0,
            significance_level=SignificanceLevel.NOT_SIGNIFICANT,
            is_significant=False,
            interpretation=reason,
        )

    if len(observed1) != len(observed2):
        return _inconclusive("Categories must match between groups")

    num_categories = len(observed1)
    if num_categories < 2:
        return _inconclusive("Need at least 2 categories")

    total1 = sum(observed1)
    total2 = sum(observed2)
    grand_total = total1 + total2
    if grand_total == 0:
        return _inconclusive("No observations")

    # Chi-square statistic over the 2 x k contingency table.
    chi2 = 0.0
    for count1, count2 in zip(observed1, observed2):
        row_total = count1 + count2
        # Expected counts under the null hypothesis of identical
        # category proportions in both groups.
        expected1 = (row_total * total1) / grand_total
        expected2 = (row_total * total2) / grand_total
        if expected1 > 0:
            chi2 += (count1 - expected1) ** 2 / expected1
        if expected2 > 0:
            chi2 += (count2 - expected2) ** 2 / expected2

    df = num_categories - 1  # (2 rows - 1) * (k columns - 1) = k - 1

    # Approximate the chi-square tail probability via the
    # Wilson-Hilferty cube-root normal transformation.
    if df > 0:
        z = ((chi2 / df) ** (1 / 3) - (1 - 2 / (9 * df))) / math.sqrt(2 / (9 * df))
        p_value = max(0.0, min(1.0, 1 - _normal_cdf(z)))
    else:
        p_value = 1.0

    # Cramer's V effect size; for a 2 x k table min(rows-1, cols-1) == 1.
    cramers_v = math.sqrt(chi2 / grand_total) if grand_total > 0 else 0.0

    # Conventional bands for association strength.
    if cramers_v < 0.1:
        effect_interp = EffectSize.NEGLIGIBLE
    elif cramers_v < 0.3:
        effect_interp = EffectSize.SMALL
    elif cramers_v < 0.5:
        effect_interp = EffectSize.MEDIUM
    else:
        effect_interp = EffectSize.LARGE

    sig_level = interpret_p_value(p_value)
    is_significant = p_value < alpha

    if is_significant:
        interp = (
            f"Statistically significant difference in distributions (p={p_value:.4f}, "
            f"χ²={chi2:.2f}, df={df}). Effect size: {effect_interp.value} (V={cramers_v:.2f})"
        )
    else:
        interp = (
            f"No significant difference in distributions (p={p_value:.4f}, "
            f"χ²={chi2:.2f}, df={df})"
        )

    return StatisticalTestResult(
        test_name="Chi-square test",
        statistic=chi2,
        p_value=p_value,
        significance_level=sig_level,
        effect_size=cramers_v,
        effect_interpretation=effect_interp,
        sample_sizes=(int(total1), int(total2)),
        is_significant=is_significant,
        interpretation=interp,
    )
549
+
550
+
551
def trend_significance_test(
    values: Sequence[float],
    timestamps: Sequence[float] | None = None,
    alpha: float = 0.05,
) -> StatisticalTestResult:
    """Test for significant trend over time using linear regression.

    Fits an ordinary least squares line and tests whether the slope
    differs significantly from zero.

    Args:
        values: Time series values.
        timestamps: Optional timestamps (uses indices if not provided).
        alpha: Significance level.

    Returns:
        Statistical test result; ``statistic`` is the t statistic of
        the slope and ``effect_size`` carries the slope itself.
    """
    n = len(values)

    if n < 3:
        return StatisticalTestResult(
            test_name="Trend significance test",
            statistic=0.0,
            p_value=1.0,
            significance_level=SignificanceLevel.NOT_SIGNIFICANT,
            is_significant=False,
            interpretation="Need at least 3 data points for trend analysis",
            sample_sizes=(n, 0),
        )

    # Use indices when no usable timestamps are provided. Checking
    # explicitly (instead of bare truthiness) keeps the empty-sequence
    # fallback while avoiding the ambiguous truth value of array-like
    # inputs such as numpy arrays.
    if timestamps is not None and len(timestamps) > 0:
        x = list(timestamps)
    else:
        x = list(range(n))
    y = list(values)

    mean_x = _compute_mean(x)
    mean_y = _compute_mean(y)

    # Ordinary least squares slope and intercept.
    numerator = sum((xi - mean_x) * (yi - mean_y) for xi, yi in zip(x, y))
    denominator = sum((xi - mean_x) ** 2 for xi in x)

    if denominator == 0:
        return StatisticalTestResult(
            test_name="Trend significance test",
            statistic=0.0,
            p_value=1.0,
            significance_level=SignificanceLevel.NOT_SIGNIFICANT,
            is_significant=False,
            interpretation="Cannot calculate trend (constant x values)",
            sample_sizes=(n, 0),
        )

    slope = numerator / denominator
    intercept = mean_y - slope * mean_x

    # Residual variance and standard error of the slope.
    residuals = [yi - (slope * xi + intercept) for xi, yi in zip(x, y)]
    sse = sum(r**2 for r in residuals)
    mse = sse / (n - 2) if n > 2 else 0
    se_slope = math.sqrt(mse / denominator) if mse > 0 and denominator > 0 else 0

    # T-statistic for the slope.
    if se_slope > 0:
        t_stat = slope / se_slope
        df = n - 2
        p_value = 2 * (1 - _t_distribution_cdf(abs(t_stat), df))
        p_value = max(0.0, min(1.0, p_value))
    elif slope != 0:
        # BUG FIX: a perfect (zero-residual) linear fit with a nonzero
        # slope previously fell through to p=1.0 and reported "no
        # significant trend". A deterministic trend is maximally
        # significant, not insignificant.
        t_stat = math.inf if slope > 0 else -math.inf
        p_value = 0.0
    else:
        t_stat = 0.0
        p_value = 1.0

    # Coefficient of determination.
    ss_total = sum((yi - mean_y) ** 2 for yi in y)
    r_squared = 1 - (sse / ss_total) if ss_total > 0 else 0

    sig_level = interpret_p_value(p_value)
    is_significant = p_value < alpha

    # Interpret trend direction.
    if slope > 0:
        direction = "increasing"
    elif slope < 0:
        direction = "decreasing"
    else:
        direction = "flat"

    if is_significant:
        interp = (
            f"Statistically significant {direction} trend (p={p_value:.4f}). "
            f"Slope: {slope:.4f} per unit, R²={r_squared:.2f}"
        )
    else:
        interp = (
            f"No significant trend detected (p={p_value:.4f}). "
            f"Slope: {slope:.4f}, R²={r_squared:.2f}"
        )

    return StatisticalTestResult(
        test_name="Trend significance test",
        statistic=t_stat,
        p_value=p_value,
        significance_level=sig_level,
        effect_size=slope,
        sample_sizes=(n, 0),
        is_significant=is_significant,
        interpretation=interp,
    )
658
+
659
+
660
@dataclass
class ComparisonResult:
    """Result of comparing two data series with multiple tests."""

    # Welch's t-test outcome (parametric comparison of means).
    t_test: StatisticalTestResult
    # Mann-Whitney U outcome (non-parametric rank comparison).
    mann_whitney: StatisticalTestResult
    # Name of the test judged most appropriate for the data.
    recommended_test: str
    # Significance verdict taken from the recommended test.
    overall_significant: bool
    # One-line human-readable summary of the comparison.
    summary: str
669
+
670
+
671
def comprehensive_comparison(
    values1: Sequence[float],
    values2: Sequence[float],
    alpha: float = 0.05,
) -> ComparisonResult:
    """Perform comprehensive statistical comparison.

    Runs multiple tests and provides recommendation based on data characteristics.

    Args:
        values1: First group values.
        values2: Second group values.
        alpha: Significance level.

    Returns:
        Comprehensive comparison result.
    """
    parametric = welch_t_test(values1, values2, alpha)
    nonparametric = mann_whitney_u_test(values1, values2, alpha)

    # With small groups the normality assumption behind the t-test is
    # shaky, so prefer the rank-based test; with large samples the
    # t-test is robust.
    small_sample = len(values1) < 30 or len(values2) < 30
    if small_sample:
        recommended, primary = "Mann-Whitney U test", nonparametric
    else:
        recommended, primary = "Welch's t-test", parametric

    overall = primary.is_significant
    if overall:
        summary = f"Significant difference detected ({recommended}, p={primary.p_value:.4f})"
    else:
        summary = f"No significant difference ({recommended}, p={primary.p_value:.4f})"

    return ComparisonResult(
        t_test=parametric,
        mann_whitney=nonparametric,
        recommended_test=recommended,
        overall_significant=overall,
        summary=summary,
    )