truthound-dashboard 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +258 -0
- truthound_dashboard/api/anomaly.py +1302 -0
- truthound_dashboard/api/cross_alerts.py +352 -0
- truthound_dashboard/api/deps.py +143 -0
- truthound_dashboard/api/drift_monitor.py +540 -0
- truthound_dashboard/api/lineage.py +1151 -0
- truthound_dashboard/api/maintenance.py +363 -0
- truthound_dashboard/api/middleware.py +373 -1
- truthound_dashboard/api/model_monitoring.py +805 -0
- truthound_dashboard/api/notifications_advanced.py +2452 -0
- truthound_dashboard/api/plugins.py +2096 -0
- truthound_dashboard/api/profile.py +211 -14
- truthound_dashboard/api/reports.py +853 -0
- truthound_dashboard/api/router.py +147 -0
- truthound_dashboard/api/rule_suggestions.py +310 -0
- truthound_dashboard/api/schema_evolution.py +231 -0
- truthound_dashboard/api/sources.py +47 -3
- truthound_dashboard/api/triggers.py +190 -0
- truthound_dashboard/api/validations.py +13 -0
- truthound_dashboard/api/validators.py +333 -4
- truthound_dashboard/api/versioning.py +309 -0
- truthound_dashboard/api/websocket.py +301 -0
- truthound_dashboard/core/__init__.py +27 -0
- truthound_dashboard/core/anomaly.py +1395 -0
- truthound_dashboard/core/anomaly_explainer.py +633 -0
- truthound_dashboard/core/cache.py +206 -0
- truthound_dashboard/core/cached_services.py +422 -0
- truthound_dashboard/core/charts.py +352 -0
- truthound_dashboard/core/connections.py +1069 -42
- truthound_dashboard/core/cross_alerts.py +837 -0
- truthound_dashboard/core/drift_monitor.py +1477 -0
- truthound_dashboard/core/drift_sampling.py +669 -0
- truthound_dashboard/core/i18n/__init__.py +42 -0
- truthound_dashboard/core/i18n/detector.py +173 -0
- truthound_dashboard/core/i18n/messages.py +564 -0
- truthound_dashboard/core/lineage.py +971 -0
- truthound_dashboard/core/maintenance.py +443 -5
- truthound_dashboard/core/model_monitoring.py +1043 -0
- truthound_dashboard/core/notifications/channels.py +1020 -1
- truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
- truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
- truthound_dashboard/core/notifications/deduplication/service.py +400 -0
- truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
- truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
- truthound_dashboard/core/notifications/dispatcher.py +43 -0
- truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
- truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
- truthound_dashboard/core/notifications/escalation/engine.py +429 -0
- truthound_dashboard/core/notifications/escalation/models.py +336 -0
- truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
- truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
- truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
- truthound_dashboard/core/notifications/events.py +49 -0
- truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
- truthound_dashboard/core/notifications/metrics/base.py +528 -0
- truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
- truthound_dashboard/core/notifications/routing/__init__.py +169 -0
- truthound_dashboard/core/notifications/routing/combinators.py +184 -0
- truthound_dashboard/core/notifications/routing/config.py +375 -0
- truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
- truthound_dashboard/core/notifications/routing/engine.py +382 -0
- truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
- truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
- truthound_dashboard/core/notifications/routing/rules.py +625 -0
- truthound_dashboard/core/notifications/routing/validator.py +678 -0
- truthound_dashboard/core/notifications/service.py +2 -0
- truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
- truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
- truthound_dashboard/core/notifications/throttling/builder.py +311 -0
- truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
- truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
- truthound_dashboard/core/openlineage.py +1028 -0
- truthound_dashboard/core/plugins/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/extractor.py +703 -0
- truthound_dashboard/core/plugins/docs/renderers.py +804 -0
- truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
- truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
- truthound_dashboard/core/plugins/hooks/manager.py +403 -0
- truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
- truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
- truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
- truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
- truthound_dashboard/core/plugins/loader.py +504 -0
- truthound_dashboard/core/plugins/registry.py +810 -0
- truthound_dashboard/core/plugins/reporter_executor.py +588 -0
- truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
- truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
- truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
- truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
- truthound_dashboard/core/plugins/sandbox.py +617 -0
- truthound_dashboard/core/plugins/security/__init__.py +68 -0
- truthound_dashboard/core/plugins/security/analyzer.py +535 -0
- truthound_dashboard/core/plugins/security/policies.py +311 -0
- truthound_dashboard/core/plugins/security/protocols.py +296 -0
- truthound_dashboard/core/plugins/security/signing.py +842 -0
- truthound_dashboard/core/plugins/security.py +446 -0
- truthound_dashboard/core/plugins/validator_executor.py +401 -0
- truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
- truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
- truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
- truthound_dashboard/core/plugins/versioning/semver.py +266 -0
- truthound_dashboard/core/profile_comparison.py +601 -0
- truthound_dashboard/core/report_history.py +570 -0
- truthound_dashboard/core/reporters/__init__.py +57 -0
- truthound_dashboard/core/reporters/base.py +296 -0
- truthound_dashboard/core/reporters/csv_reporter.py +155 -0
- truthound_dashboard/core/reporters/html_reporter.py +598 -0
- truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
- truthound_dashboard/core/reporters/i18n/base.py +494 -0
- truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
- truthound_dashboard/core/reporters/json_reporter.py +160 -0
- truthound_dashboard/core/reporters/junit_reporter.py +233 -0
- truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
- truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
- truthound_dashboard/core/reporters/registry.py +272 -0
- truthound_dashboard/core/rule_generator.py +2088 -0
- truthound_dashboard/core/scheduler.py +822 -12
- truthound_dashboard/core/schema_evolution.py +858 -0
- truthound_dashboard/core/services.py +152 -9
- truthound_dashboard/core/statistics.py +718 -0
- truthound_dashboard/core/streaming_anomaly.py +883 -0
- truthound_dashboard/core/triggers/__init__.py +45 -0
- truthound_dashboard/core/triggers/base.py +226 -0
- truthound_dashboard/core/triggers/evaluators.py +609 -0
- truthound_dashboard/core/triggers/factory.py +363 -0
- truthound_dashboard/core/unified_alerts.py +870 -0
- truthound_dashboard/core/validation_limits.py +509 -0
- truthound_dashboard/core/versioning.py +709 -0
- truthound_dashboard/core/websocket/__init__.py +59 -0
- truthound_dashboard/core/websocket/manager.py +512 -0
- truthound_dashboard/core/websocket/messages.py +130 -0
- truthound_dashboard/db/__init__.py +30 -0
- truthound_dashboard/db/models.py +3375 -3
- truthound_dashboard/main.py +22 -0
- truthound_dashboard/schemas/__init__.py +396 -1
- truthound_dashboard/schemas/anomaly.py +1258 -0
- truthound_dashboard/schemas/base.py +4 -0
- truthound_dashboard/schemas/cross_alerts.py +334 -0
- truthound_dashboard/schemas/drift_monitor.py +890 -0
- truthound_dashboard/schemas/lineage.py +428 -0
- truthound_dashboard/schemas/maintenance.py +154 -0
- truthound_dashboard/schemas/model_monitoring.py +374 -0
- truthound_dashboard/schemas/notifications_advanced.py +1363 -0
- truthound_dashboard/schemas/openlineage.py +704 -0
- truthound_dashboard/schemas/plugins.py +1293 -0
- truthound_dashboard/schemas/profile.py +420 -34
- truthound_dashboard/schemas/profile_comparison.py +242 -0
- truthound_dashboard/schemas/reports.py +285 -0
- truthound_dashboard/schemas/rule_suggestion.py +434 -0
- truthound_dashboard/schemas/schema_evolution.py +164 -0
- truthound_dashboard/schemas/source.py +117 -2
- truthound_dashboard/schemas/triggers.py +511 -0
- truthound_dashboard/schemas/unified_alerts.py +223 -0
- truthound_dashboard/schemas/validation.py +25 -1
- truthound_dashboard/schemas/validators/__init__.py +11 -0
- truthound_dashboard/schemas/validators/base.py +151 -0
- truthound_dashboard/schemas/versioning.py +152 -0
- truthound_dashboard/static/index.html +2 -2
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/METADATA +147 -23
- truthound_dashboard-1.4.1.dist-info/RECORD +239 -0
- truthound_dashboard/static/assets/index-BZG20KuF.js +0 -586
- truthound_dashboard/static/assets/index-D_HyZ3pb.css +0 -1
- truthound_dashboard/static/assets/unmerged_dictionaries-CtpqQBm0.js +0 -1
- truthound_dashboard-1.3.1.dist-info/RECORD +0 -110
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,718 @@
|
|
|
1
|
+
"""Statistical analysis utilities.
|
|
2
|
+
|
|
3
|
+
This module provides statistical significance testing and analysis
|
|
4
|
+
utilities for profile comparison and data quality assessment.
|
|
5
|
+
|
|
6
|
+
Features:
|
|
7
|
+
- T-test for comparing means
|
|
8
|
+
- Mann-Whitney U test for non-parametric comparison
|
|
9
|
+
- Chi-square test for categorical data
|
|
10
|
+
- Effect size calculation (Cohen's d)
|
|
11
|
+
- Confidence interval estimation
|
|
12
|
+
- Trend significance detection
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import math
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
from enum import Enum
|
|
20
|
+
from typing import Sequence
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class SignificanceLevel(str, Enum):
    """Qualitative statistical significance levels.

    Levels correspond to conventional two-tailed p-value thresholds; the
    mapping from a p-value to a level is implemented in ``interpret_p_value``.
    Inherits from ``str`` so values serialize directly in JSON responses.
    """

    NOT_SIGNIFICANT = "not_significant"  # p >= 0.10
    MARGINALLY_SIGNIFICANT = "marginally_significant"  # p < 0.10
    SIGNIFICANT = "significant"  # p < 0.05
    HIGHLY_SIGNIFICANT = "highly_significant"  # p < 0.01
    VERY_HIGHLY_SIGNIFICANT = "very_highly_significant"  # p < 0.001
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class EffectSize(str, Enum):
    """Qualitative interpretation of an effect-size magnitude.

    The thresholds follow Cohen's conventions for |d| (see
    ``interpret_effect_size``). The same labels are reused in this module
    for other effect-size measures (rank-biserial r, Cramer's V), each with
    its own customary cut-offs.
    """

    NEGLIGIBLE = "negligible"  # < 0.2
    SMALL = "small"  # 0.2 - 0.5
    MEDIUM = "medium"  # 0.5 - 0.8
    LARGE = "large"  # > 0.8
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class StatisticalTestResult:
    """Result of a statistical significance test.

    Returned by every test helper in this module (Welch's t-test,
    Mann-Whitney U, chi-square, trend test) so callers can consume the
    outcome uniformly regardless of which test produced it.
    """

    # Human-readable name of the test that produced this result.
    test_name: str
    # Raw test statistic (t, U, chi-square, ... depending on the test).
    statistic: float
    # P-value in [0, 1] (two-tailed where applicable).
    p_value: float
    # Qualitative interpretation of the p-value.
    significance_level: SignificanceLevel
    # Test-specific effect size (Cohen's d, rank-biserial r, Cramer's V, ...).
    effect_size: float | None = None
    # Qualitative interpretation of the effect-size magnitude.
    effect_interpretation: EffectSize | None = None
    # (low, high) interval for the estimated difference, when computed.
    confidence_interval: tuple[float, float] | None = None
    # Sizes of the two compared samples (n1, n2), when applicable.
    sample_sizes: tuple[int, int] | None = None
    # True when p_value < alpha for the alpha passed to the test.
    is_significant: bool = False
    # Human-readable summary of the outcome.
    interpretation: str = ""
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _compute_mean(values: Sequence[float]) -> float:
    """Return the arithmetic mean of *values*, or 0.0 for an empty sequence."""
    count = len(values)
    if count == 0:
        return 0.0
    return sum(values) / count
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _compute_variance(values: Sequence[float], ddof: int = 1) -> float:
    """Return the variance of *values*.

    Uses ``ddof`` delta degrees of freedom (ddof=1 gives the unbiased
    sample variance). Returns 0.0 when there are too few observations.
    """
    n = len(values)
    if n <= ddof:
        return 0.0
    center = sum(values) / n
    return sum((v - center) ** 2 for v in values) / (n - ddof)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _compute_std(values: Sequence[float], ddof: int = 1) -> float:
    """Return the standard deviation of *values* (sqrt of ``_compute_variance``)."""
    variance = _compute_variance(values, ddof)
    return math.sqrt(variance)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _pooled_std(
    values1: Sequence[float],
    values2: Sequence[float],
) -> float:
    """Return the pooled standard deviation of two groups.

    Weights each group's sample variance by its degrees of freedom.
    Returns 0.0 when either group has fewer than two observations.
    """
    n1, n2 = len(values1), len(values2)
    if n1 <= 1 or n2 <= 1:
        return 0.0

    # Weighted combination of the two sample variances (ddof=1 each).
    weighted = (
        (n1 - 1) * _compute_variance(values1, ddof=1)
        + (n2 - 1) * _compute_variance(values2, ddof=1)
    )
    return math.sqrt(weighted / (n1 + n2 - 2))
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def cohens_d(
    values1: Sequence[float],
    values2: Sequence[float],
) -> float:
    """Calculate Cohen's d effect size (magnitude only).

    Args:
        values1: First group values.
        values2: Second group values.

    Returns:
        Absolute standardized mean difference; 0.0 when the pooled
        standard deviation is zero (no variability to standardize by).
    """
    pooled = _pooled_std(values1, values2)
    if pooled == 0:
        return 0.0

    difference = _compute_mean(values1) - _compute_mean(values2)
    return abs(difference) / pooled
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def interpret_effect_size(d: float) -> EffectSize:
    """Interpret Cohen's d effect size.

    Args:
        d: Cohen's d value (sign is ignored).

    Returns:
        Qualitative effect-size label per Cohen's conventional cut-offs
        (0.2 / 0.5 / 0.8).
    """
    magnitude = abs(d)
    # Walk the thresholds in ascending order; first match wins.
    for bound, label in (
        (0.2, EffectSize.NEGLIGIBLE),
        (0.5, EffectSize.SMALL),
        (0.8, EffectSize.MEDIUM),
    ):
        if magnitude < bound:
            return label
    return EffectSize.LARGE
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def interpret_p_value(p_value: float) -> SignificanceLevel:
    """Interpret p-value significance level.

    Args:
        p_value: P-value from a statistical test.

    Returns:
        Qualitative significance label using the conventional
        0.10 / 0.05 / 0.01 / 0.001 cut-offs.
    """
    # Thresholds in descending order; the first one the p-value still
    # meets-or-exceeds determines the label.
    for floor, level in (
        (0.10, SignificanceLevel.NOT_SIGNIFICANT),
        (0.05, SignificanceLevel.MARGINALLY_SIGNIFICANT),
        (0.01, SignificanceLevel.SIGNIFICANT),
        (0.001, SignificanceLevel.HIGHLY_SIGNIFICANT),
    ):
        if p_value >= floor:
            return level
    return SignificanceLevel.VERY_HIGHLY_SIGNIFICANT
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _t_distribution_cdf(t: float, df: int) -> float:
    """Approximate CDF of the t-distribution without scipy.

    For df > 100 the t-distribution is close enough to standard normal
    that the normal CDF is used directly. For smaller df a crude
    algebraic sigmoid in t/sqrt(df) is used; it is not exact but gives
    reasonable estimates for this module's significance labelling.
    """
    if df > 100:
        # Large-df regime: t is effectively standard normal.
        return _normal_cdf(t)

    # Small-df regime: simple monotone approximation in scaled t.
    scaled = t / math.sqrt(df)
    return 0.5 * (1 + scaled / math.sqrt(1 + scaled * scaled))
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _normal_cdf(x: float) -> float:
    """Standard normal CDF: Phi(x) = (1 + erf(x / sqrt(2))) / 2."""
    return (1 + math.erf(x / math.sqrt(2))) / 2
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def welch_t_test(
    values1: Sequence[float],
    values2: Sequence[float],
    alpha: float = 0.05,
) -> StatisticalTestResult:
    """Perform Welch's t-test for comparing two group means.

    Welch's t-test is more robust than Student's t-test when
    variances are unequal (heteroscedastic data).

    Args:
        values1: First group values.
        values2: Second group values.
        alpha: Significance level.

    Returns:
        Statistical test result with t-statistic, approximate two-tailed
        p-value, Cohen's d, and a z-based confidence interval for the
        difference in means.
    """
    n1, n2 = len(values1), len(values2)

    # Each group needs at least two points to estimate a sample variance.
    if n1 < 2 or n2 < 2:
        return StatisticalTestResult(
            test_name="Welch's t-test",
            statistic=0.0,
            p_value=1.0,
            significance_level=SignificanceLevel.NOT_SIGNIFICANT,
            is_significant=False,
            interpretation="Insufficient data for t-test (need at least 2 samples per group)",
            sample_sizes=(n1, n2),
        )

    mean1 = _compute_mean(values1)
    mean2 = _compute_mean(values2)
    var1 = _compute_variance(values1)
    var2 = _compute_variance(values2)

    # Squared standard errors of each group mean (variance / n).
    sq_se1 = var1 / n1
    sq_se2 = var2 / n2
    # Standard error of the difference in means.
    se_diff = math.sqrt(sq_se1 + sq_se2)

    if se_diff == 0:
        # Both groups are constant: no variability to test against.
        return StatisticalTestResult(
            test_name="Welch's t-test",
            statistic=0.0,
            p_value=1.0,
            significance_level=SignificanceLevel.NOT_SIGNIFICANT,
            is_significant=False,
            interpretation="Zero variance in both groups",
            sample_sizes=(n1, n2),
        )

    t_stat = (mean1 - mean2) / se_diff

    # Welch-Satterthwaite degrees of freedom. Because se_diff > 0 here,
    # sq_se1 + sq_se2 > 0, and with n1, n2 >= 2 the denominator is also
    # strictly positive -- the zero-division fallbacks previously guarding
    # this expression were unreachable and have been removed.
    df = (sq_se1 + sq_se2) ** 2 / (
        (sq_se1 ** 2) / (n1 - 1) + (sq_se2 ** 2) / (n2 - 1)
    )

    # Two-tailed p-value (approximation via _t_distribution_cdf).
    p_value = 2 * (1 - _t_distribution_cdf(abs(t_stat), int(df)))
    p_value = max(0.0, min(1.0, p_value))  # Clamp to [0, 1]

    # Effect size (magnitude of the standardized mean difference).
    d = cohens_d(values1, values2)
    effect_interp = interpret_effect_size(d)

    # Confidence interval for the difference in means, using a z critical
    # value (large-sample approximation); only the three common alpha
    # levels map to an exact critical value, others fall back to 1.645.
    z = 1.96 if alpha == 0.05 else 2.576 if alpha == 0.01 else 1.645
    ci_low = (mean1 - mean2) - z * se_diff
    ci_high = (mean1 - mean2) + z * se_diff

    # Interpret results.
    sig_level = interpret_p_value(p_value)
    is_significant = p_value < alpha

    if is_significant:
        direction = "higher" if mean1 > mean2 else "lower"
        interp = (
            f"Statistically significant difference (p={p_value:.4f}). "
            f"Group 1 mean ({mean1:.2f}) is {direction} than Group 2 mean ({mean2:.2f}). "
            f"Effect size: {effect_interp.value} (d={d:.2f})"
        )
    else:
        interp = (
            f"No statistically significant difference (p={p_value:.4f}). "
            f"Group 1 mean: {mean1:.2f}, Group 2 mean: {mean2:.2f}"
        )

    return StatisticalTestResult(
        test_name="Welch's t-test",
        statistic=t_stat,
        p_value=p_value,
        significance_level=sig_level,
        effect_size=d,
        effect_interpretation=effect_interp,
        confidence_interval=(ci_low, ci_high),
        sample_sizes=(n1, n2),
        is_significant=is_significant,
        interpretation=interp,
    )
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def mann_whitney_u_test(
    values1: Sequence[float],
    values2: Sequence[float],
    alpha: float = 0.05,
) -> StatisticalTestResult:
    """Perform Mann-Whitney U test (non-parametric).

    Non-parametric alternative to t-test that doesn't assume
    normal distribution. Tests whether one group tends to have
    larger values than the other. Uses the normal approximation with
    tie correction for the p-value; the reported statistic is the
    smaller of U1 and U2.

    Args:
        values1: First group values.
        values2: Second group values.
        alpha: Significance level.

    Returns:
        Statistical test result.
    """
    n1, n2 = len(values1), len(values2)

    if n1 < 1 or n2 < 1:
        return StatisticalTestResult(
            test_name="Mann-Whitney U test",
            statistic=0.0,
            p_value=1.0,
            significance_level=SignificanceLevel.NOT_SIGNIFICANT,
            is_significant=False,
            interpretation="Insufficient data for Mann-Whitney test",
            sample_sizes=(n1, n2),
        )

    # Combine both samples, tagging each value with its group (1 or 2),
    # then sort by value so positional indices correspond to rank order.
    combined = [(v, 1) for v in values1] + [(v, 2) for v in values2]
    combined.sort(key=lambda x: x[0])

    # Assign ranks; runs of equal values all receive their average rank
    # (the standard mid-rank treatment of ties).
    ranks: dict[int, float] = {}
    i = 0
    while i < len(combined):
        j = i
        # Advance j past all values tied with combined[i].
        while j < len(combined) and combined[j][0] == combined[i][0]:
            j += 1
        # Average of 1-based ranks i+1 .. j equals (i + j + 1) / 2.
        avg_rank = (i + j + 1) / 2  # Ranks are 1-based
        for k in range(i, j):
            ranks[k] = avg_rank
        i = j

    # Rank sum for group 1 only (group 2's follows from the total).
    r1 = sum(ranks[i] for i, (_, group) in enumerate(combined) if group == 1)

    # U statistics for each group; the test statistic is the smaller one.
    u1 = r1 - n1 * (n1 + 1) / 2
    u2 = n1 * n2 - u1
    u = min(u1, u2)

    # Mean of U under the null hypothesis.
    mu = n1 * n2 / 2

    # Tie correction term: sum of (t^3 - t) over each group of tied values,
    # used to shrink the null standard deviation.
    n = n1 + n2
    tie_sum = 0
    unique_values: dict[float, int] = {}
    for v, _ in combined:
        unique_values[v] = unique_values.get(v, 0) + 1
    for count in unique_values.values():
        if count > 1:
            tie_sum += count ** 3 - count

    # Tie-corrected standard deviation of U under the null; the guard
    # inside sqrt() covers n == 1 where the correction would divide by 0.
    # When every observation is tied, the argument reduces exactly to 0.
    sigma = math.sqrt(
        (n1 * n2 / 12) * (n + 1 - tie_sum / (n * (n - 1)))
        if n > 1 else 0
    )

    if sigma == 0:
        # Degenerate case (all values identical): nothing to test.
        z = 0
        p_value = 1.0
    else:
        # Two-tailed p-value from the normal approximation.
        z = (u - mu) / sigma
        p_value = 2 * (1 - _normal_cdf(abs(z)))
        p_value = max(0.0, min(1.0, p_value))

    # Effect size: rank-biserial correlation, r = 1 - 2U / (n1 * n2).
    if n1 * n2 > 0:
        r = 1 - (2 * u) / (n1 * n2)
    else:
        r = 0.0

    # Interpret effect size using the customary cut-offs for correlations
    # (0.1 / 0.3 / 0.5), not Cohen's d thresholds.
    abs_r = abs(r)
    if abs_r < 0.1:
        effect_interp = EffectSize.NEGLIGIBLE
    elif abs_r < 0.3:
        effect_interp = EffectSize.SMALL
    elif abs_r < 0.5:
        effect_interp = EffectSize.MEDIUM
    else:
        effect_interp = EffectSize.LARGE

    sig_level = interpret_p_value(p_value)
    is_significant = p_value < alpha

    if is_significant:
        # NOTE(review): index n // 2 yields the upper median for
        # even-length groups, not the conventional midpoint average.
        # Display-only, but worth confirming intent.
        median1 = sorted(values1)[n1 // 2] if n1 > 0 else 0
        median2 = sorted(values2)[n2 // 2] if n2 > 0 else 0
        direction = "tends to be higher" if u1 > u2 else "tends to be lower"
        interp = (
            f"Statistically significant difference (p={p_value:.4f}). "
            f"Group 1 (median={median1:.2f}) {direction} than Group 2 (median={median2:.2f}). "
            f"Effect size: {effect_interp.value} (r={r:.2f})"
        )
    else:
        interp = (
            f"No statistically significant difference (p={p_value:.4f}). "
            f"Distributions are similar."
        )

    return StatisticalTestResult(
        test_name="Mann-Whitney U test",
        statistic=u,
        p_value=p_value,
        significance_level=sig_level,
        effect_size=r,
        effect_interpretation=effect_interp,
        sample_sizes=(n1, n2),
        is_significant=is_significant,
        interpretation=interp,
    )
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def chi_square_test(
    observed1: Sequence[int],
    observed2: Sequence[int],
    alpha: float = 0.05,
) -> StatisticalTestResult:
    """Perform chi-square test for comparing categorical distributions.

    Treats the inputs as a 2 x k contingency table (two groups over the
    same k categories, element i of each sequence being the count for
    category i) and tests homogeneity of the two distributions.

    Args:
        observed1: First group category counts.
        observed2: Second group category counts.
        alpha: Significance level.

    Returns:
        Statistical test result with the chi-square statistic, an
        approximate p-value, and Cramer's V as effect size.
    """
    if len(observed1) != len(observed2):
        return StatisticalTestResult(
            test_name="Chi-square test",
            statistic=0.0,
            p_value=1.0,
            significance_level=SignificanceLevel.NOT_SIGNIFICANT,
            is_significant=False,
            interpretation="Categories must match between groups",
        )

    k = len(observed1)  # Number of categories

    if k < 2:
        return StatisticalTestResult(
            test_name="Chi-square test",
            statistic=0.0,
            p_value=1.0,
            significance_level=SignificanceLevel.NOT_SIGNIFICANT,
            is_significant=False,
            interpretation="Need at least 2 categories",
        )

    total1 = sum(observed1)
    total2 = sum(observed2)
    total = total1 + total2

    if total == 0:
        return StatisticalTestResult(
            test_name="Chi-square test",
            statistic=0.0,
            p_value=1.0,
            significance_level=SignificanceLevel.NOT_SIGNIFICANT,
            is_significant=False,
            interpretation="No observations",
        )

    # Chi-square statistic: sum over all 2k cells of (O - E)^2 / E,
    # where E is the expected count under homogeneity. Cells whose
    # expected count is zero are skipped (they contribute nothing).
    chi2 = 0.0
    for i in range(k):
        row_total = observed1[i] + observed2[i]

        # Expected values under null hypothesis
        expected1 = (row_total * total1) / total if total > 0 else 0
        expected2 = (row_total * total2) / total if total > 0 else 0

        if expected1 > 0:
            chi2 += (observed1[i] - expected1) ** 2 / expected1
        if expected2 > 0:
            chi2 += (observed2[i] - expected2) ** 2 / expected2

    # Degrees of freedom for a 2 x k table: (2 - 1) * (k - 1) = k - 1.
    df = k - 1

    # Approximate p-value via the Wilson-Hilferty cube-root transform,
    # which maps a chi-square variate to an approximately standard
    # normal z; the upper tail gives the p-value.
    if df > 0:
        z = ((chi2 / df) ** (1/3) - (1 - 2 / (9 * df))) / math.sqrt(2 / (9 * df))
        p_value = 1 - _normal_cdf(z)
        p_value = max(0.0, min(1.0, p_value))
    else:
        p_value = 1.0

    # Effect size: Cramer's V = sqrt(chi2 / (N * min(rows-1, cols-1))).
    # With exactly two groups, min(rows-1, cols-1) is 1 for any k >= 2.
    min_dim = 1  # 2 groups - 1
    if total > 0 and min_dim > 0:
        cramers_v = math.sqrt(chi2 / (total * min_dim))
    else:
        cramers_v = 0.0

    # Interpret effect size using the customary 0.1 / 0.3 / 0.5 cut-offs
    # for association measures (not Cohen's d thresholds).
    if cramers_v < 0.1:
        effect_interp = EffectSize.NEGLIGIBLE
    elif cramers_v < 0.3:
        effect_interp = EffectSize.SMALL
    elif cramers_v < 0.5:
        effect_interp = EffectSize.MEDIUM
    else:
        effect_interp = EffectSize.LARGE

    sig_level = interpret_p_value(p_value)
    is_significant = p_value < alpha

    if is_significant:
        interp = (
            f"Statistically significant difference in distributions (p={p_value:.4f}, "
            f"χ²={chi2:.2f}, df={df}). Effect size: {effect_interp.value} (V={cramers_v:.2f})"
        )
    else:
        interp = (
            f"No significant difference in distributions (p={p_value:.4f}, "
            f"χ²={chi2:.2f}, df={df})"
        )

    return StatisticalTestResult(
        test_name="Chi-square test",
        statistic=chi2,
        p_value=p_value,
        significance_level=sig_level,
        effect_size=cramers_v,
        effect_interpretation=effect_interp,
        sample_sizes=(int(total1), int(total2)),
        is_significant=is_significant,
        interpretation=interp,
    )
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def trend_significance_test(
    values: Sequence[float],
    timestamps: Sequence[float] | None = None,
    alpha: float = 0.05,
) -> StatisticalTestResult:
    """Test for significant trend over time using linear regression.

    Fits an ordinary least-squares line to the series and performs a
    two-sided t-test on the slope (H0: slope == 0).

    Args:
        values: Time series values.
        timestamps: Optional timestamps (uses indices if not provided).
        alpha: Significance level.

    Returns:
        Statistical test result.
    """
    n = len(values)

    # Need at least 3 points: the slope t-test has n - 2 degrees of freedom.
    if n < 3:
        return StatisticalTestResult(
            test_name="Trend significance test",
            statistic=0.0,
            p_value=1.0,
            significance_level=SignificanceLevel.NOT_SIGNIFICANT,
            is_significant=False,
            interpretation="Need at least 3 data points for trend analysis",
            sample_sizes=(n, 0),
        )

    # Use indices if no timestamps provided
    x = list(timestamps) if timestamps else list(range(n))
    y = list(values)

    # Ordinary least squares: slope = Sxy / Sxx, intercept from the means.
    mean_x = _compute_mean(x)
    mean_y = _compute_mean(y)

    numerator = sum((xi - mean_x) * (yi - mean_y) for xi, yi in zip(x, y))
    denominator = sum((xi - mean_x) ** 2 for xi in x)

    # All x identical -> slope undefined; report a non-significant result.
    if denominator == 0:
        return StatisticalTestResult(
            test_name="Trend significance test",
            statistic=0.0,
            p_value=1.0,
            significance_level=SignificanceLevel.NOT_SIGNIFICANT,
            is_significant=False,
            interpretation="Cannot calculate trend (constant x values)",
            sample_sizes=(n, 0),
        )

    slope = numerator / denominator
    intercept = mean_y - slope * mean_x

    # Residual variance and standard error of the slope estimate.
    residuals = [yi - (slope * xi + intercept) for xi, yi in zip(x, y)]
    sse = sum(r ** 2 for r in residuals)
    mse = sse / (n - 2)  # n >= 3 is guaranteed above, so n - 2 > 0

    se_slope = math.sqrt(mse / denominator) if mse > 0 else 0.0

    # T-statistic for slope
    if se_slope > 0:
        t_stat = slope / se_slope
        df = n - 2
        p_value = 2 * (1 - _t_distribution_cdf(abs(t_stat), df))
        p_value = max(0.0, min(1.0, p_value))
    elif slope != 0:
        # Bug fix: a perfect linear fit (zero residuals) with a nonzero
        # slope is maximally significant. Previously this case fell
        # through to p_value = 1.0 and reported "no significant trend",
        # the opposite of the correct verdict.
        t_stat = math.inf
        p_value = 0.0
    else:
        # Perfectly flat series: no trend at all.
        t_stat = 0.0
        p_value = 1.0

    # R-squared (goodness of fit of the regression line)
    ss_total = sum((yi - mean_y) ** 2 for yi in y)
    r_squared = 1 - (sse / ss_total) if ss_total > 0 else 0

    sig_level = interpret_p_value(p_value)
    is_significant = p_value < alpha

    # Interpret trend direction
    if slope > 0:
        direction = "increasing"
    elif slope < 0:
        direction = "decreasing"
    else:
        direction = "flat"

    if is_significant:
        interp = (
            f"Statistically significant {direction} trend (p={p_value:.4f}). "
            f"Slope: {slope:.4f} per unit, R²={r_squared:.2f}"
        )
    else:
        interp = (
            f"No significant trend detected (p={p_value:.4f}). "
            f"Slope: {slope:.4f}, R²={r_squared:.2f}"
        )

    return StatisticalTestResult(
        test_name="Trend significance test",
        statistic=t_stat,
        p_value=p_value,
        significance_level=sig_level,
        effect_size=slope,
        sample_sizes=(n, 0),
        is_significant=is_significant,
        interpretation=interp,
    )
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
@dataclass
class ComparisonResult:
    """Result of comparing two data series with multiple tests."""

    # Result of Welch's t-test (parametric comparison of means).
    t_test: StatisticalTestResult
    # Result of the Mann-Whitney U test (non-parametric rank comparison).
    mann_whitney: StatisticalTestResult
    # Name of the test whose verdict was used as primary
    # (chosen by sample size in comprehensive_comparison).
    recommended_test: str
    # Significance verdict of the recommended (primary) test.
    overall_significant: bool
    # One-line human-readable summary of the comparison outcome.
    summary: str
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def comprehensive_comparison(
    values1: Sequence[float],
    values2: Sequence[float],
    alpha: float = 0.05,
) -> ComparisonResult:
    """Perform comprehensive statistical comparison.

    Runs multiple tests and provides recommendation based on data characteristics.

    Args:
        values1: First group values.
        values2: Second group values.
        alpha: Significance level.

    Returns:
        Comprehensive comparison result.
    """
    # Run both the parametric and the non-parametric test up front so the
    # caller can inspect either, regardless of which one is recommended.
    t_result = welch_t_test(values1, values2, alpha)
    mw_result = mann_whitney_u_test(values1, values2, alpha)

    # With fewer than 30 observations in either group, prefer the
    # distribution-free test; for larger samples the t-test is robust.
    use_nonparametric = min(len(values1), len(values2)) < 30
    if use_nonparametric:
        recommended = "Mann-Whitney U test"
        primary_result = mw_result
    else:
        recommended = "Welch's t-test"
        primary_result = t_result

    overall = primary_result.is_significant

    # Build the one-line summary from the primary test's verdict.
    verdict = (
        "Significant difference detected" if overall else "No significant difference"
    )
    summary = f"{verdict} ({recommended}, p={primary_result.p_value:.4f})"

    return ComparisonResult(
        t_test=t_result,
        mann_whitney=mw_result,
        recommended_test=recommended,
        overall_significant=overall,
        summary=summary,
    )
|