truthound-dashboard 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +258 -0
- truthound_dashboard/api/anomaly.py +1302 -0
- truthound_dashboard/api/cross_alerts.py +352 -0
- truthound_dashboard/api/deps.py +143 -0
- truthound_dashboard/api/drift_monitor.py +540 -0
- truthound_dashboard/api/lineage.py +1151 -0
- truthound_dashboard/api/maintenance.py +363 -0
- truthound_dashboard/api/middleware.py +373 -1
- truthound_dashboard/api/model_monitoring.py +805 -0
- truthound_dashboard/api/notifications_advanced.py +2452 -0
- truthound_dashboard/api/plugins.py +2096 -0
- truthound_dashboard/api/profile.py +211 -14
- truthound_dashboard/api/reports.py +853 -0
- truthound_dashboard/api/router.py +147 -0
- truthound_dashboard/api/rule_suggestions.py +310 -0
- truthound_dashboard/api/schema_evolution.py +231 -0
- truthound_dashboard/api/sources.py +47 -3
- truthound_dashboard/api/triggers.py +190 -0
- truthound_dashboard/api/validations.py +13 -0
- truthound_dashboard/api/validators.py +333 -4
- truthound_dashboard/api/versioning.py +309 -0
- truthound_dashboard/api/websocket.py +301 -0
- truthound_dashboard/core/__init__.py +27 -0
- truthound_dashboard/core/anomaly.py +1395 -0
- truthound_dashboard/core/anomaly_explainer.py +633 -0
- truthound_dashboard/core/cache.py +206 -0
- truthound_dashboard/core/cached_services.py +422 -0
- truthound_dashboard/core/charts.py +352 -0
- truthound_dashboard/core/connections.py +1069 -42
- truthound_dashboard/core/cross_alerts.py +837 -0
- truthound_dashboard/core/drift_monitor.py +1477 -0
- truthound_dashboard/core/drift_sampling.py +669 -0
- truthound_dashboard/core/i18n/__init__.py +42 -0
- truthound_dashboard/core/i18n/detector.py +173 -0
- truthound_dashboard/core/i18n/messages.py +564 -0
- truthound_dashboard/core/lineage.py +971 -0
- truthound_dashboard/core/maintenance.py +443 -5
- truthound_dashboard/core/model_monitoring.py +1043 -0
- truthound_dashboard/core/notifications/channels.py +1020 -1
- truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
- truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
- truthound_dashboard/core/notifications/deduplication/service.py +400 -0
- truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
- truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
- truthound_dashboard/core/notifications/dispatcher.py +43 -0
- truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
- truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
- truthound_dashboard/core/notifications/escalation/engine.py +429 -0
- truthound_dashboard/core/notifications/escalation/models.py +336 -0
- truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
- truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
- truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
- truthound_dashboard/core/notifications/events.py +49 -0
- truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
- truthound_dashboard/core/notifications/metrics/base.py +528 -0
- truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
- truthound_dashboard/core/notifications/routing/__init__.py +169 -0
- truthound_dashboard/core/notifications/routing/combinators.py +184 -0
- truthound_dashboard/core/notifications/routing/config.py +375 -0
- truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
- truthound_dashboard/core/notifications/routing/engine.py +382 -0
- truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
- truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
- truthound_dashboard/core/notifications/routing/rules.py +625 -0
- truthound_dashboard/core/notifications/routing/validator.py +678 -0
- truthound_dashboard/core/notifications/service.py +2 -0
- truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
- truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
- truthound_dashboard/core/notifications/throttling/builder.py +311 -0
- truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
- truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
- truthound_dashboard/core/openlineage.py +1028 -0
- truthound_dashboard/core/plugins/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/extractor.py +703 -0
- truthound_dashboard/core/plugins/docs/renderers.py +804 -0
- truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
- truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
- truthound_dashboard/core/plugins/hooks/manager.py +403 -0
- truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
- truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
- truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
- truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
- truthound_dashboard/core/plugins/loader.py +504 -0
- truthound_dashboard/core/plugins/registry.py +810 -0
- truthound_dashboard/core/plugins/reporter_executor.py +588 -0
- truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
- truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
- truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
- truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
- truthound_dashboard/core/plugins/sandbox.py +617 -0
- truthound_dashboard/core/plugins/security/__init__.py +68 -0
- truthound_dashboard/core/plugins/security/analyzer.py +535 -0
- truthound_dashboard/core/plugins/security/policies.py +311 -0
- truthound_dashboard/core/plugins/security/protocols.py +296 -0
- truthound_dashboard/core/plugins/security/signing.py +842 -0
- truthound_dashboard/core/plugins/security.py +446 -0
- truthound_dashboard/core/plugins/validator_executor.py +401 -0
- truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
- truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
- truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
- truthound_dashboard/core/plugins/versioning/semver.py +266 -0
- truthound_dashboard/core/profile_comparison.py +601 -0
- truthound_dashboard/core/report_history.py +570 -0
- truthound_dashboard/core/reporters/__init__.py +57 -0
- truthound_dashboard/core/reporters/base.py +296 -0
- truthound_dashboard/core/reporters/csv_reporter.py +155 -0
- truthound_dashboard/core/reporters/html_reporter.py +598 -0
- truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
- truthound_dashboard/core/reporters/i18n/base.py +494 -0
- truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
- truthound_dashboard/core/reporters/json_reporter.py +160 -0
- truthound_dashboard/core/reporters/junit_reporter.py +233 -0
- truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
- truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
- truthound_dashboard/core/reporters/registry.py +272 -0
- truthound_dashboard/core/rule_generator.py +2088 -0
- truthound_dashboard/core/scheduler.py +822 -12
- truthound_dashboard/core/schema_evolution.py +858 -0
- truthound_dashboard/core/services.py +152 -9
- truthound_dashboard/core/statistics.py +718 -0
- truthound_dashboard/core/streaming_anomaly.py +883 -0
- truthound_dashboard/core/triggers/__init__.py +45 -0
- truthound_dashboard/core/triggers/base.py +226 -0
- truthound_dashboard/core/triggers/evaluators.py +609 -0
- truthound_dashboard/core/triggers/factory.py +363 -0
- truthound_dashboard/core/unified_alerts.py +870 -0
- truthound_dashboard/core/validation_limits.py +509 -0
- truthound_dashboard/core/versioning.py +709 -0
- truthound_dashboard/core/websocket/__init__.py +59 -0
- truthound_dashboard/core/websocket/manager.py +512 -0
- truthound_dashboard/core/websocket/messages.py +130 -0
- truthound_dashboard/db/__init__.py +30 -0
- truthound_dashboard/db/models.py +3375 -3
- truthound_dashboard/main.py +22 -0
- truthound_dashboard/schemas/__init__.py +396 -1
- truthound_dashboard/schemas/anomaly.py +1258 -0
- truthound_dashboard/schemas/base.py +4 -0
- truthound_dashboard/schemas/cross_alerts.py +334 -0
- truthound_dashboard/schemas/drift_monitor.py +890 -0
- truthound_dashboard/schemas/lineage.py +428 -0
- truthound_dashboard/schemas/maintenance.py +154 -0
- truthound_dashboard/schemas/model_monitoring.py +374 -0
- truthound_dashboard/schemas/notifications_advanced.py +1363 -0
- truthound_dashboard/schemas/openlineage.py +704 -0
- truthound_dashboard/schemas/plugins.py +1293 -0
- truthound_dashboard/schemas/profile.py +420 -34
- truthound_dashboard/schemas/profile_comparison.py +242 -0
- truthound_dashboard/schemas/reports.py +285 -0
- truthound_dashboard/schemas/rule_suggestion.py +434 -0
- truthound_dashboard/schemas/schema_evolution.py +164 -0
- truthound_dashboard/schemas/source.py +117 -2
- truthound_dashboard/schemas/triggers.py +511 -0
- truthound_dashboard/schemas/unified_alerts.py +223 -0
- truthound_dashboard/schemas/validation.py +25 -1
- truthound_dashboard/schemas/validators/__init__.py +11 -0
- truthound_dashboard/schemas/validators/base.py +151 -0
- truthound_dashboard/schemas/versioning.py +152 -0
- truthound_dashboard/static/index.html +2 -2
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -18
- truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
- truthound_dashboard/static/assets/index-BCA8H1hO.js +0 -574
- truthound_dashboard/static/assets/index-BNsSQ2fN.css +0 -1
- truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +0 -1
- truthound_dashboard-1.3.0.dist-info/RECORD +0 -110
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,890 @@
|
|
|
1
|
+
"""Drift monitoring schemas.
|
|
2
|
+
|
|
3
|
+
This module defines schemas for automatic drift monitoring operations.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from typing import Literal
|
|
10
|
+
|
|
11
|
+
from pydantic import Field
|
|
12
|
+
|
|
13
|
+
from .base import BaseSchema, IDMixin, ListResponseWrapper, TimestampMixin
|
|
14
|
+
|
|
15
|
+
# Drift monitoring status: the closed set of states a monitor may report.
DriftMonitorStatus = Literal["active", "paused", "error"]

# Drift alert severity levels, listed from most to least severe.
DriftAlertSeverity = Literal["critical", "high", "medium", "low", "info"]

# Alert status: the closed set of workflow states an alert may be in.
AlertStatus = Literal["open", "acknowledged", "resolved", "ignored"]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DriftMonitorBase(BaseSchema):
    """Base drift monitor schema.

    Declares the configuration fields inherited by ``DriftMonitorCreate``
    and ``DriftMonitorResponse``: the two sources being compared, the
    schedule, the detection settings and the alerting settings.
    """

    name: str = Field(
        ...,
        min_length=1,
        max_length=255,
        description="Monitor name",
        examples=["Daily Sales Drift Check"],
    )
    baseline_source_id: str = Field(
        ...,
        description="Baseline data source ID",
    )
    current_source_id: str = Field(
        ...,
        description="Current data source ID to compare",
    )
    # Only a default and examples are declared here; the expression itself
    # is not validated by this schema.
    cron_expression: str = Field(
        default="0 0 * * *",
        description="Cron expression for monitoring schedule",
        examples=["0 0 * * *", "0 */6 * * *"],
    )
    # Free-form string: the examples are illustrative, not an enforced set.
    method: str = Field(
        default="auto",
        description="Drift detection method",
        examples=["auto", "ks", "psi", "chi2"],
    )
    threshold: float = Field(
        default=0.05,
        ge=0.0,
        le=1.0,
        description="Drift threshold",
    )
    columns: list[str] | None = Field(
        default=None,
        description="Specific columns to monitor (null for all)",
    )
    alert_on_drift: bool = Field(
        default=True,
        description="Whether to create alerts when drift is detected",
    )
    # NOTE(review): both alert thresholds are bounded to [0, 1], whereas
    # DriftAlertBase.drift_percentage is bounded to [0, 100] -- confirm
    # which scale the alerting code compares against.
    alert_threshold_critical: float = Field(
        default=0.3,
        ge=0.0,
        le=1.0,
        description="Drift percentage threshold for critical alerts",
    )
    alert_threshold_high: float = Field(
        default=0.2,
        ge=0.0,
        le=1.0,
        description="Drift percentage threshold for high alerts",
    )
    notification_channel_ids: list[str] | None = Field(
        default=None,
        description="Notification channel IDs for alerts",
    )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class DriftMonitorCreate(DriftMonitorBase):
    """Schema for creating a drift monitor.

    Adds no fields of its own; the payload is exactly the set of fields
    declared on ``DriftMonitorBase``.
    """
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class DriftMonitorUpdate(BaseSchema):
    """Schema for updating a drift monitor.

    Every field is optional and defaults to ``None``. The baseline and
    current source IDs are not part of this schema, so they cannot be
    changed through it.
    """

    # NOTE(review): presumably ``None`` means "leave unchanged"; if so,
    # ``columns`` and ``notification_channel_ids`` cannot be reset to null
    # through this schema -- confirm against the update handler.
    name: str | None = Field(default=None, min_length=1, max_length=255)
    cron_expression: str | None = Field(default=None)
    method: str | None = Field(default=None)
    threshold: float | None = Field(default=None, ge=0.0, le=1.0)
    columns: list[str] | None = Field(default=None)
    alert_on_drift: bool | None = Field(default=None)
    alert_threshold_critical: float | None = Field(default=None, ge=0.0, le=1.0)
    alert_threshold_high: float | None = Field(default=None, ge=0.0, le=1.0)
    notification_channel_ids: list[str] | None = Field(default=None)
    status: DriftMonitorStatus | None = Field(default=None)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class DriftMonitorResponse(DriftMonitorBase, IDMixin, TimestampMixin):
    """Schema for drift monitor response.

    Extends the monitor configuration with the fields contributed by
    ``IDMixin`` and ``TimestampMixin`` plus run-time state: current status,
    last-run information and run counters.
    """

    status: DriftMonitorStatus = Field(
        default="active",
        description="Monitor status",
    )
    last_run_at: datetime | None = Field(
        default=None,
        description="Last monitoring run timestamp",
    )
    last_drift_detected: bool | None = Field(
        default=None,
        description="Whether drift was detected in last run",
    )
    total_runs: int = Field(
        default=0,
        description="Total number of monitoring runs",
    )
    drift_detected_count: int = Field(
        default=0,
        description="Number of runs with drift detected",
    )
    consecutive_drift_count: int = Field(
        default=0,
        description="Number of consecutive runs with drift",
    )
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class DriftMonitorListResponse(ListResponseWrapper):
    """List response for drift monitors.

    Narrows the ``data`` payload of ``ListResponseWrapper`` to a list of
    ``DriftMonitorResponse`` items.
    """

    data: list[DriftMonitorResponse]
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# Drift Alert Schemas
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class DriftAlertBase(BaseSchema):
    """Base drift alert schema.

    Describes what triggered an alert: the monitor and comparison it
    belongs to, its severity, the measured drift and a message.
    """

    monitor_id: str = Field(
        ...,
        description="Associated drift monitor ID",
    )
    comparison_id: str = Field(
        ...,
        description="Drift comparison ID that triggered the alert",
    )
    severity: DriftAlertSeverity = Field(
        ...,
        description="Alert severity level",
    )
    # Bounded to the 0-100 scale.
    drift_percentage: float = Field(
        ...,
        ge=0.0,
        le=100.0,
        description="Drift percentage that triggered the alert",
    )
    # Column names here; DriftTrendPoint.drifted_columns is a count instead.
    drifted_columns: list[str] = Field(
        default_factory=list,
        description="List of columns with drift",
    )
    message: str = Field(
        ...,
        description="Alert message",
    )
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
class DriftAlertResponse(DriftAlertBase, IDMixin, TimestampMixin):
    """Schema for drift alert response.

    Extends the alert payload with the fields contributed by ``IDMixin``
    and ``TimestampMixin`` plus workflow state: status, acknowledgement,
    resolution time and notes.
    """

    status: AlertStatus = Field(
        default="open",
        description="Alert status",
    )
    acknowledged_at: datetime | None = Field(
        default=None,
        description="When the alert was acknowledged",
    )
    acknowledged_by: str | None = Field(
        default=None,
        description="User who acknowledged the alert",
    )
    resolved_at: datetime | None = Field(
        default=None,
        description="When the alert was resolved",
    )
    # No length limit here, although DriftAlertUpdate.notes caps input at
    # 2000 characters.
    notes: str | None = Field(
        default=None,
        description="Notes about the alert",
    )
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
class DriftAlertListResponse(ListResponseWrapper):
    """List response for drift alerts.

    Narrows the ``data`` payload of ``ListResponseWrapper`` to a list of
    ``DriftAlertResponse`` items.
    """

    data: list[DriftAlertResponse]
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
class DriftAlertUpdate(BaseSchema):
    """Schema for updating a drift alert.

    Both fields are optional and default to ``None``; only the workflow
    status and the free-text notes can be supplied.
    """

    status: AlertStatus | None = Field(default=None)
    # Notes are capped at 2000 characters on input.
    notes: str | None = Field(default=None, max_length=2000)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
# Drift Trend Schemas
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
class DriftTrendPoint(BaseSchema):
    """Single point in drift trend data.

    All fields are required.
    """

    timestamp: datetime
    drift_percentage: float
    # A count here; DriftAlertBase.drifted_columns holds column names.
    drifted_columns: int
    total_columns: int
    has_drift: bool
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
class DriftTrendResponse(BaseSchema):
    """Drift trend over time.

    Carries the individual trend points for one monitor over a period,
    together with aggregate figures. All fields are required.
    """

    monitor_id: str
    period_start: datetime
    period_end: datetime
    data_points: list[DriftTrendPoint]
    avg_drift_percentage: float
    max_drift_percentage: float
    # NOTE(review): the scale (fraction vs. percent) is not constrained by
    # this schema -- confirm against the code that populates it.
    drift_occurrence_rate: float
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
# Monitor Summary Schemas
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
class DriftMonitorSummary(BaseSchema):
    """Summary of all drift monitors.

    Seven required integer counters covering monitors and alerts.
    """

    # Monitor counters.
    total_monitors: int
    active_monitors: int
    paused_monitors: int
    monitors_with_drift: int

    # Alert counters.
    total_open_alerts: int
    critical_alerts: int
    high_alerts: int
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
# Root Cause Analysis Types

# Closed set of root-cause labels that can be attached to a drifted column.
RootCauseType = Literal[
    "mean_shift",
    "variance_change",
    "new_categories",
    "missing_categories",
    "outlier_introduction",
    "data_volume_change",
    "temporal_pattern",
    "distribution_shape_change",
    "null_rate_change",
]

# Closed set of remediation actions that can be suggested.
RemediationActionType = Literal[
    "investigate_upstream",
    "update_baseline",
    "adjust_threshold",
    "review_data_pipeline",
    "check_data_source",
    "normalize_values",
    "filter_outliers",
    "retrain_model",
    "acknowledge_expected_change",
]
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
# Root Cause Analysis Schemas
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
class StatisticalShift(BaseSchema):
    """Statistical shift details for a column.

    Pairs the baseline and current value of one statistic with the
    absolute and percentage change between them. All fields are required.
    """

    baseline_value: float = Field(..., description="Value in baseline dataset")
    current_value: float = Field(..., description="Value in current dataset")
    absolute_change: float = Field(..., description="Absolute change")
    percent_change: float = Field(..., description="Percentage change")
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
class CategoryChange(BaseSchema):
    """Category change details for categorical columns.

    Only ``category`` is required; the counts and percentages default to
    zero.
    """

    category: str = Field(..., description="Category name")
    baseline_count: int = Field(default=0, description="Count in baseline")
    current_count: int = Field(default=0, description="Count in current")
    # Float defaults are written as 0.0 so the default matches the
    # annotated type.
    baseline_percentage: float = Field(
        default=0.0, description="Percentage in baseline"
    )
    current_percentage: float = Field(
        default=0.0, description="Percentage in current"
    )
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
class OutlierInfo(BaseSchema):
    """Information about detected outliers.

    ``count`` and ``percentage`` are required; the sample values default
    to an empty list and the detection method to ``"iqr"``.
    """

    count: int = Field(..., description="Number of outliers")
    percentage: float = Field(..., description="Percentage of total")
    # Samples may be numeric or string values.
    sample_values: list[float | str] = Field(
        default_factory=list, description="Sample outlier values"
    )
    threshold_method: str = Field(
        default="iqr", description="Method used to detect outliers"
    )
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
class TemporalPattern(BaseSchema):
    """Temporal pattern information.

    ``pattern_type`` and ``affected_period`` are required free-form
    strings; ``confidence`` is bounded to [0, 1] and defaults to 0.0.
    """

    # The examples are illustrative, not an enforced set.
    pattern_type: str = Field(
        ...,
        description="Type of temporal pattern",
        examples=["weekly_seasonality", "monthly_trend", "recent_spike"],
    )
    affected_period: str = Field(
        ..., description="Period affected by the pattern"
    )
    confidence: float = Field(
        default=0.0, ge=0.0, le=1.0, description="Confidence in pattern detection"
    )
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
class ColumnRootCause(BaseSchema):
    """Root cause analysis for a single column.

    ``column``, ``dtype`` and ``drift_level`` are required. Every detail
    section (statistical shifts, categorical changes, outliers, temporal
    patterns, null rates) is optional and defaults to ``None`` or an empty
    list.
    """

    column: str = Field(..., description="Column name")
    dtype: str = Field(..., description="Column data type")
    # Free-form string: the examples are illustrative, not an enforced set.
    drift_level: str = Field(
        ...,
        description="Drift severity level",
        examples=["none", "low", "medium", "high"],
    )
    causes: list[RootCauseType] = Field(
        default_factory=list, description="Detected root causes"
    )
    primary_cause: RootCauseType | None = Field(
        default=None, description="Primary root cause"
    )
    confidence: float = Field(
        default=0.0, ge=0.0, le=1.0, description="Confidence score for analysis"
    )

    # Statistical shifts
    mean_shift: StatisticalShift | None = Field(
        default=None, description="Mean shift details"
    )
    std_shift: StatisticalShift | None = Field(
        default=None, description="Standard deviation shift details"
    )
    min_shift: StatisticalShift | None = Field(
        default=None, description="Min value shift details"
    )
    max_shift: StatisticalShift | None = Field(
        default=None, description="Max value shift details"
    )

    # Categorical changes
    new_categories: list[CategoryChange] = Field(
        default_factory=list, description="New categories in current"
    )
    missing_categories: list[CategoryChange] = Field(
        default_factory=list, description="Missing categories from baseline"
    )
    category_distribution_changes: list[CategoryChange] = Field(
        default_factory=list, description="Significant distribution changes"
    )

    # Outliers
    outlier_info: OutlierInfo | None = Field(
        default=None, description="Outlier information"
    )

    # Temporal
    temporal_patterns: list[TemporalPattern] = Field(
        default_factory=list, description="Detected temporal patterns"
    )

    # Null rate
    null_rate_baseline: float | None = Field(
        default=None, description="Null rate in baseline"
    )
    null_rate_current: float | None = Field(
        default=None, description="Null rate in current"
    )
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
class RemediationSuggestion(BaseSchema):
    """Suggested remediation action.

    ``action``, ``title`` and ``description`` are required; the remaining
    fields have defaults.
    """

    action: RemediationActionType = Field(..., description="Recommended action type")
    # Lower number means more urgent; bounded to 1..5.
    priority: int = Field(
        default=1, ge=1, le=5, description="Priority (1=highest, 5=lowest)"
    )
    title: str = Field(..., description="Short title for the action")
    description: str = Field(..., description="Detailed description of the action")
    affected_columns: list[str] = Field(
        default_factory=list, description="Columns this action applies to"
    )
    # Free-form string: the examples are illustrative, not an enforced set.
    estimated_impact: str = Field(
        default="medium",
        description="Expected impact of taking this action",
        examples=["high", "medium", "low"],
    )
    requires_manual_review: bool = Field(
        default=True, description="Whether manual review is needed"
    )
    automation_available: bool = Field(
        default=False, description="Whether this action can be automated"
    )
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
class DataVolumeChange(BaseSchema):
    """Data volume change summary.

    The row counts and both change figures are required; ``significance``
    defaults to ``"normal"``.
    """

    baseline_rows: int = Field(..., description="Number of rows in baseline")
    current_rows: int = Field(..., description="Number of rows in current")
    absolute_change: int = Field(..., description="Absolute row count change")
    percent_change: float = Field(..., description="Percentage change in rows")
    # Free-form string: the examples are illustrative, not an enforced set.
    significance: str = Field(
        default="normal",
        description="Significance of volume change",
        examples=["normal", "notable", "significant", "critical"],
    )
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
class RootCauseAnalysis(BaseSchema):
    """Complete root cause analysis for a drift run.

    ``run_id``, ``analyzed_at`` and the three summary figures are
    required; every other section is optional and defaults to ``None``,
    an empty container or zero.
    """

    run_id: str = Field(..., description="Drift comparison/run ID")
    monitor_id: str | None = Field(
        default=None, description="Associated monitor ID if applicable"
    )
    analyzed_at: datetime = Field(..., description="When analysis was performed")

    # Summary
    total_columns: int = Field(..., description="Total columns analyzed")
    drifted_columns: int = Field(..., description="Number of drifted columns")
    # Bounded to the 0-100 scale.
    drift_percentage: float = Field(
        ..., ge=0.0, le=100.0, description="Percentage of columns with drift"
    )

    # Volume change
    data_volume_change: DataVolumeChange | None = Field(
        default=None, description="Data volume change summary"
    )

    # Per-column analysis
    column_analyses: list[ColumnRootCause] = Field(
        default_factory=list, description="Root cause analysis per column"
    )

    # Aggregated causes
    primary_causes: list[RootCauseType] = Field(
        default_factory=list, description="Primary causes across all columns"
    )
    # Keys are plain strings, not validated against RootCauseType.
    cause_distribution: dict[str, int] = Field(
        default_factory=dict, description="Count of each cause type"
    )

    # Remediation suggestions
    remediations: list[RemediationSuggestion] = Field(
        default_factory=list, description="Suggested remediation actions"
    )

    # Confidence and metadata
    overall_confidence: float = Field(
        default=0.0, ge=0.0, le=1.0, description="Overall confidence in analysis"
    )
    analysis_duration_ms: int = Field(
        default=0, description="Analysis duration in milliseconds"
    )
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
class RootCauseAnalysisResponse(BaseSchema):
    """Response wrapper for root cause analysis.

    Wraps a required ``RootCauseAnalysis`` payload with a ``success`` flag
    that defaults to ``True``.
    """

    success: bool = Field(default=True)
    data: RootCauseAnalysis
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
# Drift Preview Schemas
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
class DriftPreviewRequest(BaseSchema):
    """Request body for drift preview.

    The two source IDs are required; columns, method and threshold are
    optional.
    """

    baseline_source_id: str = Field(
        ...,
        description="Baseline data source ID",
    )
    current_source_id: str = Field(
        ...,
        description="Current data source ID to compare",
    )
    columns: list[str] | None = Field(
        default=None,
        description="Specific columns to compare (null for all)",
    )
    # Free-form string: the examples are illustrative, not an enforced set.
    method: str = Field(
        default="auto",
        description="Drift detection method",
        examples=[
            "auto",
            "ks",
            "psi",
            "chi2",
            "js",
            "kl",
            "wasserstein",
            "cvm",
            "anderson",
        ],
    )
    # Unlike DriftMonitorBase.threshold (default 0.05), this defaults to None.
    threshold: float | None = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Custom drift threshold",
    )
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
class ColumnDistributionData(BaseSchema):
    """Distribution data for a column.

    Four lists, each defaulting to empty.
    """

    # NOTE(review): presumably the four lists are parallel (one entry per
    # bin); the schema does not enforce equal lengths -- confirm with the
    # producer.
    values: list[float] = Field(
        default_factory=list,
        description="Binned values for histogram",
    )
    bins: list[str] = Field(
        default_factory=list,
        description="Bin labels or category names",
    )
    counts: list[int] = Field(
        default_factory=list,
        description="Count per bin",
    )
    percentages: list[float] = Field(
        default_factory=list,
        description="Percentage per bin",
    )
|
|
536
|
+
|
|
537
|
+
|
|
538
|
+
class ColumnPreviewResult(BaseSchema):
    """Drift preview result for a single column.

    ``column``, ``dtype``, ``drifted`` and ``method`` are required; the
    test statistic, p-value, summary statistics and distribution data are
    optional.
    """

    column: str = Field(..., description="Column name")
    dtype: str = Field(..., description="Data type")
    drifted: bool = Field(..., description="Whether drift was detected")
    # Free-form string; the description lists the expected values but the
    # schema does not enforce them.
    level: str = Field(
        default="none",
        description="Drift level (high, medium, low, none)",
    )
    method: str = Field(..., description="Detection method used")
    # Defaults are passed by keyword, matching the other schemas in this module.
    statistic: float | None = Field(default=None, description="Test statistic value")
    p_value: float | None = Field(default=None, description="P-value")

    # Statistics
    # Untyped dicts: key and value types are not constrained by the schema.
    baseline_stats: dict = Field(
        default_factory=dict,
        description="Baseline statistics (mean, std, min, max, etc.)",
    )
    current_stats: dict = Field(
        default_factory=dict,
        description="Current statistics (mean, std, min, max, etc.)",
    )

    # Distribution data for charts
    baseline_distribution: ColumnDistributionData | None = Field(
        default=None,
        description="Baseline distribution data for visualization",
    )
    current_distribution: ColumnDistributionData | None = Field(
        default=None,
        description="Current distribution data for visualization",
    )
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
class DriftPreviewData(BaseSchema):
    """Drift preview result data."""

    # The two sources being compared; IDs are required, names are optional.
    baseline_source_id: str = Field(..., description="Baseline source ID")
    current_source_id: str = Field(..., description="Current source ID")
    baseline_source_name: str | None = Field(
        default=None,
        description="Baseline source name",
    )
    current_source_name: str | None = Field(
        default=None,
        description="Current source name",
    )

    # Aggregate drift summary. `drift_percentage` is validated to 0-100.
    has_drift: bool = Field(..., description="Whether any drift was detected")
    has_high_drift: bool = Field(
        description="Whether high-severity drift was detected",
        default=False,
    )
    total_columns: int = Field(..., description="Total columns compared")
    drifted_columns: int = Field(description="Number of columns with drift", default=0)
    drift_percentage: float = Field(
        description="Percentage of columns with drift",
        default=0.0,
        ge=0.0,
        le=100.0,
    )

    # Row counts on each side; both default to zero.
    baseline_rows: int = Field(description="Number of baseline rows", default=0)
    current_rows: int = Field(description="Number of current rows", default=0)

    # Detection settings echoed back with the result.
    method: str = Field(description="Detection method used", default="auto")
    threshold: float = Field(description="Threshold used", default=0.05)

    # Detailed per-column outcomes; defaults to an empty list.
    columns: list[ColumnPreviewResult] = Field(
        description="Per-column drift results",
        default_factory=list,
    )

    # Column names ranked by drift severity; defaults to an empty list.
    most_affected: list[str] = Field(
        description="List of most affected columns (sorted by severity)",
        default_factory=list,
    )
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
class DriftPreviewResponse(BaseSchema):
    """Response for drift preview."""

    # Envelope: success flag (True unless overridden) plus the required payload.
    success: bool = Field(True)
    data: DriftPreviewData
|
|
625
|
+
|
|
626
|
+
|
|
627
|
+
# Large-Scale Dataset Optimization Schemas
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
# Closed set of sampling strategy names; used as the type of
# SamplingConfig.method.
SamplingMethodType = Literal[
    "random",
    "stratified",
    "reservoir",
    "systematic",
]
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
class SamplingConfig(BaseSchema):
    """Configuration for sampled drift comparison."""

    # Sampling strategy; must be one of the SamplingMethodType literals.
    method: SamplingMethodType = Field(
        description="Sampling method to use",
        default="random",
    )

    # Explicit sample size. When given it must be at least 100; None is the
    # default (described as "auto-estimated if null").
    sample_size: int | None = Field(
        description="Sample size (auto-estimated if null)",
        default=None,
        ge=100,
    )

    # Statistical targets, validated to 0.80-0.99 and 0.01-0.10 respectively.
    confidence_level: float = Field(
        description="Target confidence level for sample size estimation",
        default=0.95,
        ge=0.80,
        le=0.99,
    )
    margin_of_error: float = Field(
        description="Acceptable margin of error",
        default=0.03,
        ge=0.01,
        le=0.10,
    )

    # Optional knobs: stratification column and random seed.
    strata_column: str | None = Field(
        description="Column for stratified sampling",
        default=None,
    )
    seed: int | None = Field(
        description="Random seed for reproducibility",
        default=None,
    )

    # Execution controls, validated to 0.1-1.0 and 1-16 respectively.
    early_stop_threshold: float = Field(
        description="Proportion of drifted columns to trigger early stopping",
        default=0.5,
        ge=0.1,
        le=1.0,
    )
    max_workers: int = Field(
        description="Maximum parallel workers for column comparison",
        default=4,
        ge=1,
        le=16,
    )
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
class SampledComparisonRequest(BaseSchema):
    """Request for sampled drift comparison."""

    monitor_id: str = Field(..., description="Monitor ID to run with sampling")

    # When omitted, a SamplingConfig built from its own defaults is used.
    sampling: SamplingConfig = Field(
        description="Sampling configuration",
        default_factory=SamplingConfig,
    )
|
|
688
|
+
|
|
689
|
+
|
|
690
|
+
class SampleSizeEstimate(BaseSchema):
    """Estimated sample size for drift detection."""

    # Every field is required (no defaults).

    # Size bounds and recommendation.
    recommended_size: int = Field(
        ...,
        description="Recommended sample size for target confidence",
    )
    min_size: int = Field(..., description="Minimum sample size for basic detection")
    max_size: int = Field(
        ...,
        description="Maximum useful sample size (diminishing returns beyond)",
    )

    # Statistical parameters associated with the estimate.
    confidence_level: float = Field(..., description="Target confidence level")
    margin_of_error: float = Field(
        ...,
        description="Expected margin of error at recommended size",
    )

    # Projected resource cost.
    estimated_time_seconds: float = Field(
        ...,
        description="Estimated processing time in seconds",
    )
    memory_mb: float = Field(..., description="Estimated memory usage in MB")
|
|
714
|
+
|
|
715
|
+
|
|
716
|
+
class SpeedupOption(BaseSchema):
    """Speedup option for different sample sizes."""

    # All three values are required.
    sample_size: int = Field(
        ...,
        description="Sample size for this option",
    )
    speedup_factor: float = Field(
        ...,
        description="Expected speedup factor",
    )
    estimated_time_seconds: float = Field(
        ...,
        description="Estimated time in seconds",
    )
|
|
722
|
+
|
|
723
|
+
|
|
724
|
+
class DatasetInfo(BaseSchema):
    """Information about dataset sizes."""

    # Raw row counts for each side of the comparison (required).
    baseline_rows: int = Field(
        ...,
        description="Number of rows in baseline",
    )
    current_rows: int = Field(
        ...,
        description="Number of rows in current",
    )

    # Derived size information; supplied by the producer, not computed here.
    population_size: int = Field(
        ...,
        description="Larger of baseline/current rows",
    )
    is_large_dataset: bool = Field(
        ...,
        description="Whether dataset exceeds threshold",
    )
    large_dataset_threshold: int = Field(
        ...,
        description="Row count threshold",
    )
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
class SamplingRecommendation(BaseSchema):
    """Sampling recommendation for a dataset."""

    # Both the verdict and its explanation are required.
    sampling_recommended: bool = Field(
        ...,
        description="Whether sampling is recommended",
    )
    reason: str = Field(
        ...,
        description="Reason for recommendation",
    )
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
class SamplingMethod(BaseSchema):
    """Description of a sampling method."""

    # Three required free-text fields; `method` is a plain str here, not the
    # SamplingMethodType literal.
    method: str = Field(
        ...,
        description="Method identifier",
    )
    description: str = Field(
        ...,
        description="Method description",
    )
    best_for: str = Field(
        ...,
        description="Ideal use case",
    )
|
|
747
|
+
|
|
748
|
+
|
|
749
|
+
class SampleSizeEstimateResponse(BaseSchema):
    """Response for sample size estimation."""

    # Sources the estimate was computed for.
    baseline_source_id: str
    current_source_id: str

    # Structured sections of the estimate; all required.
    dataset_info: DatasetInfo
    sampling_recommendation: SamplingRecommendation
    sample_size_estimate: SampleSizeEstimate

    # Untyped mapping; its inner structure is not constrained by this schema.
    performance_estimates: dict = Field(
        ...,
        description="Performance estimates with speedup options",
    )

    available_methods: list[SamplingMethod]
|
|
761
|
+
|
|
762
|
+
|
|
763
|
+
class ChunkedComparisonProgress(BaseSchema):
    """Progress tracking for chunked comparison operations."""

    # Chunk-level and row-level counters; all required.
    total_chunks: int = Field(
        ...,
        description="Total number of chunks to process",
    )
    processed_chunks: int = Field(
        ...,
        description="Number of chunks processed",
    )
    total_rows: int = Field(
        ...,
        description="Total rows to process",
    )
    processed_rows: int = Field(
        ...,
        description="Rows processed so far",
    )
    current_chunk: int = Field(
        ...,
        description="Current chunk being processed",
    )

    # Validated to the 0-100 range; defaults to 0.0.
    percentage: float = Field(
        description="Completion percentage",
        default=0.0,
        ge=0.0,
        le=100.0,
    )
|
|
777
|
+
|
|
778
|
+
|
|
779
|
+
class ChunkedComparisonTiming(BaseSchema):
    """Timing information for chunked comparison."""

    # Both durations are required floats.
    elapsed_seconds: float = Field(
        ...,
        description="Time elapsed since start",
    )
    estimated_remaining_seconds: float = Field(
        ...,
        description="Estimated time remaining",
    )
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
class ChunkedComparisonInterimResults(BaseSchema):
    """Interim results for chunked comparison."""

    # Both fields are optional: an empty list and False when not supplied.
    columns_with_drift: list[str] = Field(
        description="Columns detected with drift so far",
        default_factory=list,
    )
    early_stop_triggered: bool = Field(
        description="Whether early stopping was triggered",
        default=False,
    )
|
|
797
|
+
|
|
798
|
+
|
|
799
|
+
class JobProgressResponse(BaseSchema):
    """Response for job progress query."""

    job_id: str = Field(..., description="Job identifier")

    # Typed as a plain str; the description lists the expected values but the
    # schema itself does not restrict them.
    status: str = Field(..., description="Job status (running, completed, cancelled, error)")

    # Required nested sections.
    progress: ChunkedComparisonProgress
    timing: ChunkedComparisonTiming
    interim_results: ChunkedComparisonInterimResults
|
|
810
|
+
|
|
811
|
+
|
|
812
|
+
class SamplingInfo(BaseSchema):
    """Sampling information for comparison result."""

    # Sampling parameters that were applied (all required).
    method: str = Field(
        ...,
        description="Sampling method used",
    )
    sample_size: int = Field(
        ...,
        description="Sample size used",
    )
    confidence_level: float = Field(
        ...,
        description="Confidence level",
    )

    # Population sizes on each side of the comparison.
    population_baseline: int = Field(
        ...,
        description="Baseline population size",
    )
    population_current: int = Field(
        ...,
        description="Current population size",
    )
|
|
820
|
+
|
|
821
|
+
|
|
822
|
+
class ProcessingInfo(BaseSchema):
    """Processing information for comparison result."""

    # All four fields are required.
    num_chunks: int = Field(
        ...,
        description="Number of chunks processed",
    )
    total_chunks_planned: int = Field(
        ...,
        description="Total chunks planned",
    )
    early_stopped: bool = Field(
        ...,
        description="Whether early stopped",
    )
    parallel_workers: int = Field(
        ...,
        description="Number of parallel workers",
    )
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
class ComparisonResults(BaseSchema):
    """Results of sampled comparison."""

    # Required headline figures.
    has_drift: bool = Field(
        ...,
        description="Whether drift was detected",
    )
    total_columns: int = Field(
        ...,
        description="Total columns compared",
    )
    drifted_columns: int = Field(
        ...,
        description="Number of drifted columns",
    )

    # Optional detail: defaults to an empty list.
    drifted_column_names: list[str] = Field(
        description="Names of drifted columns",
        default_factory=list,
    )

    # Validated to the 0-100 range; defaults to 0.0.
    drift_percentage: float = Field(
        description="Percentage of columns with drift",
        default=0.0,
        ge=0.0,
        le=100.0,
    )
|
|
847
|
+
|
|
848
|
+
|
|
849
|
+
class PerformanceMetrics(BaseSchema):
    """Performance metrics for comparison."""

    # All four measurements are required floats.
    total_time_seconds: float = Field(
        ...,
        description="Total processing time",
    )
    estimated_time_seconds: float = Field(
        ...,
        description="Originally estimated time",
    )
    estimated_memory_mb: float = Field(
        ...,
        description="Estimated memory usage",
    )
    speedup_factor: float = Field(
        ...,
        description="Speedup vs full dataset",
    )
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
class ChunkDetail(BaseSchema):
    """Details for a single processed chunk."""

    chunk_index: int = Field(
        ...,
        description="Chunk index",
    )
    rows_processed: int = Field(
        ...,
        description="Rows in this chunk",
    )

    # Optional; defaults to an empty list.
    drifted_columns: list[str] = Field(
        description="Columns with drift in this chunk",
        default_factory=list,
    )

    processing_time_seconds: float = Field(
        ...,
        description="Time to process chunk",
    )
|
|
868
|
+
|
|
869
|
+
|
|
870
|
+
class SampledComparisonResult(BaseSchema):
    """Complete result of sampled comparison."""

    # Identification and final status (all required strings).
    job_id: str = Field(
        ...,
        description="Job identifier",
    )
    monitor_id: str = Field(
        ...,
        description="Monitor identifier",
    )
    status: str = Field(
        ...,
        description="Completion status",
    )

    # Required nested sections of the report.
    sampling: SamplingInfo
    processing: ProcessingInfo
    results: ComparisonResults
    performance: PerformanceMetrics

    # Optional per-chunk breakdown; defaults to an empty list.
    chunk_details: list[ChunkDetail] = Field(
        description="Details for each processed chunk",
        default_factory=list,
    )
|
|
884
|
+
|
|
885
|
+
|
|
886
|
+
class SampledComparisonResponse(BaseSchema):
    """Response for sampled comparison."""

    # Envelope: success flag (True unless overridden) plus the required payload.
    success: bool = Field(True)
    data: SampledComparisonResult
|