truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +75 -86
- truthound_dashboard/api/anomaly.py +7 -13
- truthound_dashboard/api/cross_alerts.py +38 -52
- truthound_dashboard/api/drift.py +49 -59
- truthound_dashboard/api/drift_monitor.py +234 -79
- truthound_dashboard/api/enterprise_sampling.py +498 -0
- truthound_dashboard/api/history.py +57 -5
- truthound_dashboard/api/lineage.py +3 -48
- truthound_dashboard/api/maintenance.py +104 -49
- truthound_dashboard/api/mask.py +1 -2
- truthound_dashboard/api/middleware.py +2 -1
- truthound_dashboard/api/model_monitoring.py +435 -311
- truthound_dashboard/api/notifications.py +227 -191
- truthound_dashboard/api/notifications_advanced.py +21 -20
- truthound_dashboard/api/observability.py +586 -0
- truthound_dashboard/api/plugins.py +2 -433
- truthound_dashboard/api/profile.py +199 -37
- truthound_dashboard/api/quality_reporter.py +701 -0
- truthound_dashboard/api/reports.py +7 -16
- truthound_dashboard/api/router.py +66 -0
- truthound_dashboard/api/rule_suggestions.py +5 -5
- truthound_dashboard/api/scan.py +17 -19
- truthound_dashboard/api/schedules.py +85 -50
- truthound_dashboard/api/schema_evolution.py +6 -6
- truthound_dashboard/api/schema_watcher.py +667 -0
- truthound_dashboard/api/sources.py +98 -27
- truthound_dashboard/api/tiering.py +1323 -0
- truthound_dashboard/api/triggers.py +14 -11
- truthound_dashboard/api/validations.py +12 -11
- truthound_dashboard/api/versioning.py +1 -6
- truthound_dashboard/core/__init__.py +129 -3
- truthound_dashboard/core/actions/__init__.py +62 -0
- truthound_dashboard/core/actions/custom.py +426 -0
- truthound_dashboard/core/actions/notifications.py +910 -0
- truthound_dashboard/core/actions/storage.py +472 -0
- truthound_dashboard/core/actions/webhook.py +281 -0
- truthound_dashboard/core/anomaly.py +262 -67
- truthound_dashboard/core/anomaly_explainer.py +4 -3
- truthound_dashboard/core/backends/__init__.py +67 -0
- truthound_dashboard/core/backends/base.py +299 -0
- truthound_dashboard/core/backends/errors.py +191 -0
- truthound_dashboard/core/backends/factory.py +423 -0
- truthound_dashboard/core/backends/mock_backend.py +451 -0
- truthound_dashboard/core/backends/truthound_backend.py +718 -0
- truthound_dashboard/core/checkpoint/__init__.py +87 -0
- truthound_dashboard/core/checkpoint/adapters.py +814 -0
- truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
- truthound_dashboard/core/checkpoint/runner.py +270 -0
- truthound_dashboard/core/connections.py +645 -23
- truthound_dashboard/core/converters/__init__.py +14 -0
- truthound_dashboard/core/converters/truthound.py +620 -0
- truthound_dashboard/core/cross_alerts.py +540 -320
- truthound_dashboard/core/datasource_factory.py +1672 -0
- truthound_dashboard/core/drift_monitor.py +216 -20
- truthound_dashboard/core/enterprise_sampling.py +1291 -0
- truthound_dashboard/core/interfaces/__init__.py +225 -0
- truthound_dashboard/core/interfaces/actions.py +652 -0
- truthound_dashboard/core/interfaces/base.py +247 -0
- truthound_dashboard/core/interfaces/checkpoint.py +676 -0
- truthound_dashboard/core/interfaces/protocols.py +664 -0
- truthound_dashboard/core/interfaces/reporters.py +650 -0
- truthound_dashboard/core/interfaces/routing.py +646 -0
- truthound_dashboard/core/interfaces/triggers.py +619 -0
- truthound_dashboard/core/lineage.py +407 -71
- truthound_dashboard/core/model_monitoring.py +431 -3
- truthound_dashboard/core/notifications/base.py +4 -0
- truthound_dashboard/core/notifications/channels.py +501 -1203
- truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
- truthound_dashboard/core/notifications/deduplication/service.py +131 -348
- truthound_dashboard/core/notifications/dispatcher.py +202 -11
- truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
- truthound_dashboard/core/notifications/escalation/engine.py +168 -358
- truthound_dashboard/core/notifications/routing/__init__.py +88 -128
- truthound_dashboard/core/notifications/routing/engine.py +90 -317
- truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
- truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
- truthound_dashboard/core/notifications/throttling/builder.py +117 -255
- truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
- truthound_dashboard/core/phase5/collaboration.py +1 -1
- truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
- truthound_dashboard/core/quality_reporter.py +1359 -0
- truthound_dashboard/core/report_history.py +0 -6
- truthound_dashboard/core/reporters/__init__.py +175 -14
- truthound_dashboard/core/reporters/adapters.py +943 -0
- truthound_dashboard/core/reporters/base.py +0 -3
- truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
- truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
- truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
- truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
- truthound_dashboard/core/reporters/compat.py +266 -0
- truthound_dashboard/core/reporters/csv_reporter.py +2 -35
- truthound_dashboard/core/reporters/factory.py +526 -0
- truthound_dashboard/core/reporters/interfaces.py +745 -0
- truthound_dashboard/core/reporters/registry.py +1 -10
- truthound_dashboard/core/scheduler.py +165 -0
- truthound_dashboard/core/schema_evolution.py +3 -3
- truthound_dashboard/core/schema_watcher.py +1528 -0
- truthound_dashboard/core/services.py +595 -76
- truthound_dashboard/core/store_manager.py +810 -0
- truthound_dashboard/core/streaming_anomaly.py +169 -4
- truthound_dashboard/core/tiering.py +1309 -0
- truthound_dashboard/core/triggers/evaluators.py +178 -8
- truthound_dashboard/core/truthound_adapter.py +2620 -197
- truthound_dashboard/core/unified_alerts.py +23 -20
- truthound_dashboard/db/__init__.py +8 -0
- truthound_dashboard/db/database.py +8 -2
- truthound_dashboard/db/models.py +944 -25
- truthound_dashboard/db/repository.py +2 -0
- truthound_dashboard/main.py +15 -0
- truthound_dashboard/schemas/__init__.py +177 -16
- truthound_dashboard/schemas/base.py +44 -23
- truthound_dashboard/schemas/collaboration.py +19 -6
- truthound_dashboard/schemas/cross_alerts.py +19 -3
- truthound_dashboard/schemas/drift.py +61 -55
- truthound_dashboard/schemas/drift_monitor.py +67 -23
- truthound_dashboard/schemas/enterprise_sampling.py +653 -0
- truthound_dashboard/schemas/lineage.py +0 -33
- truthound_dashboard/schemas/mask.py +10 -8
- truthound_dashboard/schemas/model_monitoring.py +89 -10
- truthound_dashboard/schemas/notifications_advanced.py +13 -0
- truthound_dashboard/schemas/observability.py +453 -0
- truthound_dashboard/schemas/plugins.py +0 -280
- truthound_dashboard/schemas/profile.py +154 -247
- truthound_dashboard/schemas/quality_reporter.py +403 -0
- truthound_dashboard/schemas/reports.py +2 -2
- truthound_dashboard/schemas/rule_suggestion.py +8 -1
- truthound_dashboard/schemas/scan.py +4 -24
- truthound_dashboard/schemas/schedule.py +11 -3
- truthound_dashboard/schemas/schema_watcher.py +727 -0
- truthound_dashboard/schemas/source.py +17 -2
- truthound_dashboard/schemas/tiering.py +822 -0
- truthound_dashboard/schemas/triggers.py +16 -0
- truthound_dashboard/schemas/unified_alerts.py +7 -0
- truthound_dashboard/schemas/validation.py +0 -13
- truthound_dashboard/schemas/validators/base.py +41 -21
- truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
- truthound_dashboard/schemas/validators/localization_validators.py +273 -0
- truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
- truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
- truthound_dashboard/schemas/validators/referential_validators.py +312 -0
- truthound_dashboard/schemas/validators/registry.py +93 -8
- truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
- truthound_dashboard/schemas/versioning.py +1 -6
- truthound_dashboard/static/index.html +2 -2
- truthound_dashboard-1.5.1.dist-info/METADATA +312 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/RECORD +149 -148
- truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
- truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
- truthound_dashboard/core/plugins/hooks/manager.py +0 -403
- truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
- truthound_dashboard/core/reporters/junit_reporter.py +0 -233
- truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
- truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
- truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
- truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
- truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
- truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
- truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
- truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
- truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
- truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
- truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
- truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
- truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
- truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
- truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
- truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
- truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
- truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
- truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
- truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
- truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
- truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
- truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
- truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
- truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
- truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
- truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
- truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
- truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
- truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
- truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
- truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
- truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
- truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
- truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
- truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
- truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
- truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
- truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
- truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
- truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
- truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
- truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
- truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
- truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
- truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
- truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
- truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -730,6 +730,91 @@ class ModelMonitoringService:
|
|
|
730
730
|
"""Delete an alert handler."""
|
|
731
731
|
return await self.handler_repo.delete(handler_id)
|
|
732
732
|
|
|
733
|
+
async def test_alert_handler(
|
|
734
|
+
self,
|
|
735
|
+
handler_id: str,
|
|
736
|
+
) -> dict[str, Any]:
|
|
737
|
+
"""Test an alert handler by sending a test notification.
|
|
738
|
+
|
|
739
|
+
Args:
|
|
740
|
+
handler_id: Handler ID.
|
|
741
|
+
|
|
742
|
+
Returns:
|
|
743
|
+
Test result with success status and message.
|
|
744
|
+
|
|
745
|
+
Raises:
|
|
746
|
+
ValueError: If handler not found.
|
|
747
|
+
"""
|
|
748
|
+
handler = await self.handler_repo.get_by_id(handler_id)
|
|
749
|
+
if handler is None:
|
|
750
|
+
raise ValueError(f"Handler '{handler_id}' not found")
|
|
751
|
+
|
|
752
|
+
handler_type = handler.handler_type
|
|
753
|
+
config = handler.config or {}
|
|
754
|
+
|
|
755
|
+
# Simulate test based on handler type
|
|
756
|
+
test_result = {
|
|
757
|
+
"handler_id": handler_id,
|
|
758
|
+
"handler_type": handler_type,
|
|
759
|
+
"success": False,
|
|
760
|
+
"message": "",
|
|
761
|
+
"timestamp": datetime.utcnow().isoformat(),
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
try:
|
|
765
|
+
if handler_type == "webhook":
|
|
766
|
+
# For webhook, we would normally send a test request
|
|
767
|
+
# For safety, we just validate the config
|
|
768
|
+
url = config.get("url")
|
|
769
|
+
if not url:
|
|
770
|
+
test_result["message"] = "Webhook URL not configured"
|
|
771
|
+
elif not url.startswith(("http://", "https://")):
|
|
772
|
+
test_result["message"] = "Invalid webhook URL format"
|
|
773
|
+
else:
|
|
774
|
+
test_result["success"] = True
|
|
775
|
+
test_result["message"] = f"Webhook configuration valid. URL: {url}"
|
|
776
|
+
|
|
777
|
+
elif handler_type == "email":
|
|
778
|
+
recipients = config.get("recipients", [])
|
|
779
|
+
if not recipients:
|
|
780
|
+
test_result["message"] = "No email recipients configured"
|
|
781
|
+
else:
|
|
782
|
+
test_result["success"] = True
|
|
783
|
+
test_result["message"] = f"Email configuration valid. Recipients: {len(recipients)}"
|
|
784
|
+
|
|
785
|
+
elif handler_type == "slack":
|
|
786
|
+
webhook_url = config.get("webhook_url") or config.get("url")
|
|
787
|
+
if not webhook_url:
|
|
788
|
+
test_result["message"] = "Slack webhook URL not configured"
|
|
789
|
+
else:
|
|
790
|
+
test_result["success"] = True
|
|
791
|
+
test_result["message"] = "Slack configuration valid"
|
|
792
|
+
|
|
793
|
+
elif handler_type == "pagerduty":
|
|
794
|
+
integration_key = config.get("integration_key") or config.get("routing_key")
|
|
795
|
+
if not integration_key:
|
|
796
|
+
test_result["message"] = "PagerDuty integration key not configured"
|
|
797
|
+
else:
|
|
798
|
+
test_result["success"] = True
|
|
799
|
+
test_result["message"] = "PagerDuty configuration valid"
|
|
800
|
+
|
|
801
|
+
elif handler_type == "opsgenie":
|
|
802
|
+
api_key = config.get("api_key")
|
|
803
|
+
if not api_key:
|
|
804
|
+
test_result["message"] = "OpsGenie API key not configured"
|
|
805
|
+
else:
|
|
806
|
+
test_result["success"] = True
|
|
807
|
+
test_result["message"] = "OpsGenie configuration valid"
|
|
808
|
+
|
|
809
|
+
else:
|
|
810
|
+
test_result["success"] = True
|
|
811
|
+
test_result["message"] = f"Handler type '{handler_type}' configuration accepted"
|
|
812
|
+
|
|
813
|
+
except Exception as e:
|
|
814
|
+
test_result["message"] = f"Test failed: {str(e)}"
|
|
815
|
+
|
|
816
|
+
return test_result
|
|
817
|
+
|
|
733
818
|
# =========================================================================
|
|
734
819
|
# Alerts
|
|
735
820
|
# =========================================================================
|
|
@@ -893,21 +978,36 @@ class ModelMonitoringService:
|
|
|
893
978
|
|
|
894
979
|
elif rule_type == "statistical":
|
|
895
980
|
# Statistical anomaly detection based on standard deviations
|
|
981
|
+
# Maps to truthound.ml.monitoring.alerting.AnomalyRule
|
|
896
982
|
metric_name = config.get("metric_name", "latency_ms")
|
|
897
983
|
std_devs = config.get("std_devs", 3.0)
|
|
984
|
+
window_size = config.get("window_size", 100)
|
|
898
985
|
|
|
899
986
|
for m in metrics.get("metrics", []):
|
|
900
987
|
if m.get("name") == metric_name:
|
|
901
988
|
avg = m.get("avg_value")
|
|
902
989
|
p95 = m.get("p95_value")
|
|
990
|
+
count = m.get("count", 0)
|
|
991
|
+
|
|
992
|
+
# Only evaluate if we have enough samples
|
|
993
|
+
if count < window_size:
|
|
994
|
+
break
|
|
995
|
+
|
|
903
996
|
if avg and p95:
|
|
904
|
-
#
|
|
905
|
-
|
|
906
|
-
|
|
997
|
+
# AnomalyRule: if p95 is more than std_devs above the mean
|
|
998
|
+
threshold_value = avg * (1 + std_devs * 0.1)
|
|
999
|
+
if p95 > threshold_value:
|
|
1000
|
+
return True, p95, threshold_value
|
|
907
1001
|
break
|
|
908
1002
|
|
|
909
1003
|
return False, None, None
|
|
910
1004
|
|
|
1005
|
+
elif rule_type == "trend":
|
|
1006
|
+
# Trend-based alerting - evaluated async separately
|
|
1007
|
+
# This is a placeholder; actual evaluation done in evaluate_trend_rule()
|
|
1008
|
+
# Return False here as trend rules need historical data
|
|
1009
|
+
return False, None, None
|
|
1010
|
+
|
|
911
1011
|
return False, None, None
|
|
912
1012
|
|
|
913
1013
|
# =========================================================================
|
|
@@ -1041,3 +1141,331 @@ class ModelMonitoringService:
|
|
|
1041
1141
|
"created_at": alert.created_at.isoformat() if alert.created_at else None,
|
|
1042
1142
|
"updated_at": alert.updated_at.isoformat() if alert.updated_at else None,
|
|
1043
1143
|
}
|
|
1144
|
+
|
|
1145
|
+
# =========================================================================
|
|
1146
|
+
# Truthound Integration - Drift Detection
|
|
1147
|
+
# =========================================================================
|
|
1148
|
+
|
|
1149
|
+
async def compute_drift_score(
    self,
    model_id: str,
    reference_data: Any,
    current_data: Any,
    *,
    method: str = "auto",
    columns: list[str] | None = None,
) -> dict[str, Any]:
    """Compute drift score using truthound th.compare().

    Uses truthound's drift detection methods:
    - auto: Auto-select best method based on column type
    - psi: Population Stability Index
    - ks: Kolmogorov-Smirnov test
    - js: Jensen-Shannon divergence
    - wasserstein: Earth Mover's Distance
    - chi2: Chi-squared (categorical)
    - kl: Kullback-Leibler divergence
    - cvm: Cramér-von Mises test
    - anderson: Anderson-Darling test
    - hellinger: Hellinger distance
    - energy: Energy distance
    - mmd: Maximum Mean Discrepancy

    The overall score is stored on the model via ``update_drift_score`` and,
    when it exceeds the model's ``drift_threshold`` (default 0.1), an alert
    is created against the first active rule whose name contains "drift".

    Args:
        model_id: Model ID to update.
        reference_data: Reference/baseline dataset.
        current_data: Current dataset to compare.
        method: Drift detection method (default: auto). When left at
            "auto", the model config's ``drift_method`` is used if set.
        columns: Specific columns to check (default: all).

    Returns:
        Drift detection result with per-column scores.

    Raises:
        ValueError: If the model is not found.
    """
    import truthound as th

    model = await self.model_repo.get_by_id(model_id)
    if model is None:
        raise ValueError(f"Model '{model_id}' not found")

    config = model.config or {}

    # Get method from model config if not specified
    if method == "auto":
        method = config.get("drift_method", "auto")

    # NOTE(review): th.compare() is called synchronously inside this
    # coroutine; if it is CPU-heavy it will hold the event loop — confirm.
    drift_result = th.compare(
        reference_data,
        current_data,
        method=method,
        columns=columns,
    )

    # Single pass over the columns: per-column scores are always reported,
    # drifted columns are only collected when drift was detected overall.
    column_scores = {}
    drifted_columns = []
    drifted_scores = []
    for col in drift_result.columns:
        statistic = getattr(col.result, "statistic", None)
        # A statistic that is present but None must not reach the
        # numeric comparison below.
        score = 0.0 if statistic is None else statistic
        column_scores[col.column] = score
        if drift_result.has_drift and col.result.drifted:
            drifted_columns.append(col.column)
            drifted_scores.append(score)

    # NOTE(review): the maximum is taken over drifted columns only; the
    # diff this was recovered from does not show whether non-drifted
    # columns were meant to contribute as well — confirm.
    # The leading 0.0 keeps the score non-negative, as before.
    overall_score = max([0.0, *drifted_scores])

    # Update model drift score
    model.update_drift_score(overall_score)

    drift_threshold = config.get("drift_threshold", 0.1)

    # Create alert if drift exceeds threshold
    if overall_score > drift_threshold:
        rules = await self.rule_repo.get_by_model_id(model_id, active_only=True)
        drift_rule = next(
            (r for r in rules if "drift" in r.name.lower()),
            None,
        )

        # Without a matching rule the drift is recorded but not alerted.
        if drift_rule:
            await self.create_alert(
                model_id=model_id,
                rule_id=drift_rule.id,
                message=f"Drift detected: score={overall_score:.3f} exceeds threshold={drift_threshold:.3f}. "
                f"Drifted columns: {', '.join(drifted_columns)}",
                severity="warning" if overall_score < 0.25 else "critical",
                metric_value=overall_score,
                threshold_value=drift_threshold,
            )

    await self.session.flush()

    return {
        "model_id": model_id,
        "method": method,
        "has_drift": drift_result.has_drift,
        "overall_score": overall_score,
        "drift_threshold": drift_threshold,
        "drifted_columns": drifted_columns,
        "column_scores": column_scores,
        "timestamp": datetime.utcnow().isoformat(),
    }
|
|
1266
|
+
|
|
1267
|
+
async def compute_quality_metrics(
    self,
    model_id: str,
    hours: int = 24,
) -> dict[str, Any]:
    """Compute quality metrics from predictions with actual values.

    Calculates accuracy, precision, recall, F1 for classification models,
    or MAE, MSE, RMSE for regression models. The model kind is inferred
    from the recorded values: boolean/string actuals, or at most 10 distinct
    predictions and actuals with no fractional floats, count as
    classification; everything else is treated as regression.

    Args:
        model_id: Model ID.
        hours: Time range in hours.

    Returns:
        Quality metrics dictionary. ``enabled`` is False when the model
        config disables quality metrics; ``has_data`` is False when no
        usable prediction/actual pairs exist in the time range.

    Raises:
        ValueError: If the model is not found.
    """
    model = await self.model_repo.get_by_id(model_id)
    if model is None:
        raise ValueError(f"Model '{model_id}' not found")

    # Check if quality metrics are enabled
    config = model.config or {}
    if not config.get("enable_quality_metrics", True):
        return {
            "model_id": model_id,
            "enabled": False,
            "message": "Quality metrics are disabled for this model",
        }

    cutoff = datetime.utcnow() - timedelta(hours=hours)
    predictions = await self.prediction_repo.get_by_model_id(
        model_id, limit=10000, since=cutoff
    )

    # Only predictions whose ground truth has been recorded can be scored.
    with_actuals = [p for p in predictions if p.actual is not None]
    if not with_actuals:
        return {
            "model_id": model_id,
            "enabled": True,
            "has_data": False,
            "message": "No predictions with actual values found",
        }

    predictions_list = [p.prediction for p in with_actuals]
    actuals_list = [p.actual for p in with_actuals]

    try:
        unique_preds = set(predictions_list)
        unique_actuals = set(actuals_list)
    except TypeError:
        # Unhashable values (lists/dicts) cannot be de-duplicated; they are
        # scored by exact match below instead of crashing here.
        unique_preds = unique_actuals = None

    def is_continuous(value: Any) -> bool:
        # A float with a fractional part cannot be a class label.
        return isinstance(value, float) and not value.is_integer()

    labels_only = all(isinstance(v, (bool, str)) for v in actuals_list[:100])
    if unique_actuals is None:
        is_classification = True
    else:
        few_distinct = len(unique_preds) <= 10 and len(unique_actuals) <= 10
        has_continuous = any(is_continuous(v) for v in predictions_list) or any(
            is_continuous(v) for v in actuals_list
        )
        # Few distinct values alone is not enough: a regression model with
        # a handful of samples also has few distinct values.
        is_classification = labels_only or (few_distinct and not has_continuous)

    if is_classification:
        pairs = list(zip(predictions_list, actuals_list))
        accuracy = sum(1 for p, a in pairs if p == a) / len(pairs)

        precision = None
        recall = None
        f1 = None

        # Precision/recall/F1 are only defined here for binary problems;
        # the larger of the two labels is taken as the positive class.
        is_binary = unique_actuals is not None and len(unique_actuals) == 2
        if is_binary:
            try:
                positive_class = max(unique_actuals)
            except TypeError:
                # Labels of unorderable types (e.g. int vs str): no
                # positive class can be chosen, report accuracy only.
                is_binary = False

        if is_binary:
            tp = sum(1 for p, a in pairs if p == positive_class and a == positive_class)
            fp = sum(1 for p, a in pairs if p == positive_class and a != positive_class)
            fn = sum(1 for p, a in pairs if p != positive_class and a == positive_class)

            precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
            recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
            f1 = (
                2 * precision * recall / (precision + recall)
                if (precision + recall) > 0
                else 0.0
            )

        return {
            "model_id": model_id,
            "enabled": True,
            "has_data": True,
            "model_type": "classification",
            "sample_count": len(with_actuals),
            "time_range_hours": hours,
            "metrics": {
                "accuracy": accuracy,
                "precision": precision,
                "recall": recall,
                "f1_score": f1,
            },
            "timestamp": datetime.utcnow().isoformat(),
        }

    # Regression metrics: non-numeric pairs are skipped, not coerced.
    errors = [
        float(p) - float(a)
        for p, a in zip(predictions_list, actuals_list)
        if isinstance(p, (int, float)) and isinstance(a, (int, float))
    ]

    if not errors:
        return {
            "model_id": model_id,
            "enabled": True,
            "has_data": False,
            "message": "No numeric predictions found for regression metrics",
        }

    mae = sum(abs(e) for e in errors) / len(errors)
    mse = sum(e ** 2 for e in errors) / len(errors)
    rmse = mse ** 0.5

    return {
        "model_id": model_id,
        "enabled": True,
        "has_data": True,
        "model_type": "regression",
        "sample_count": len(errors),
        "time_range_hours": hours,
        "metrics": {
            "mae": mae,
            "mse": mse,
            "rmse": rmse,
        },
        "timestamp": datetime.utcnow().isoformat(),
    }
|
|
1404
|
+
|
|
1405
|
+
# =========================================================================
|
|
1406
|
+
# Truthound Integration - Trend Rule Evaluation
|
|
1407
|
+
# =========================================================================
|
|
1408
|
+
|
|
1409
|
+
async def evaluate_trend_rule(
    self,
    rule: ModelAlertRule,
    model_id: str,
) -> tuple[bool, float | None, float | None]:
    """Evaluate a trend-based alert rule.

    Maps to truthound.ml.monitoring.alerting.TrendRule:
    - metric_name: The metric to monitor
    - direction: "increasing" or "decreasing"
    - slope_threshold: Minimum slope to trigger
    - lookback_minutes: Time window for trend calculation

    The slope is a least-squares fit of the metric values against their
    position in the returned sequence (0, 1, 2, ...), i.e. it is measured
    per sample, not per unit of time.

    Args:
        rule: The alert rule to evaluate.
        model_id: Model ID.

    Returns:
        Tuple of (triggered, slope_value, slope_threshold). ``slope_value``
        is None when fewer than 10 usable data points are available.
    """
    config = rule.config or {}
    metric_name = config.get("metric_name", "latency_ms")
    direction = config.get("direction", "increasing")
    slope_threshold = config.get("slope_threshold", 0.01)
    lookback_minutes = config.get("lookback_minutes", 60)

    # Get metrics for lookback period
    cutoff = datetime.utcnow() - timedelta(minutes=lookback_minutes)

    # The metric type is the part of the name before the first underscore
    # ("latency_ms" -> "latency"). A name without an underscore is used
    # as-is rather than silently falling back to latency.
    metric_type = metric_name.partition("_")[0] or "latency"

    # NOTE(review): the slope sign assumes the repository returns rows in
    # chronological (oldest-first) order — confirm against the repo query.
    metrics = await self.metric_repo.get_by_model_id(
        model_id,
        metric_type=metric_type,
        since=cutoff,
        limit=1000,
    )

    # Rows without a value cannot take part in the fit.
    values = [m.value for m in metrics if m.value is not None]
    n = len(values)

    if n < 10:
        # Not enough data points
        return False, None, slope_threshold

    # Simple linear regression slope of value against sample index.
    x_mean = (n - 1) / 2
    y_mean = sum(values) / n

    numerator = sum((xi - x_mean) * (yi - y_mean) for xi, yi in enumerate(values))
    denominator = sum((xi - x_mean) ** 2 for xi in range(n))

    if denominator == 0:
        return False, 0.0, slope_threshold

    slope = numerator / denominator

    # Check if trend matches expected direction; any direction other than
    # "increasing" is evaluated as "decreasing".
    if direction == "increasing":
        triggered = slope > slope_threshold
    else:
        triggered = slope < -slope_threshold

    return triggered, slope, slope_threshold
|
|
@@ -49,6 +49,8 @@ class NotificationResult:
|
|
|
49
49
|
error: Error message if delivery failed.
|
|
50
50
|
sent_at: Timestamp of the delivery attempt.
|
|
51
51
|
metadata: Additional metadata about the delivery.
|
|
52
|
+
suppressed: Whether the notification was suppressed (dedup/throttle).
|
|
53
|
+
suppression_reason: Reason for suppression if suppressed.
|
|
52
54
|
"""
|
|
53
55
|
|
|
54
56
|
success: bool
|
|
@@ -58,6 +60,8 @@ class NotificationResult:
|
|
|
58
60
|
error: str | None = None
|
|
59
61
|
sent_at: datetime = field(default_factory=datetime.utcnow)
|
|
60
62
|
metadata: dict[str, Any] = field(default_factory=dict)
|
|
63
|
+
suppressed: bool = False
|
|
64
|
+
suppression_reason: str | None = None
|
|
61
65
|
|
|
62
66
|
|
|
63
67
|
@dataclass
|