truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. truthound_dashboard/api/alerts.py +75 -86
  2. truthound_dashboard/api/anomaly.py +7 -13
  3. truthound_dashboard/api/cross_alerts.py +38 -52
  4. truthound_dashboard/api/drift.py +49 -59
  5. truthound_dashboard/api/drift_monitor.py +234 -79
  6. truthound_dashboard/api/enterprise_sampling.py +498 -0
  7. truthound_dashboard/api/history.py +57 -5
  8. truthound_dashboard/api/lineage.py +3 -48
  9. truthound_dashboard/api/maintenance.py +104 -49
  10. truthound_dashboard/api/mask.py +1 -2
  11. truthound_dashboard/api/middleware.py +2 -1
  12. truthound_dashboard/api/model_monitoring.py +435 -311
  13. truthound_dashboard/api/notifications.py +227 -191
  14. truthound_dashboard/api/notifications_advanced.py +21 -20
  15. truthound_dashboard/api/observability.py +586 -0
  16. truthound_dashboard/api/plugins.py +2 -433
  17. truthound_dashboard/api/profile.py +199 -37
  18. truthound_dashboard/api/quality_reporter.py +701 -0
  19. truthound_dashboard/api/reports.py +7 -16
  20. truthound_dashboard/api/router.py +66 -0
  21. truthound_dashboard/api/rule_suggestions.py +5 -5
  22. truthound_dashboard/api/scan.py +17 -19
  23. truthound_dashboard/api/schedules.py +85 -50
  24. truthound_dashboard/api/schema_evolution.py +6 -6
  25. truthound_dashboard/api/schema_watcher.py +667 -0
  26. truthound_dashboard/api/sources.py +98 -27
  27. truthound_dashboard/api/tiering.py +1323 -0
  28. truthound_dashboard/api/triggers.py +14 -11
  29. truthound_dashboard/api/validations.py +12 -11
  30. truthound_dashboard/api/versioning.py +1 -6
  31. truthound_dashboard/core/__init__.py +129 -3
  32. truthound_dashboard/core/actions/__init__.py +62 -0
  33. truthound_dashboard/core/actions/custom.py +426 -0
  34. truthound_dashboard/core/actions/notifications.py +910 -0
  35. truthound_dashboard/core/actions/storage.py +472 -0
  36. truthound_dashboard/core/actions/webhook.py +281 -0
  37. truthound_dashboard/core/anomaly.py +262 -67
  38. truthound_dashboard/core/anomaly_explainer.py +4 -3
  39. truthound_dashboard/core/backends/__init__.py +67 -0
  40. truthound_dashboard/core/backends/base.py +299 -0
  41. truthound_dashboard/core/backends/errors.py +191 -0
  42. truthound_dashboard/core/backends/factory.py +423 -0
  43. truthound_dashboard/core/backends/mock_backend.py +451 -0
  44. truthound_dashboard/core/backends/truthound_backend.py +718 -0
  45. truthound_dashboard/core/checkpoint/__init__.py +87 -0
  46. truthound_dashboard/core/checkpoint/adapters.py +814 -0
  47. truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
  48. truthound_dashboard/core/checkpoint/runner.py +270 -0
  49. truthound_dashboard/core/connections.py +645 -23
  50. truthound_dashboard/core/converters/__init__.py +14 -0
  51. truthound_dashboard/core/converters/truthound.py +620 -0
  52. truthound_dashboard/core/cross_alerts.py +540 -320
  53. truthound_dashboard/core/datasource_factory.py +1672 -0
  54. truthound_dashboard/core/drift_monitor.py +216 -20
  55. truthound_dashboard/core/enterprise_sampling.py +1291 -0
  56. truthound_dashboard/core/interfaces/__init__.py +225 -0
  57. truthound_dashboard/core/interfaces/actions.py +652 -0
  58. truthound_dashboard/core/interfaces/base.py +247 -0
  59. truthound_dashboard/core/interfaces/checkpoint.py +676 -0
  60. truthound_dashboard/core/interfaces/protocols.py +664 -0
  61. truthound_dashboard/core/interfaces/reporters.py +650 -0
  62. truthound_dashboard/core/interfaces/routing.py +646 -0
  63. truthound_dashboard/core/interfaces/triggers.py +619 -0
  64. truthound_dashboard/core/lineage.py +407 -71
  65. truthound_dashboard/core/model_monitoring.py +431 -3
  66. truthound_dashboard/core/notifications/base.py +4 -0
  67. truthound_dashboard/core/notifications/channels.py +501 -1203
  68. truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
  69. truthound_dashboard/core/notifications/deduplication/service.py +131 -348
  70. truthound_dashboard/core/notifications/dispatcher.py +202 -11
  71. truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
  72. truthound_dashboard/core/notifications/escalation/engine.py +168 -358
  73. truthound_dashboard/core/notifications/routing/__init__.py +88 -128
  74. truthound_dashboard/core/notifications/routing/engine.py +90 -317
  75. truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
  76. truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
  77. truthound_dashboard/core/notifications/throttling/builder.py +117 -255
  78. truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
  79. truthound_dashboard/core/phase5/collaboration.py +1 -1
  80. truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
  81. truthound_dashboard/core/quality_reporter.py +1359 -0
  82. truthound_dashboard/core/report_history.py +0 -6
  83. truthound_dashboard/core/reporters/__init__.py +175 -14
  84. truthound_dashboard/core/reporters/adapters.py +943 -0
  85. truthound_dashboard/core/reporters/base.py +0 -3
  86. truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
  87. truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
  88. truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
  89. truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
  90. truthound_dashboard/core/reporters/compat.py +266 -0
  91. truthound_dashboard/core/reporters/csv_reporter.py +2 -35
  92. truthound_dashboard/core/reporters/factory.py +526 -0
  93. truthound_dashboard/core/reporters/interfaces.py +745 -0
  94. truthound_dashboard/core/reporters/registry.py +1 -10
  95. truthound_dashboard/core/scheduler.py +165 -0
  96. truthound_dashboard/core/schema_evolution.py +3 -3
  97. truthound_dashboard/core/schema_watcher.py +1528 -0
  98. truthound_dashboard/core/services.py +595 -76
  99. truthound_dashboard/core/store_manager.py +810 -0
  100. truthound_dashboard/core/streaming_anomaly.py +169 -4
  101. truthound_dashboard/core/tiering.py +1309 -0
  102. truthound_dashboard/core/triggers/evaluators.py +178 -8
  103. truthound_dashboard/core/truthound_adapter.py +2620 -197
  104. truthound_dashboard/core/unified_alerts.py +23 -20
  105. truthound_dashboard/db/__init__.py +8 -0
  106. truthound_dashboard/db/database.py +8 -2
  107. truthound_dashboard/db/models.py +944 -25
  108. truthound_dashboard/db/repository.py +2 -0
  109. truthound_dashboard/main.py +15 -0
  110. truthound_dashboard/schemas/__init__.py +177 -16
  111. truthound_dashboard/schemas/base.py +44 -23
  112. truthound_dashboard/schemas/collaboration.py +19 -6
  113. truthound_dashboard/schemas/cross_alerts.py +19 -3
  114. truthound_dashboard/schemas/drift.py +61 -55
  115. truthound_dashboard/schemas/drift_monitor.py +67 -23
  116. truthound_dashboard/schemas/enterprise_sampling.py +653 -0
  117. truthound_dashboard/schemas/lineage.py +0 -33
  118. truthound_dashboard/schemas/mask.py +10 -8
  119. truthound_dashboard/schemas/model_monitoring.py +89 -10
  120. truthound_dashboard/schemas/notifications_advanced.py +13 -0
  121. truthound_dashboard/schemas/observability.py +453 -0
  122. truthound_dashboard/schemas/plugins.py +0 -280
  123. truthound_dashboard/schemas/profile.py +154 -247
  124. truthound_dashboard/schemas/quality_reporter.py +403 -0
  125. truthound_dashboard/schemas/reports.py +2 -2
  126. truthound_dashboard/schemas/rule_suggestion.py +8 -1
  127. truthound_dashboard/schemas/scan.py +4 -24
  128. truthound_dashboard/schemas/schedule.py +11 -3
  129. truthound_dashboard/schemas/schema_watcher.py +727 -0
  130. truthound_dashboard/schemas/source.py +17 -2
  131. truthound_dashboard/schemas/tiering.py +822 -0
  132. truthound_dashboard/schemas/triggers.py +16 -0
  133. truthound_dashboard/schemas/unified_alerts.py +7 -0
  134. truthound_dashboard/schemas/validation.py +0 -13
  135. truthound_dashboard/schemas/validators/base.py +41 -21
  136. truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
  137. truthound_dashboard/schemas/validators/localization_validators.py +273 -0
  138. truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
  139. truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
  140. truthound_dashboard/schemas/validators/referential_validators.py +312 -0
  141. truthound_dashboard/schemas/validators/registry.py +93 -8
  142. truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
  143. truthound_dashboard/schemas/versioning.py +1 -6
  144. truthound_dashboard/static/index.html +2 -2
  145. truthound_dashboard-1.5.1.dist-info/METADATA +312 -0
  146. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/RECORD +149 -148
  147. truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
  148. truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
  149. truthound_dashboard/core/plugins/hooks/manager.py +0 -403
  150. truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
  151. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
  152. truthound_dashboard/core/reporters/junit_reporter.py +0 -233
  153. truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
  154. truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
  155. truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
  156. truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
  157. truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
  158. truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
  159. truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
  160. truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
  161. truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
  162. truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
  163. truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
  164. truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
  165. truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
  166. truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
  167. truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
  168. truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
  169. truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
  170. truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
  171. truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
  172. truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
  173. truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
  174. truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
  175. truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
  176. truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
  177. truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
  178. truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
  179. truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
  180. truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
  181. truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
  182. truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
  183. truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
  184. truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
  185. truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
  186. truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
  187. truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
  188. truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
  189. truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
  190. truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
  191. truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
  192. truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
  193. truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
  194. truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
  195. truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
  196. truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
  197. truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
  198. truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
  199. truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
  200. truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
  201. truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
  202. truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
  203. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/WHEEL +0 -0
  204. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/entry_points.txt +0 -0
  205. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -730,6 +730,91 @@ class ModelMonitoringService:
730
730
  """Delete an alert handler."""
731
731
  return await self.handler_repo.delete(handler_id)
732
732
 
733
+ async def test_alert_handler(
734
+ self,
735
+ handler_id: str,
736
+ ) -> dict[str, Any]:
737
+ """Test an alert handler by sending a test notification.
738
+
739
+ Args:
740
+ handler_id: Handler ID.
741
+
742
+ Returns:
743
+ Test result with success status and message.
744
+
745
+ Raises:
746
+ ValueError: If handler not found.
747
+ """
748
+ handler = await self.handler_repo.get_by_id(handler_id)
749
+ if handler is None:
750
+ raise ValueError(f"Handler '{handler_id}' not found")
751
+
752
+ handler_type = handler.handler_type
753
+ config = handler.config or {}
754
+
755
+ # Simulate test based on handler type
756
+ test_result = {
757
+ "handler_id": handler_id,
758
+ "handler_type": handler_type,
759
+ "success": False,
760
+ "message": "",
761
+ "timestamp": datetime.utcnow().isoformat(),
762
+ }
763
+
764
+ try:
765
+ if handler_type == "webhook":
766
+ # For webhook, we would normally send a test request
767
+ # For safety, we just validate the config
768
+ url = config.get("url")
769
+ if not url:
770
+ test_result["message"] = "Webhook URL not configured"
771
+ elif not url.startswith(("http://", "https://")):
772
+ test_result["message"] = "Invalid webhook URL format"
773
+ else:
774
+ test_result["success"] = True
775
+ test_result["message"] = f"Webhook configuration valid. URL: {url}"
776
+
777
+ elif handler_type == "email":
778
+ recipients = config.get("recipients", [])
779
+ if not recipients:
780
+ test_result["message"] = "No email recipients configured"
781
+ else:
782
+ test_result["success"] = True
783
+ test_result["message"] = f"Email configuration valid. Recipients: {len(recipients)}"
784
+
785
+ elif handler_type == "slack":
786
+ webhook_url = config.get("webhook_url") or config.get("url")
787
+ if not webhook_url:
788
+ test_result["message"] = "Slack webhook URL not configured"
789
+ else:
790
+ test_result["success"] = True
791
+ test_result["message"] = "Slack configuration valid"
792
+
793
+ elif handler_type == "pagerduty":
794
+ integration_key = config.get("integration_key") or config.get("routing_key")
795
+ if not integration_key:
796
+ test_result["message"] = "PagerDuty integration key not configured"
797
+ else:
798
+ test_result["success"] = True
799
+ test_result["message"] = "PagerDuty configuration valid"
800
+
801
+ elif handler_type == "opsgenie":
802
+ api_key = config.get("api_key")
803
+ if not api_key:
804
+ test_result["message"] = "OpsGenie API key not configured"
805
+ else:
806
+ test_result["success"] = True
807
+ test_result["message"] = "OpsGenie configuration valid"
808
+
809
+ else:
810
+ test_result["success"] = True
811
+ test_result["message"] = f"Handler type '{handler_type}' configuration accepted"
812
+
813
+ except Exception as e:
814
+ test_result["message"] = f"Test failed: {str(e)}"
815
+
816
+ return test_result
817
+
733
818
  # =========================================================================
734
819
  # Alerts
735
820
  # =========================================================================
@@ -893,21 +978,36 @@ class ModelMonitoringService:
893
978
 
894
979
  elif rule_type == "statistical":
895
980
  # Statistical anomaly detection based on standard deviations
981
+ # Maps to truthound.ml.monitoring.alerting.AnomalyRule
896
982
  metric_name = config.get("metric_name", "latency_ms")
897
983
  std_devs = config.get("std_devs", 3.0)
984
+ window_size = config.get("window_size", 100)
898
985
 
899
986
  for m in metrics.get("metrics", []):
900
987
  if m.get("name") == metric_name:
901
988
  avg = m.get("avg_value")
902
989
  p95 = m.get("p95_value")
990
+ count = m.get("count", 0)
991
+
992
+ # Only evaluate if we have enough samples
993
+ if count < window_size:
994
+ break
995
+
903
996
  if avg and p95:
904
- # Simple heuristic: if p95 is more than std_devs times avg
905
- if p95 > avg * (1 + std_devs * 0.1):
906
- return True, p95, avg * (1 + std_devs * 0.1)
997
+ # AnomalyRule: if p95 is more than std_devs above the mean
998
+ threshold_value = avg * (1 + std_devs * 0.1)
999
+ if p95 > threshold_value:
1000
+ return True, p95, threshold_value
907
1001
  break
908
1002
 
909
1003
  return False, None, None
910
1004
 
1005
+ elif rule_type == "trend":
1006
+ # Trend-based alerting - evaluated async separately
1007
+ # This is a placeholder; actual evaluation done in evaluate_trend_rule()
1008
+ # Return False here as trend rules need historical data
1009
+ return False, None, None
1010
+
911
1011
  return False, None, None
912
1012
 
913
1013
  # =========================================================================
@@ -1041,3 +1141,331 @@ class ModelMonitoringService:
1041
1141
  "created_at": alert.created_at.isoformat() if alert.created_at else None,
1042
1142
  "updated_at": alert.updated_at.isoformat() if alert.updated_at else None,
1043
1143
  }
1144
+
1145
+ # =========================================================================
1146
+ # Truthound Integration - Drift Detection
1147
+ # =========================================================================
1148
+
1149
async def compute_drift_score(
    self,
    model_id: str,
    reference_data: Any,
    current_data: Any,
    *,
    method: str = "auto",
    columns: list[str] | None = None,
) -> dict[str, Any]:
    """Compute drift score using truthound th.compare().

    Uses truthound's drift detection methods:
    - auto: Auto-select best method based on column type
    - psi: Population Stability Index
    - ks: Kolmogorov-Smirnov test
    - js: Jensen-Shannon divergence
    - wasserstein: Earth Mover's Distance
    - chi2: Chi-squared (categorical)
    - kl: Kullback-Leibler divergence
    - cvm: Cramér-von Mises test
    - anderson: Anderson-Darling test
    - hellinger: Hellinger distance
    - energy: Energy distance
    - mmd: Maximum Mean Discrepancy

    The overall score is the largest per-column statistic among columns
    flagged as drifted (never below 0.0); it is stored on the model and,
    when it exceeds the model's ``drift_threshold`` config value
    (default 0.1), an alert is raised against the first active rule whose
    name contains "drift". If no such rule exists no alert is created.

    Args:
        model_id: Model ID to update.
        reference_data: Reference/baseline dataset.
        current_data: Current dataset to compare.
        method: Drift detection method (default: auto). When left at
            "auto", the model config's ``drift_method`` is used if set.
        columns: Specific columns to check (default: all).

    Returns:
        Drift detection result with per-column scores.

    Raises:
        ValueError: If the model does not exist.
    """
    import truthound as th

    model = await self.model_repo.get_by_id(model_id)
    if model is None:
        raise ValueError(f"Model '{model_id}' not found")

    config = model.config or {}

    # Get method from model config if not specified
    if method == "auto":
        method = config.get("drift_method", "auto")

    # NOTE(review): th.compare() is called synchronously inside a coroutine;
    # confirm it is cheap enough not to stall the event loop.
    drift_result = th.compare(
        reference_data,
        current_data,
        method=method,
        columns=columns,
    )

    overall_score = 0.0
    drifted_columns = []
    column_scores = {}

    for col in drift_result.columns:
        # A missing statistic and a statistic of None both count as 0.0,
        # so the comparison below can never see None.
        score = getattr(col.result, "statistic", None)
        if score is None:
            score = 0.0
        column_scores[col.column] = score

        # Per-column flags only matter when the run as a whole drifted.
        if drift_result.has_drift and col.result.drifted:
            drifted_columns.append(col.column)
            if score > overall_score:
                overall_score = score

    # Update model drift score
    model.update_drift_score(overall_score)

    drift_threshold = config.get("drift_threshold", 0.1)

    # Create alert if drift exceeds threshold
    if overall_score > drift_threshold:
        rules = await self.rule_repo.get_by_model_id(model_id, active_only=True)
        drift_rule = next(
            (r for r in rules if "drift" in r.name.lower()),
            None,
        )

        if drift_rule:
            await self.create_alert(
                model_id=model_id,
                rule_id=drift_rule.id,
                message=f"Drift detected: score={overall_score:.3f} exceeds threshold={drift_threshold:.3f}. "
                f"Drifted columns: {', '.join(drifted_columns)}",
                severity="warning" if overall_score < 0.25 else "critical",
                metric_value=overall_score,
                threshold_value=drift_threshold,
            )

    await self.session.flush()

    return {
        "model_id": model_id,
        "method": method,
        "has_drift": drift_result.has_drift,
        "overall_score": overall_score,
        "drift_threshold": drift_threshold,
        "drifted_columns": drifted_columns,
        "column_scores": column_scores,
        "timestamp": datetime.utcnow().isoformat(),
    }
1266
+
1267
async def compute_quality_metrics(
    self,
    model_id: str,
    hours: int = 24,
) -> dict[str, Any]:
    """Compute quality metrics from predictions with actual values.

    Calculates accuracy (plus precision, recall, F1 when there are exactly
    two distinct actual labels) for classification models, or MAE, MSE,
    RMSE for regression models.

    The model kind is inferred from the data: it is classification when
    the first 100 actuals are all bool/str, or when both predictions and
    actuals have at most 10 distinct values and none of them is a
    non-integral float. The second condition keeps a small window of
    continuous predictions from being scored as exact-match accuracy.

    Args:
        model_id: Model ID.
        hours: Time range in hours.

    Returns:
        Quality metrics dictionary. Contains ``enabled: False`` when the
        model config disables quality metrics, and ``has_data: False``
        when no usable prediction/actual pairs exist in the window.

    Raises:
        ValueError: If the model does not exist.
    """
    model = await self.model_repo.get_by_id(model_id)
    if model is None:
        raise ValueError(f"Model '{model_id}' not found")

    # Check if quality metrics are enabled
    config = model.config or {}
    if not config.get("enable_quality_metrics", True):
        return {
            "model_id": model_id,
            "enabled": False,
            "message": "Quality metrics are disabled for this model",
        }

    cutoff = datetime.utcnow() - timedelta(hours=hours)
    predictions = await self.prediction_repo.get_by_model_id(
        model_id, limit=10000, since=cutoff
    )

    # Only predictions whose ground truth has been recorded can be scored.
    with_actuals = [p for p in predictions if p.actual is not None]

    if not with_actuals:
        return {
            "model_id": model_id,
            "enabled": True,
            "has_data": False,
            "message": "No predictions with actual values found",
        }

    predictions_list = [p.prediction for p in with_actuals]
    actuals_list = [p.actual for p in with_actuals]

    # NOTE(review): set() assumes prediction/actual values are hashable —
    # confirm against the prediction model's column types.
    unique_preds = set(predictions_list)
    unique_actuals = set(actuals_list)

    def _is_continuous(value: Any) -> bool:
        # Non-integral floats (including NaN/inf) cannot be class labels.
        return isinstance(value, float) and not value.is_integer()

    labels_only = all(isinstance(v, (bool, str)) for v in actuals_list[:100])
    low_cardinality = len(unique_preds) <= 10 and len(unique_actuals) <= 10
    has_continuous = any(map(_is_continuous, predictions_list)) or any(
        map(_is_continuous, actuals_list)
    )
    is_classification = labels_only or (low_cardinality and not has_continuous)

    if is_classification:
        pairs = list(zip(predictions_list, actuals_list))
        correct = sum(1 for p, a in pairs if p == a)
        accuracy = correct / len(with_actuals)

        precision = recall = f1 = None

        # Precision/recall/F1 are only defined here for the binary case.
        if len(unique_actuals) == 2:
            try:
                # The greater of the two labels is treated as "positive".
                positive_class = max(unique_actuals)
            except TypeError:
                # Labels of mixed, unorderable types: no positive class can
                # be chosen, so only accuracy is reported.
                pass
            else:
                tp = fp = fn = 0
                for p, a in pairs:
                    predicted_pos = p == positive_class
                    actual_pos = a == positive_class
                    if predicted_pos and actual_pos:
                        tp += 1
                    elif predicted_pos:
                        fp += 1
                    elif actual_pos:
                        fn += 1

                precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
                recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
                f1 = (
                    2 * precision * recall / (precision + recall)
                    if (precision + recall) > 0
                    else 0.0
                )

        return {
            "model_id": model_id,
            "enabled": True,
            "has_data": True,
            "model_type": "classification",
            "sample_count": len(with_actuals),
            "time_range_hours": hours,
            "metrics": {
                "accuracy": accuracy,
                "precision": precision,
                "recall": recall,
                "f1_score": f1,
            },
            "timestamp": datetime.utcnow().isoformat(),
        }

    # Regression metrics: pairs where either side is non-numeric are skipped.
    errors = [
        float(p) - float(a)
        for p, a in zip(predictions_list, actuals_list)
        if isinstance(p, (int, float)) and isinstance(a, (int, float))
    ]

    if not errors:
        return {
            "model_id": model_id,
            "enabled": True,
            "has_data": False,
            "message": "No numeric predictions found for regression metrics",
        }

    mae = sum(abs(e) for e in errors) / len(errors)
    mse = sum(e ** 2 for e in errors) / len(errors)
    rmse = mse ** 0.5

    return {
        "model_id": model_id,
        "enabled": True,
        "has_data": True,
        "model_type": "regression",
        "sample_count": len(errors),
        "time_range_hours": hours,
        "metrics": {
            "mae": mae,
            "mse": mse,
            "rmse": rmse,
        },
        "timestamp": datetime.utcnow().isoformat(),
    }
1404
+
1405
+ # =========================================================================
1406
+ # Truthound Integration - Trend Rule Evaluation
1407
+ # =========================================================================
1408
+
1409
async def evaluate_trend_rule(
    self,
    rule: ModelAlertRule,
    model_id: str,
) -> tuple[bool, float | None, float | None]:
    """Evaluate a trend-based alert rule.

    Maps to truthound.ml.monitoring.alerting.TrendRule:
    - metric_name: The metric to monitor
    - direction: "increasing" or "decreasing"
    - slope_threshold: Minimum slope to trigger
    - lookback_minutes: Time window for trend calculation

    The slope is the least-squares slope of the metric values against
    their position in the list returned by the metric repository, i.e. it
    is expressed per sample, not per unit of time.

    Args:
        rule: The alert rule to evaluate.
        model_id: Model ID.

    Returns:
        Tuple of (triggered, slope_value, slope_threshold). ``slope_value``
        is None when fewer than 10 usable samples fall in the window.
    """
    config = rule.config or {}
    metric_name = config.get("metric_name", "latency_ms")
    direction = config.get("direction", "increasing")
    slope_threshold = config.get("slope_threshold", 0.01)
    lookback_minutes = config.get("lookback_minutes", 60)

    # Get metrics for lookback period
    cutoff = datetime.utcnow() - timedelta(minutes=lookback_minutes)

    # The metric type is the part of the name before the first underscore
    # ("latency_ms" -> "latency"). A name without an underscore is its own
    # type; only an empty result falls back to "latency".
    metric_type = metric_name.split("_", 1)[0] or "latency"

    metrics = await self.metric_repo.get_by_model_id(
        model_id,
        metric_type=metric_type,
        since=cutoff,
        limit=1000,
    )

    # NOTE(review): the regression below assumes the repository returns
    # samples oldest-first; a newest-first ordering would flip the sign of
    # the slope — confirm against the repository query.
    values = [m.value for m in metrics if m.value is not None]
    n = len(values)

    if n < 10:
        # Not enough data points
        return False, None, slope_threshold

    # Simple linear regression of value on sample index. With n >= 10 the
    # indices are distinct, so the denominator is strictly positive.
    x_mean = sum(range(n)) / n
    y_mean = sum(values) / n

    numerator = sum((xi - x_mean) * (yi - y_mean) for xi, yi in enumerate(values))
    denominator = sum((xi - x_mean) ** 2 for xi in range(n))

    slope = numerator / denominator

    # Any direction other than "increasing" is evaluated as "decreasing".
    if direction == "increasing":
        triggered = slope > slope_threshold
    else:
        triggered = slope < -slope_threshold

    return triggered, slope, slope_threshold
@@ -49,6 +49,8 @@ class NotificationResult:
49
49
  error: Error message if delivery failed.
50
50
  sent_at: Timestamp of the delivery attempt.
51
51
  metadata: Additional metadata about the delivery.
52
+ suppressed: Whether the notification was suppressed (dedup/throttle).
53
+ suppression_reason: Reason for suppression if suppressed.
52
54
  """
53
55
 
54
56
  success: bool
@@ -58,6 +60,8 @@ class NotificationResult:
58
60
  error: str | None = None
59
61
  sent_at: datetime = field(default_factory=datetime.utcnow)
60
62
  metadata: dict[str, Any] = field(default_factory=dict)
63
+ suppressed: bool = False
64
+ suppression_reason: str | None = None
61
65
 
62
66
 
63
67
  @dataclass