truthound-dashboard 1.3.1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. truthound_dashboard/api/alerts.py +258 -0
  2. truthound_dashboard/api/anomaly.py +1302 -0
  3. truthound_dashboard/api/cross_alerts.py +352 -0
  4. truthound_dashboard/api/deps.py +143 -0
  5. truthound_dashboard/api/drift_monitor.py +540 -0
  6. truthound_dashboard/api/lineage.py +1151 -0
  7. truthound_dashboard/api/maintenance.py +363 -0
  8. truthound_dashboard/api/middleware.py +373 -1
  9. truthound_dashboard/api/model_monitoring.py +805 -0
  10. truthound_dashboard/api/notifications_advanced.py +2452 -0
  11. truthound_dashboard/api/plugins.py +2096 -0
  12. truthound_dashboard/api/profile.py +211 -14
  13. truthound_dashboard/api/reports.py +853 -0
  14. truthound_dashboard/api/router.py +147 -0
  15. truthound_dashboard/api/rule_suggestions.py +310 -0
  16. truthound_dashboard/api/schema_evolution.py +231 -0
  17. truthound_dashboard/api/sources.py +47 -3
  18. truthound_dashboard/api/triggers.py +190 -0
  19. truthound_dashboard/api/validations.py +13 -0
  20. truthound_dashboard/api/validators.py +333 -4
  21. truthound_dashboard/api/versioning.py +309 -0
  22. truthound_dashboard/api/websocket.py +301 -0
  23. truthound_dashboard/core/__init__.py +27 -0
  24. truthound_dashboard/core/anomaly.py +1395 -0
  25. truthound_dashboard/core/anomaly_explainer.py +633 -0
  26. truthound_dashboard/core/cache.py +206 -0
  27. truthound_dashboard/core/cached_services.py +422 -0
  28. truthound_dashboard/core/charts.py +352 -0
  29. truthound_dashboard/core/connections.py +1069 -42
  30. truthound_dashboard/core/cross_alerts.py +837 -0
  31. truthound_dashboard/core/drift_monitor.py +1477 -0
  32. truthound_dashboard/core/drift_sampling.py +669 -0
  33. truthound_dashboard/core/i18n/__init__.py +42 -0
  34. truthound_dashboard/core/i18n/detector.py +173 -0
  35. truthound_dashboard/core/i18n/messages.py +564 -0
  36. truthound_dashboard/core/lineage.py +971 -0
  37. truthound_dashboard/core/maintenance.py +443 -5
  38. truthound_dashboard/core/model_monitoring.py +1043 -0
  39. truthound_dashboard/core/notifications/channels.py +1020 -1
  40. truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
  41. truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
  42. truthound_dashboard/core/notifications/deduplication/service.py +400 -0
  43. truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
  44. truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
  45. truthound_dashboard/core/notifications/dispatcher.py +43 -0
  46. truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
  47. truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
  48. truthound_dashboard/core/notifications/escalation/engine.py +429 -0
  49. truthound_dashboard/core/notifications/escalation/models.py +336 -0
  50. truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
  51. truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
  52. truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
  53. truthound_dashboard/core/notifications/events.py +49 -0
  54. truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
  55. truthound_dashboard/core/notifications/metrics/base.py +528 -0
  56. truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
  57. truthound_dashboard/core/notifications/routing/__init__.py +169 -0
  58. truthound_dashboard/core/notifications/routing/combinators.py +184 -0
  59. truthound_dashboard/core/notifications/routing/config.py +375 -0
  60. truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
  61. truthound_dashboard/core/notifications/routing/engine.py +382 -0
  62. truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
  63. truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
  64. truthound_dashboard/core/notifications/routing/rules.py +625 -0
  65. truthound_dashboard/core/notifications/routing/validator.py +678 -0
  66. truthound_dashboard/core/notifications/service.py +2 -0
  67. truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
  68. truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
  69. truthound_dashboard/core/notifications/throttling/builder.py +311 -0
  70. truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
  71. truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
  72. truthound_dashboard/core/openlineage.py +1028 -0
  73. truthound_dashboard/core/plugins/__init__.py +39 -0
  74. truthound_dashboard/core/plugins/docs/__init__.py +39 -0
  75. truthound_dashboard/core/plugins/docs/extractor.py +703 -0
  76. truthound_dashboard/core/plugins/docs/renderers.py +804 -0
  77. truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
  78. truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
  79. truthound_dashboard/core/plugins/hooks/manager.py +403 -0
  80. truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
  81. truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
  82. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
  83. truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
  84. truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
  85. truthound_dashboard/core/plugins/loader.py +504 -0
  86. truthound_dashboard/core/plugins/registry.py +810 -0
  87. truthound_dashboard/core/plugins/reporter_executor.py +588 -0
  88. truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
  89. truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
  90. truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
  91. truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
  92. truthound_dashboard/core/plugins/sandbox.py +617 -0
  93. truthound_dashboard/core/plugins/security/__init__.py +68 -0
  94. truthound_dashboard/core/plugins/security/analyzer.py +535 -0
  95. truthound_dashboard/core/plugins/security/policies.py +311 -0
  96. truthound_dashboard/core/plugins/security/protocols.py +296 -0
  97. truthound_dashboard/core/plugins/security/signing.py +842 -0
  98. truthound_dashboard/core/plugins/security.py +446 -0
  99. truthound_dashboard/core/plugins/validator_executor.py +401 -0
  100. truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
  101. truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
  102. truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
  103. truthound_dashboard/core/plugins/versioning/semver.py +266 -0
  104. truthound_dashboard/core/profile_comparison.py +601 -0
  105. truthound_dashboard/core/report_history.py +570 -0
  106. truthound_dashboard/core/reporters/__init__.py +57 -0
  107. truthound_dashboard/core/reporters/base.py +296 -0
  108. truthound_dashboard/core/reporters/csv_reporter.py +155 -0
  109. truthound_dashboard/core/reporters/html_reporter.py +598 -0
  110. truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
  111. truthound_dashboard/core/reporters/i18n/base.py +494 -0
  112. truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
  113. truthound_dashboard/core/reporters/json_reporter.py +160 -0
  114. truthound_dashboard/core/reporters/junit_reporter.py +233 -0
  115. truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
  116. truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
  117. truthound_dashboard/core/reporters/registry.py +272 -0
  118. truthound_dashboard/core/rule_generator.py +2088 -0
  119. truthound_dashboard/core/scheduler.py +822 -12
  120. truthound_dashboard/core/schema_evolution.py +858 -0
  121. truthound_dashboard/core/services.py +152 -9
  122. truthound_dashboard/core/statistics.py +718 -0
  123. truthound_dashboard/core/streaming_anomaly.py +883 -0
  124. truthound_dashboard/core/triggers/__init__.py +45 -0
  125. truthound_dashboard/core/triggers/base.py +226 -0
  126. truthound_dashboard/core/triggers/evaluators.py +609 -0
  127. truthound_dashboard/core/triggers/factory.py +363 -0
  128. truthound_dashboard/core/unified_alerts.py +870 -0
  129. truthound_dashboard/core/validation_limits.py +509 -0
  130. truthound_dashboard/core/versioning.py +709 -0
  131. truthound_dashboard/core/websocket/__init__.py +59 -0
  132. truthound_dashboard/core/websocket/manager.py +512 -0
  133. truthound_dashboard/core/websocket/messages.py +130 -0
  134. truthound_dashboard/db/__init__.py +30 -0
  135. truthound_dashboard/db/models.py +3375 -3
  136. truthound_dashboard/main.py +22 -0
  137. truthound_dashboard/schemas/__init__.py +396 -1
  138. truthound_dashboard/schemas/anomaly.py +1258 -0
  139. truthound_dashboard/schemas/base.py +4 -0
  140. truthound_dashboard/schemas/cross_alerts.py +334 -0
  141. truthound_dashboard/schemas/drift_monitor.py +890 -0
  142. truthound_dashboard/schemas/lineage.py +428 -0
  143. truthound_dashboard/schemas/maintenance.py +154 -0
  144. truthound_dashboard/schemas/model_monitoring.py +374 -0
  145. truthound_dashboard/schemas/notifications_advanced.py +1363 -0
  146. truthound_dashboard/schemas/openlineage.py +704 -0
  147. truthound_dashboard/schemas/plugins.py +1293 -0
  148. truthound_dashboard/schemas/profile.py +420 -34
  149. truthound_dashboard/schemas/profile_comparison.py +242 -0
  150. truthound_dashboard/schemas/reports.py +285 -0
  151. truthound_dashboard/schemas/rule_suggestion.py +434 -0
  152. truthound_dashboard/schemas/schema_evolution.py +164 -0
  153. truthound_dashboard/schemas/source.py +117 -2
  154. truthound_dashboard/schemas/triggers.py +511 -0
  155. truthound_dashboard/schemas/unified_alerts.py +223 -0
  156. truthound_dashboard/schemas/validation.py +25 -1
  157. truthound_dashboard/schemas/validators/__init__.py +11 -0
  158. truthound_dashboard/schemas/validators/base.py +151 -0
  159. truthound_dashboard/schemas/versioning.py +152 -0
  160. truthound_dashboard/static/index.html +2 -2
  161. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -22
  162. truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
  163. truthound_dashboard/static/assets/index-BZG20KuF.js +0 -586
  164. truthound_dashboard/static/assets/index-D_HyZ3pb.css +0 -1
  165. truthound_dashboard/static/assets/unmerged_dictionaries-CtpqQBm0.js +0 -1
  166. truthound_dashboard-1.3.1.dist-info/RECORD +0 -110
  167. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
  168. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
  169. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
truthound_dashboard/core/streaming_anomaly.py (new file)
@@ -0,0 +1,883 @@
+ """Streaming anomaly detection service.
+
+ This module provides real-time streaming anomaly detection capabilities,
+ supporting sliding window detection and online learning.
+ """
+
+ from __future__ import annotations
+
+ import asyncio
+ from collections import deque
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ from enum import Enum
+ from typing import Any
+ from uuid import uuid4
+
+ import numpy as np
+
+
+ class StreamingSessionStatus(str, Enum):
+     """Status of a streaming session."""
+
+     CREATED = "created"
+     RUNNING = "running"
+     PAUSED = "paused"
+     STOPPED = "stopped"
+     ERROR = "error"
+
+
+ class StreamingAlgorithm(str, Enum):
+     """Supported streaming anomaly detection algorithms."""
+
+     ZSCORE_ROLLING = "zscore_rolling"
+     EXPONENTIAL_MOVING_AVERAGE = "ema"
+     ISOLATION_FOREST_INCREMENTAL = "isolation_forest_incremental"
+     HALF_SPACE_TREES = "half_space_trees"
+     ROBUST_RANDOM_CUT_FOREST = "rrcf"
+
+
+ @dataclass
+ class StreamingAlert:
+     """An anomaly alert from streaming detection."""
+
+     id: str
+     session_id: str
+     timestamp: datetime
+     data_point: dict[str, Any]
+     anomaly_score: float
+     is_anomaly: bool
+     algorithm: StreamingAlgorithm
+     details: dict[str, Any] = field(default_factory=dict)
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary."""
+         return {
+             "id": self.id,
+             "session_id": self.session_id,
+             "timestamp": self.timestamp.isoformat(),
+             "data_point": self.data_point,
+             "anomaly_score": self.anomaly_score,
+             "is_anomaly": self.is_anomaly,
+             "algorithm": self.algorithm.value,
+             "details": self.details,
+         }
+
+
+ @dataclass
+ class StreamingStatistics:
+     """Rolling statistics for streaming detection."""
+
+     count: int = 0
+     mean: float = 0.0
+     variance: float = 0.0
+     min_value: float = float("inf")
+     max_value: float = float("-inf")
+     anomaly_count: int = 0
+
+     def update(self, value: float, is_anomaly: bool = False) -> None:
+         """Update statistics with a new value using Welford's algorithm."""
+         self.count += 1
+         delta = value - self.mean
+         self.mean += delta / self.count
+         delta2 = value - self.mean
+         self.variance += delta * delta2
+
+         self.min_value = min(self.min_value, value)
+         self.max_value = max(self.max_value, value)
+
+         if is_anomaly:
+             self.anomaly_count += 1
+
+     @property
+     def std(self) -> float:
+         """Get standard deviation."""
+         if self.count < 2:
+             return 0.0
+         return np.sqrt(self.variance / (self.count - 1))
+
+     @property
+     def anomaly_rate(self) -> float:
+         """Get anomaly rate."""
+         if self.count == 0:
+             return 0.0
+         return self.anomaly_count / self.count
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary."""
+         return {
+             "count": self.count,
+             "mean": self.mean,
+             "std": self.std,
+             "min": self.min_value if self.min_value != float("inf") else None,
+             "max": self.max_value if self.max_value != float("-inf") else None,
+             "anomaly_count": self.anomaly_count,
+             "anomaly_rate": self.anomaly_rate,
+         }
+
+
+ @dataclass
+ class StreamingSession:
+     """A streaming anomaly detection session."""
+
+     id: str
+     source_id: str | None
+     algorithm: StreamingAlgorithm
+     window_size: int
+     threshold: float
+     columns: list[str]
+     status: StreamingSessionStatus
+     created_at: datetime
+     started_at: datetime | None = None
+     stopped_at: datetime | None = None
+     config: dict[str, Any] = field(default_factory=dict)
+
+     # Runtime state (not persisted)
+     _buffer: deque = field(default_factory=lambda: deque(maxlen=1000))
+     _column_stats: dict[str, StreamingStatistics] = field(default_factory=dict)
+     _alerts: list[StreamingAlert] = field(default_factory=list)
+     _alert_callbacks: list = field(default_factory=list)
+     _ema_values: dict[str, float] = field(default_factory=dict)
+
+     def __post_init__(self) -> None:
+         """Initialize column statistics."""
+         for col in self.columns:
+             self._column_stats[col] = StreamingStatistics()
+             self._ema_values[col] = 0.0
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary."""
+         return {
+             "id": self.id,
+             "source_id": self.source_id,
+             "algorithm": self.algorithm.value,
+             "window_size": self.window_size,
+             "threshold": self.threshold,
+             "columns": self.columns,
+             "status": self.status.value,
+             "created_at": self.created_at.isoformat(),
+             "started_at": self.started_at.isoformat() if self.started_at else None,
+             "stopped_at": self.stopped_at.isoformat() if self.stopped_at else None,
+             "config": self.config,
+             "statistics": {col: stats.to_dict() for col, stats in self._column_stats.items()},
+             "total_points": len(self._buffer),
+             "total_alerts": len(self._alerts),
+         }
+
+
+ class StreamingAnomalyDetector:
+     """Real-time streaming anomaly detection service.
+
+     Supports:
+     - Sliding window detection
+     - Multiple algorithms (Z-score, EMA, etc.)
+     - Online learning / model updates
+     - Alert callbacks for real-time notifications
+     """
+
+     def __init__(self) -> None:
+         """Initialize the streaming detector."""
+         self._sessions: dict[str, StreamingSession] = {}
+         self._lock = asyncio.Lock()
+
+     # =========================================================================
+     # Session Management
+     # =========================================================================
+
+     async def create_session(
+         self,
+         *,
+         source_id: str | None = None,
+         algorithm: StreamingAlgorithm = StreamingAlgorithm.ZSCORE_ROLLING,
+         window_size: int = 100,
+         threshold: float = 3.0,
+         columns: list[str] | None = None,
+         config: dict[str, Any] | None = None,
+     ) -> StreamingSession:
+         """Create a new streaming session.
+
+         Args:
+             source_id: Optional source ID to associate with.
+             algorithm: Detection algorithm to use.
+             window_size: Size of the sliding window.
+             threshold: Anomaly detection threshold.
+             columns: Columns to monitor (if None, monitors all numeric).
+             config: Additional algorithm configuration.
+
+         Returns:
+             Created streaming session.
+         """
+         session_id = str(uuid4())
+         session = StreamingSession(
+             id=session_id,
+             source_id=source_id,
+             algorithm=algorithm,
+             window_size=window_size,
+             threshold=threshold,
+             columns=columns or [],
+             status=StreamingSessionStatus.CREATED,
+             created_at=datetime.utcnow(),
+             config=config or {},
+         )
+
+         async with self._lock:
+             self._sessions[session_id] = session
+
+         return session
+
+     async def start_session(self, session_id: str) -> StreamingSession:
+         """Start a streaming session.
+
+         Args:
+             session_id: Session ID to start.
+
+         Returns:
+             Updated session.
+
+         Raises:
+             ValueError: If session not found.
+         """
+         async with self._lock:
+             session = self._sessions.get(session_id)
+             if session is None:
+                 raise ValueError(f"Session '{session_id}' not found")
+
+             session.status = StreamingSessionStatus.RUNNING
+             session.started_at = datetime.utcnow()
+
+         return session
+
+     async def stop_session(self, session_id: str) -> StreamingSession:
+         """Stop a streaming session.
+
+         Args:
+             session_id: Session ID to stop.
+
+         Returns:
+             Updated session.
+
+         Raises:
+             ValueError: If session not found.
+         """
+         async with self._lock:
+             session = self._sessions.get(session_id)
+             if session is None:
+                 raise ValueError(f"Session '{session_id}' not found")
+
+             session.status = StreamingSessionStatus.STOPPED
+             session.stopped_at = datetime.utcnow()
+
+         return session
+
+     async def get_session(self, session_id: str) -> StreamingSession | None:
+         """Get a session by ID.
+
+         Args:
+             session_id: Session ID.
+
+         Returns:
+             Session or None if not found.
+         """
+         return self._sessions.get(session_id)
+
+     async def list_sessions(self) -> list[StreamingSession]:
+         """List all active sessions.
+
+         Returns:
+             List of sessions.
+         """
+         return list(self._sessions.values())
+
+     async def delete_session(self, session_id: str) -> bool:
+         """Delete a session.
+
+         Args:
+             session_id: Session ID to delete.
+
+         Returns:
+             True if deleted.
+         """
+         async with self._lock:
+             if session_id in self._sessions:
+                 del self._sessions[session_id]
+                 return True
+         return False
+
+     # =========================================================================
+     # Data Processing
+     # =========================================================================
+
+     async def push_data_point(
+         self,
+         session_id: str,
+         data: dict[str, Any],
+         timestamp: datetime | None = None,
+     ) -> StreamingAlert | None:
+         """Push a data point to a streaming session.
+
+         Args:
+             session_id: Session ID.
+             data: Data point (column name -> value).
+             timestamp: Optional timestamp (defaults to now).
+
+         Returns:
+             Alert if anomaly detected, None otherwise.
+
+         Raises:
+             ValueError: If session not found or not running.
+         """
+         session = self._sessions.get(session_id)
+         if session is None:
+             raise ValueError(f"Session '{session_id}' not found")
+
+         if session.status != StreamingSessionStatus.RUNNING:
+             raise ValueError(f"Session '{session_id}' is not running")
+
+         timestamp = timestamp or datetime.utcnow()
+
+         # Store data point in buffer
+         session._buffer.append({"timestamp": timestamp, "data": data})
+
+         # Run anomaly detection
+         alert = await self._detect_anomaly(session, data, timestamp)
+
+         # Update statistics
+         for col, value in data.items():
+             if col in session._column_stats:
+                 try:
+                     numeric_value = float(value)
+                     is_anomaly = alert is not None and alert.is_anomaly
+                     session._column_stats[col].update(numeric_value, is_anomaly)
+                 except (ValueError, TypeError):
+                     pass
+
+         # Store alert and trigger callbacks
+         if alert is not None:
+             session._alerts.append(alert)
+             await self._trigger_alert_callbacks(session, alert)
+
+         return alert
+
+     async def push_batch(
+         self,
+         session_id: str,
+         data_points: list[dict[str, Any]],
+         timestamps: list[datetime] | None = None,
+     ) -> list[StreamingAlert]:
+         """Push a batch of data points.
+
+         Args:
+             session_id: Session ID.
+             data_points: List of data points.
+             timestamps: Optional list of timestamps.
+
+         Returns:
+             List of alerts.
+         """
+         alerts = []
+         timestamps = timestamps or [datetime.utcnow()] * len(data_points)
+
+         for data, ts in zip(data_points, timestamps):
+             alert = await self.push_data_point(session_id, data, ts)
+             if alert is not None:
+                 alerts.append(alert)
+
+         return alerts
+
+     # =========================================================================
+     # Anomaly Detection Algorithms
+     # =========================================================================
+
+     async def _detect_anomaly(
+         self,
+         session: StreamingSession,
+         data: dict[str, Any],
+         timestamp: datetime,
+     ) -> StreamingAlert | None:
+         """Run anomaly detection on a data point.
+
+         Args:
+             session: Streaming session.
+             data: Data point.
+             timestamp: Timestamp.
+
+         Returns:
+             Alert if anomaly detected.
+         """
+         algorithm = session.algorithm
+
+         if algorithm == StreamingAlgorithm.ZSCORE_ROLLING:
+             return await self._detect_zscore_rolling(session, data, timestamp)
+         elif algorithm == StreamingAlgorithm.EXPONENTIAL_MOVING_AVERAGE:
+             return await self._detect_ema(session, data, timestamp)
+         elif algorithm == StreamingAlgorithm.ISOLATION_FOREST_INCREMENTAL:
+             return await self._detect_isolation_forest_incremental(session, data, timestamp)
+         elif algorithm == StreamingAlgorithm.HALF_SPACE_TREES:
+             return await self._detect_half_space_trees(session, data, timestamp)
+         elif algorithm == StreamingAlgorithm.ROBUST_RANDOM_CUT_FOREST:
+             return await self._detect_rrcf(session, data, timestamp)
+         else:
+             return None
+
+     async def _detect_zscore_rolling(
+         self,
+         session: StreamingSession,
+         data: dict[str, Any],
+         timestamp: datetime,
+     ) -> StreamingAlert | None:
+         """Z-score based anomaly detection using rolling statistics.
+
+         Args:
+             session: Streaming session.
+             data: Data point.
+             timestamp: Timestamp.
+
+         Returns:
+             Alert if anomaly detected.
+         """
+         # Need at least window_size points for reliable detection
+         if len(session._buffer) < min(session.window_size, 10):
+             return None
+
+         # Get recent values for each column
+         window_data = list(session._buffer)[-session.window_size:]
+
+         max_zscore = 0.0
+         anomaly_columns = []
+         details = {}
+
+         for col in session.columns:
+             if col not in data:
+                 continue
+
+             try:
+                 current_value = float(data[col])
+             except (ValueError, TypeError):
+                 continue
+
+             # Calculate rolling mean and std from window
+             window_values = []
+             for point in window_data[:-1]:  # Exclude current point
+                 if col in point.get("data", {}):
+                     try:
+                         window_values.append(float(point["data"][col]))
+                     except (ValueError, TypeError):
+                         pass
+
+             if len(window_values) < 2:
+                 continue
+
+             window_mean = np.mean(window_values)
+             window_std = np.std(window_values)
+
+             if window_std == 0:
+                 window_std = 1e-10  # Avoid division by zero
+
+             zscore = abs(current_value - window_mean) / window_std
+
+             if zscore > max_zscore:
+                 max_zscore = zscore
+
+             if zscore > session.threshold:
+                 anomaly_columns.append(col)
+                 details[col] = {
+                     "value": current_value,
+                     "mean": float(window_mean),
+                     "std": float(window_std),
+                     "zscore": float(zscore),
+                 }
+
+         is_anomaly = len(anomaly_columns) > 0 or max_zscore > session.threshold
+
+         if is_anomaly:
+             return StreamingAlert(
+                 id=str(uuid4()),
+                 session_id=session.id,
+                 timestamp=timestamp,
+                 data_point=data,
+                 anomaly_score=float(max_zscore),
+                 is_anomaly=True,
+                 algorithm=StreamingAlgorithm.ZSCORE_ROLLING,
+                 details={
+                     "anomaly_columns": anomaly_columns,
+                     "column_details": details,
+                     "threshold": session.threshold,
+                 },
+             )
+
+         return None
+
+     async def _detect_ema(
+         self,
+         session: StreamingSession,
+         data: dict[str, Any],
+         timestamp: datetime,
+     ) -> StreamingAlert | None:
+         """Exponential Moving Average based anomaly detection.
+
+         Args:
+             session: Streaming session.
+             data: Data point.
+             timestamp: Timestamp.
+
+         Returns:
+             Alert if anomaly detected.
+         """
+         alpha = session.config.get("alpha", 0.1)  # Smoothing factor
+         threshold_multiplier = session.config.get("threshold_multiplier", 2.0)
+
+         max_deviation = 0.0
+         anomaly_columns = []
+         details = {}
+
+         for col in session.columns:
+             if col not in data:
+                 continue
+
+             try:
+                 current_value = float(data[col])
+             except (ValueError, TypeError):
+                 continue
+
+             # Initialize EMA if first point
+             if session._ema_values.get(col, 0) == 0:
+                 session._ema_values[col] = current_value
+                 continue
+
+             # Calculate EMA
+             prev_ema = session._ema_values[col]
+             new_ema = alpha * current_value + (1 - alpha) * prev_ema
+             session._ema_values[col] = new_ema
+
+             # Calculate deviation from EMA
+             deviation = abs(current_value - prev_ema)
+
+             # Use rolling std for threshold
+             stats = session._column_stats.get(col)
+             if stats and stats.std > 0:
+                 normalized_deviation = deviation / stats.std
+                 if normalized_deviation > max_deviation:
+                     max_deviation = normalized_deviation
+
+                 if normalized_deviation > session.threshold * threshold_multiplier:
+                     anomaly_columns.append(col)
+                     details[col] = {
+                         "value": current_value,
+                         "ema": float(new_ema),
+                         "deviation": float(deviation),
+                         "normalized_deviation": float(normalized_deviation),
+                     }
+
+         is_anomaly = len(anomaly_columns) > 0
+
+         if is_anomaly:
+             return StreamingAlert(
+                 id=str(uuid4()),
+                 session_id=session.id,
+                 timestamp=timestamp,
+                 data_point=data,
+                 anomaly_score=float(max_deviation),
+                 is_anomaly=True,
+                 algorithm=StreamingAlgorithm.EXPONENTIAL_MOVING_AVERAGE,
+                 details={
+                     "anomaly_columns": anomaly_columns,
+                     "column_details": details,
+                     "alpha": alpha,
+                 },
+             )
+
+         return None
+
+     async def _detect_isolation_forest_incremental(
+         self,
+         session: StreamingSession,
+         data: dict[str, Any],
+         timestamp: datetime,
+     ) -> StreamingAlert | None:
+         """Incremental Isolation Forest based anomaly detection.
+
+         Uses a simplified streaming version that periodically retrains.
+
+         Args:
+             session: Streaming session.
+             data: Data point.
+             timestamp: Timestamp.
+
+         Returns:
+             Alert if anomaly detected.
+         """
+         # Minimum points before detection
+         if len(session._buffer) < session.window_size:
+             return None
+
+         try:
+             from sklearn.ensemble import IsolationForest
+
+             # Get recent window data
+             window_data = list(session._buffer)[-session.window_size:]
+
+             # Build feature matrix from window
+             feature_cols = [col for col in session.columns if col in data]
+             if not feature_cols:
+                 return None
+
+             X = []
+             for point in window_data:
+                 row = []
+                 valid = True
+                 for col in feature_cols:
+                     if col in point.get("data", {}):
+                         try:
+                             row.append(float(point["data"][col]))
+                         except (ValueError, TypeError):
+                             valid = False
+                             break
+                     else:
+                         valid = False
+                         break
+                 if valid:
+                     X.append(row)
+
+             if len(X) < 10:
+                 return None
+
+             X = np.array(X)
+
+             # Build current point feature vector
+             current_point = []
+             for col in feature_cols:
+                 try:
+                     current_point.append(float(data[col]))
+                 except (ValueError, TypeError):
+                     return None
+
+             current_point = np.array([current_point])
+
+             # Fit Isolation Forest on window
+             contamination = session.config.get("contamination", 0.1)
+             clf = IsolationForest(
+                 n_estimators=50,
+                 contamination=contamination,
+                 random_state=42,
+             )
+             clf.fit(X)
+
+             # Predict on current point
+             prediction = clf.predict(current_point)[0]
+             score = -clf.score_samples(current_point)[0]
+
+             is_anomaly = prediction == -1
+
+             if is_anomaly:
+                 return StreamingAlert(
+                     id=str(uuid4()),
+                     session_id=session.id,
+                     timestamp=timestamp,
+                     data_point=data,
+                     anomaly_score=float(score),
+                     is_anomaly=True,
+                     algorithm=StreamingAlgorithm.ISOLATION_FOREST_INCREMENTAL,
+                     details={
+                         "window_size": len(X),
+                         "contamination": contamination,
+                     },
+                 )
+
+         except ImportError:
+             pass
+
+         return None
+
+     async def _detect_half_space_trees(
+         self,
+         session: StreamingSession,
+         data: dict[str, Any],
+         timestamp: datetime,
+     ) -> StreamingAlert | None:
+         """Half-Space Trees streaming anomaly detection.
+
+         A simplified implementation of HS-Trees for streaming.
+
+         Args:
+             session: Streaming session.
+             data: Data point.
+             timestamp: Timestamp.
+
+         Returns:
+             Alert if anomaly detected.
+         """
+         # Use Z-score as a fallback for HS-Trees
+         # A full implementation would maintain the tree structure
+         return await self._detect_zscore_rolling(session, data, timestamp)
+
+     async def _detect_rrcf(
+         self,
+         session: StreamingSession,
+         data: dict[str, Any],
+         timestamp: datetime,
+     ) -> StreamingAlert | None:
+         """Robust Random Cut Forest streaming anomaly detection.
+
+         A simplified implementation using codisp (collusive displacement).
+
+         Args:
+             session: Streaming session.
+             data: Data point.
+             timestamp: Timestamp.
+
+         Returns:
+             Alert if anomaly detected.
+         """
+         # Use Z-score as a fallback for RRCF
+         # A full implementation would use the rrcf library
+         return await self._detect_zscore_rolling(session, data, timestamp)
+
+     # =========================================================================
+     # Alert Management
+     # =========================================================================
+
+     def register_alert_callback(
+         self,
+         session_id: str,
+         callback: callable,
+     ) -> None:
+         """Register a callback for alerts.
+
+         Args:
+             session_id: Session ID.
+             callback: Callback function (async).
+         """
+         session = self._sessions.get(session_id)
+         if session:
+             session._alert_callbacks.append(callback)
+
+     def unregister_alert_callback(
+         self,
+         session_id: str,
+         callback: callable,
+     ) -> None:
+         """Unregister an alert callback.
+
+         Args:
+             session_id: Session ID.
+             callback: Callback function to remove.
+         """
+         session = self._sessions.get(session_id)
+         if session and callback in session._alert_callbacks:
+             session._alert_callbacks.remove(callback)
+
+     async def _trigger_alert_callbacks(
+         self,
+         session: StreamingSession,
+         alert: StreamingAlert,
+     ) -> None:
+         """Trigger all registered alert callbacks.
+
+         Args:
+             session: Streaming session.
+             alert: Alert to send.
+         """
+         for callback in session._alert_callbacks:
+             try:
+                 if asyncio.iscoroutinefunction(callback):
+                     await callback(alert)
+                 else:
+                     callback(alert)
+             except Exception:
+                 pass  # Don't let callback errors break detection
+
+     async def get_alerts(
+         self,
+         session_id: str,
+         *,
+         limit: int = 100,
+         offset: int = 0,
+     ) -> list[StreamingAlert]:
+         """Get alerts for a session.
+
+         Args:
+             session_id: Session ID.
+             limit: Maximum alerts to return.
+             offset: Offset for pagination.
+
+         Returns:
+             List of alerts.
+         """
+         session = self._sessions.get(session_id)
+         if session is None:
+             return []
+
+         # Return alerts in reverse order (most recent first)
+         alerts = list(reversed(session._alerts))
+         return alerts[offset : offset + limit]
+
+     async def get_statistics(
+         self,
+         session_id: str,
+     ) -> dict[str, Any]:
+         """Get statistics for a session.
+
+         Args:
+             session_id: Session ID.
+
+         Returns:
+             Statistics dictionary.
+         """
+         session = self._sessions.get(session_id)
+         if session is None:
+             return {}
+
+         return {
+             "total_points": len(session._buffer),
+             "total_alerts": len(session._alerts),
+             "columns": {
+                 col: stats.to_dict()
+                 for col, stats in session._column_stats.items()
+             },
+             "buffer_utilization": len(session._buffer) / session._buffer.maxlen if session._buffer.maxlen else 0,
+         }
+
+     async def get_recent_data(
+         self,
+         session_id: str,
+         *,
+         limit: int = 100,
+     ) -> list[dict[str, Any]]:
+         """Get recent data points.
+
+         Args:
+             session_id: Session ID.
+             limit: Maximum points to return.
+
+         Returns:
+             List of recent data points.
+         """
+         session = self._sessions.get(session_id)
+         if session is None:
+             return []
+
+         # Return most recent points
+         recent = list(session._buffer)[-limit:]
+         return [
+             {
+                 "timestamp": point["timestamp"].isoformat(),
+                 "data": point["data"],
+             }
+             for point in reversed(recent)
+         ]
+
+
+ # Global streaming detector instance
+ _streaming_detector: StreamingAnomalyDetector | None = None
+
+
+ def get_streaming_detector() -> StreamingAnomalyDetector:
+     """Get the global streaming detector instance.
+
+     Returns:
+         StreamingAnomalyDetector instance.
+     """
+     global _streaming_detector
+     if _streaming_detector is None:
+         _streaming_detector = StreamingAnomalyDetector()
+     return _streaming_detector
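
Usage note (not part of the diff): the following is a minimal sketch of how the new streaming API in truthound_dashboard/core/streaming_anomaly.py could be driven, based solely on the code shown above. The column name "latency_ms" and the pushed values are illustrative assumptions, not anything shipped in the package.

# Minimal usage sketch of StreamingAnomalyDetector (illustrative values).
import asyncio

from truthound_dashboard.core.streaming_anomaly import (
    StreamingAlgorithm,
    get_streaming_detector,
)


async def main() -> None:
    detector = get_streaming_detector()

    # Create and start a session monitoring one hypothetical numeric column.
    session = await detector.create_session(
        algorithm=StreamingAlgorithm.ZSCORE_ROLLING,
        window_size=50,
        threshold=3.0,
        columns=["latency_ms"],
    )
    await detector.start_session(session.id)

    # Push points; the Z-score detector needs ~10 buffered points before it
    # starts flagging, so the final outlier (500) should trigger an alert.
    for value in [100, 102, 98, 101, 99, 100, 103, 97, 101, 100, 500]:
        alert = await detector.push_data_point(session.id, {"latency_ms": value})
        if alert is not None:
            print(alert.to_dict())

    print(await detector.get_statistics(session.id))
    await detector.stop_session(session.id)


asyncio.run(main())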