truthound-dashboard 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. truthound_dashboard/api/alerts.py +258 -0
  2. truthound_dashboard/api/anomaly.py +1302 -0
  3. truthound_dashboard/api/cross_alerts.py +352 -0
  4. truthound_dashboard/api/deps.py +143 -0
  5. truthound_dashboard/api/drift_monitor.py +540 -0
  6. truthound_dashboard/api/lineage.py +1151 -0
  7. truthound_dashboard/api/maintenance.py +363 -0
  8. truthound_dashboard/api/middleware.py +373 -1
  9. truthound_dashboard/api/model_monitoring.py +805 -0
  10. truthound_dashboard/api/notifications_advanced.py +2452 -0
  11. truthound_dashboard/api/plugins.py +2096 -0
  12. truthound_dashboard/api/profile.py +211 -14
  13. truthound_dashboard/api/reports.py +853 -0
  14. truthound_dashboard/api/router.py +147 -0
  15. truthound_dashboard/api/rule_suggestions.py +310 -0
  16. truthound_dashboard/api/schema_evolution.py +231 -0
  17. truthound_dashboard/api/sources.py +47 -3
  18. truthound_dashboard/api/triggers.py +190 -0
  19. truthound_dashboard/api/validations.py +13 -0
  20. truthound_dashboard/api/validators.py +333 -4
  21. truthound_dashboard/api/versioning.py +309 -0
  22. truthound_dashboard/api/websocket.py +301 -0
  23. truthound_dashboard/core/__init__.py +27 -0
  24. truthound_dashboard/core/anomaly.py +1395 -0
  25. truthound_dashboard/core/anomaly_explainer.py +633 -0
  26. truthound_dashboard/core/cache.py +206 -0
  27. truthound_dashboard/core/cached_services.py +422 -0
  28. truthound_dashboard/core/charts.py +352 -0
  29. truthound_dashboard/core/connections.py +1069 -42
  30. truthound_dashboard/core/cross_alerts.py +837 -0
  31. truthound_dashboard/core/drift_monitor.py +1477 -0
  32. truthound_dashboard/core/drift_sampling.py +669 -0
  33. truthound_dashboard/core/i18n/__init__.py +42 -0
  34. truthound_dashboard/core/i18n/detector.py +173 -0
  35. truthound_dashboard/core/i18n/messages.py +564 -0
  36. truthound_dashboard/core/lineage.py +971 -0
  37. truthound_dashboard/core/maintenance.py +443 -5
  38. truthound_dashboard/core/model_monitoring.py +1043 -0
  39. truthound_dashboard/core/notifications/channels.py +1020 -1
  40. truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
  41. truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
  42. truthound_dashboard/core/notifications/deduplication/service.py +400 -0
  43. truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
  44. truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
  45. truthound_dashboard/core/notifications/dispatcher.py +43 -0
  46. truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
  47. truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
  48. truthound_dashboard/core/notifications/escalation/engine.py +429 -0
  49. truthound_dashboard/core/notifications/escalation/models.py +336 -0
  50. truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
  51. truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
  52. truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
  53. truthound_dashboard/core/notifications/events.py +49 -0
  54. truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
  55. truthound_dashboard/core/notifications/metrics/base.py +528 -0
  56. truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
  57. truthound_dashboard/core/notifications/routing/__init__.py +169 -0
  58. truthound_dashboard/core/notifications/routing/combinators.py +184 -0
  59. truthound_dashboard/core/notifications/routing/config.py +375 -0
  60. truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
  61. truthound_dashboard/core/notifications/routing/engine.py +382 -0
  62. truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
  63. truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
  64. truthound_dashboard/core/notifications/routing/rules.py +625 -0
  65. truthound_dashboard/core/notifications/routing/validator.py +678 -0
  66. truthound_dashboard/core/notifications/service.py +2 -0
  67. truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
  68. truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
  69. truthound_dashboard/core/notifications/throttling/builder.py +311 -0
  70. truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
  71. truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
  72. truthound_dashboard/core/openlineage.py +1028 -0
  73. truthound_dashboard/core/plugins/__init__.py +39 -0
  74. truthound_dashboard/core/plugins/docs/__init__.py +39 -0
  75. truthound_dashboard/core/plugins/docs/extractor.py +703 -0
  76. truthound_dashboard/core/plugins/docs/renderers.py +804 -0
  77. truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
  78. truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
  79. truthound_dashboard/core/plugins/hooks/manager.py +403 -0
  80. truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
  81. truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
  82. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
  83. truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
  84. truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
  85. truthound_dashboard/core/plugins/loader.py +504 -0
  86. truthound_dashboard/core/plugins/registry.py +810 -0
  87. truthound_dashboard/core/plugins/reporter_executor.py +588 -0
  88. truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
  89. truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
  90. truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
  91. truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
  92. truthound_dashboard/core/plugins/sandbox.py +617 -0
  93. truthound_dashboard/core/plugins/security/__init__.py +68 -0
  94. truthound_dashboard/core/plugins/security/analyzer.py +535 -0
  95. truthound_dashboard/core/plugins/security/policies.py +311 -0
  96. truthound_dashboard/core/plugins/security/protocols.py +296 -0
  97. truthound_dashboard/core/plugins/security/signing.py +842 -0
  98. truthound_dashboard/core/plugins/security.py +446 -0
  99. truthound_dashboard/core/plugins/validator_executor.py +401 -0
  100. truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
  101. truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
  102. truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
  103. truthound_dashboard/core/plugins/versioning/semver.py +266 -0
  104. truthound_dashboard/core/profile_comparison.py +601 -0
  105. truthound_dashboard/core/report_history.py +570 -0
  106. truthound_dashboard/core/reporters/__init__.py +57 -0
  107. truthound_dashboard/core/reporters/base.py +296 -0
  108. truthound_dashboard/core/reporters/csv_reporter.py +155 -0
  109. truthound_dashboard/core/reporters/html_reporter.py +598 -0
  110. truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
  111. truthound_dashboard/core/reporters/i18n/base.py +494 -0
  112. truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
  113. truthound_dashboard/core/reporters/json_reporter.py +160 -0
  114. truthound_dashboard/core/reporters/junit_reporter.py +233 -0
  115. truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
  116. truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
  117. truthound_dashboard/core/reporters/registry.py +272 -0
  118. truthound_dashboard/core/rule_generator.py +2088 -0
  119. truthound_dashboard/core/scheduler.py +822 -12
  120. truthound_dashboard/core/schema_evolution.py +858 -0
  121. truthound_dashboard/core/services.py +152 -9
  122. truthound_dashboard/core/statistics.py +718 -0
  123. truthound_dashboard/core/streaming_anomaly.py +883 -0
  124. truthound_dashboard/core/triggers/__init__.py +45 -0
  125. truthound_dashboard/core/triggers/base.py +226 -0
  126. truthound_dashboard/core/triggers/evaluators.py +609 -0
  127. truthound_dashboard/core/triggers/factory.py +363 -0
  128. truthound_dashboard/core/unified_alerts.py +870 -0
  129. truthound_dashboard/core/validation_limits.py +509 -0
  130. truthound_dashboard/core/versioning.py +709 -0
  131. truthound_dashboard/core/websocket/__init__.py +59 -0
  132. truthound_dashboard/core/websocket/manager.py +512 -0
  133. truthound_dashboard/core/websocket/messages.py +130 -0
  134. truthound_dashboard/db/__init__.py +30 -0
  135. truthound_dashboard/db/models.py +3375 -3
  136. truthound_dashboard/main.py +22 -0
  137. truthound_dashboard/schemas/__init__.py +396 -1
  138. truthound_dashboard/schemas/anomaly.py +1258 -0
  139. truthound_dashboard/schemas/base.py +4 -0
  140. truthound_dashboard/schemas/cross_alerts.py +334 -0
  141. truthound_dashboard/schemas/drift_monitor.py +890 -0
  142. truthound_dashboard/schemas/lineage.py +428 -0
  143. truthound_dashboard/schemas/maintenance.py +154 -0
  144. truthound_dashboard/schemas/model_monitoring.py +374 -0
  145. truthound_dashboard/schemas/notifications_advanced.py +1363 -0
  146. truthound_dashboard/schemas/openlineage.py +704 -0
  147. truthound_dashboard/schemas/plugins.py +1293 -0
  148. truthound_dashboard/schemas/profile.py +420 -34
  149. truthound_dashboard/schemas/profile_comparison.py +242 -0
  150. truthound_dashboard/schemas/reports.py +285 -0
  151. truthound_dashboard/schemas/rule_suggestion.py +434 -0
  152. truthound_dashboard/schemas/schema_evolution.py +164 -0
  153. truthound_dashboard/schemas/source.py +117 -2
  154. truthound_dashboard/schemas/triggers.py +511 -0
  155. truthound_dashboard/schemas/unified_alerts.py +223 -0
  156. truthound_dashboard/schemas/validation.py +25 -1
  157. truthound_dashboard/schemas/validators/__init__.py +11 -0
  158. truthound_dashboard/schemas/validators/base.py +151 -0
  159. truthound_dashboard/schemas/versioning.py +152 -0
  160. truthound_dashboard/static/index.html +2 -2
  161. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/METADATA +147 -23
  162. truthound_dashboard-1.4.1.dist-info/RECORD +239 -0
  163. truthound_dashboard/static/assets/index-BZG20KuF.js +0 -586
  164. truthound_dashboard/static/assets/index-D_HyZ3pb.css +0 -1
  165. truthound_dashboard/static/assets/unmerged_dictionaries-CtpqQBm0.js +0 -1
  166. truthound_dashboard-1.3.1.dist-info/RECORD +0 -110
  167. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/WHEEL +0 -0
  168. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/entry_points.txt +0 -0
  169. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,971 @@
1
+ """Data lineage service.
2
+
3
+ This module provides services for managing data lineage graphs,
4
+ including node and edge CRUD, impact analysis, and auto-discovery.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from collections.abc import Sequence
10
+ from datetime import datetime
11
+ from typing import Any, Literal
12
+
13
+ from sqlalchemy import select
14
+ from sqlalchemy.ext.asyncio import AsyncSession
15
+
16
+ from truthound_dashboard.db import BaseRepository
17
+ from truthound_dashboard.db.models import AnomalyDetection, LineageEdge, LineageNode, Source
18
+
19
+
20
class LineageNodeRepository(BaseRepository[LineageNode]):
    """Data-access helpers for ``LineageNode`` rows."""

    model = LineageNode

    async def get_all_nodes(
        self,
        *,
        offset: int = 0,
        limit: int = 500,
    ) -> Sequence[LineageNode]:
        """Fetch one page of lineage nodes.

        Args:
            offset: How many rows to skip.
            limit: Upper bound on the number of rows returned.

        Returns:
            The nodes produced by the inherited ``list`` call.
        """
        page = await self.list(offset=offset, limit=limit)
        return page

    async def get_by_source_id(self, source_id: str) -> LineageNode | None:
        """Look up the lineage node linked to a data source.

        Args:
            source_id: ID of the data source.

        Returns:
            The matching node, or None when no node references the source.
        """
        # LIMIT 1 keeps scalar_one_or_none() from raising if several nodes
        # reference the same source.
        statement = (
            select(LineageNode)
            .where(LineageNode.source_id == source_id)
            .limit(1)
        )
        rows = await self.session.execute(statement)
        return rows.scalar_one_or_none()

    async def get_nodes_by_type(
        self,
        node_type: str,
        *,
        limit: int = 100,
    ) -> Sequence[LineageNode]:
        """Fetch nodes whose ``node_type`` equals the given value.

        Args:
            node_type: Node type (source, transform, sink).
            limit: Upper bound on the number of rows returned.

        Returns:
            The matching nodes.
        """
        type_filter = LineageNode.node_type == node_type
        return await self.list(limit=limit, filters=[type_filter])
75
+
76
+
77
class LineageEdgeRepository(BaseRepository[LineageEdge]):
    """Data-access helpers for ``LineageEdge`` rows."""

    model = LineageEdge

    async def get_all_edges(
        self,
        *,
        offset: int = 0,
        limit: int = 1000,
    ) -> Sequence[LineageEdge]:
        """Fetch one page of lineage edges.

        Args:
            offset: How many rows to skip.
            limit: Upper bound on the number of rows returned.

        Returns:
            The edges produced by the inherited ``list`` call.
        """
        page = await self.list(offset=offset, limit=limit)
        return page

    async def get_outgoing_edges(
        self,
        node_id: str,
        *,
        limit: int = 100,
    ) -> Sequence[LineageEdge]:
        """Fetch edges that start at the given node.

        Args:
            node_id: ID matched against each edge's ``source_node_id``.
            limit: Upper bound on the number of rows returned.

        Returns:
            The matching edges.
        """
        starts_here = LineageEdge.source_node_id == node_id
        return await self.list(limit=limit, filters=[starts_here])

    async def get_incoming_edges(
        self,
        node_id: str,
        *,
        limit: int = 100,
    ) -> Sequence[LineageEdge]:
        """Fetch edges that end at the given node.

        Args:
            node_id: ID matched against each edge's ``target_node_id``.
            limit: Upper bound on the number of rows returned.

        Returns:
            The matching edges.
        """
        ends_here = LineageEdge.target_node_id == node_id
        return await self.list(limit=limit, filters=[ends_here])

    async def edge_exists(
        self,
        source_node_id: str,
        target_node_id: str,
        edge_type: str = "derives_from",
    ) -> bool:
        """Report whether an edge with these endpoints and type is stored.

        Args:
            source_node_id: Source node ID.
            target_node_id: Target node ID.
            edge_type: Edge type.

        Returns:
            True when at least one matching edge exists.
        """
        # Multiple criteria passed to where() are combined with AND.
        statement = (
            select(LineageEdge)
            .where(
                LineageEdge.source_node_id == source_node_id,
                LineageEdge.target_node_id == target_node_id,
                LineageEdge.edge_type == edge_type,
            )
            .limit(1)
        )
        rows = await self.session.execute(statement)
        match = rows.scalar_one_or_none()
        return match is not None
163
+
164
+
165
+ class LineageService:
166
+ """Service for managing data lineage graphs.
167
+
168
+ Provides functionality for:
169
+ - Node and edge CRUD operations
170
+ - Impact analysis (upstream/downstream)
171
+ - Auto-discovery from source metadata
172
+ - Position management for visualization
173
+ """
174
+
175
def __init__(self, session: AsyncSession) -> None:
    """Bind the service and its repositories to one database session.

    Args:
        session: Database session shared by the node and edge repositories.
    """
    self.session = session
    # Both repositories are built on the same session as the service itself.
    self.node_repo, self.edge_repo = (
        LineageNodeRepository(session),
        LineageEdgeRepository(session),
    )
184
+
185
+ # =========================================================================
186
+ # Graph Operations
187
+ # =========================================================================
188
+
189
async def get_graph(
    self,
    source_id: str | None = None,
) -> dict[str, Any]:
    """Return the lineage graph, optionally limited to one source's neighbourhood.

    Without ``source_id`` the graph is whatever ``get_all_nodes`` and
    ``get_all_edges`` return with their default page limits.

    With ``source_id`` the graph contains the node linked to that source,
    the nodes one hop away from it in either direction, and every edge whose
    two endpoints both belong to that node set.

    Args:
        source_id: Optional source ID to filter by.

    Returns:
        Dictionary with ``nodes``, ``edges``, ``total_nodes`` and
        ``total_edges``. All four are empty/zero when ``source_id`` is given
        but no node is linked to it.
    """
    if source_id:
        root_node = await self.node_repo.get_by_source_id(source_id)
        if not root_node:
            return {"nodes": [], "edges": [], "total_nodes": 0, "total_edges": 0}

        # Only direct neighbours are collected (no deeper traversal).
        node_ids = {root_node.id}
        nodes = [root_node]

        outgoing = await self.edge_repo.get_outgoing_edges(root_node.id)
        incoming = await self.edge_repo.get_incoming_edges(root_node.id)
        neighbour_ids = [edge.target_node_id for edge in outgoing]
        neighbour_ids += [edge.source_node_id for edge in incoming]

        for neighbour_id in neighbour_ids:
            if neighbour_id in node_ids:
                continue
            neighbour = await self.node_repo.get_by_id(neighbour_id)
            if neighbour:
                nodes.append(neighbour)
                node_ids.add(neighbour.id)

        # Filter in the database rather than loading the first 1000 edges
        # and filtering in Python: with a larger edge table the old approach
        # silently dropped edges that connect the selected nodes.
        id_list = list(node_ids)
        statement = select(LineageEdge).where(
            LineageEdge.source_node_id.in_(id_list),
            LineageEdge.target_node_id.in_(id_list),
        )
        rows = await self.session.execute(statement)
        edges = list(rows.scalars().all())
    else:
        nodes = list(await self.node_repo.get_all_nodes())
        edges = list(await self.edge_repo.get_all_edges())

    return {
        "nodes": [self._node_to_dict(n) for n in nodes],
        "edges": [self._edge_to_dict(e) for e in edges],
        "total_nodes": len(nodes),
        "total_edges": len(edges),
    }
246
+
247
+ # =========================================================================
248
+ # Node Operations
249
+ # =========================================================================
250
+
251
async def create_node(
    self,
    *,
    name: str,
    node_type: str,
    source_id: str | None = None,
    metadata: dict[str, Any] | None = None,
    position_x: float | None = None,
    position_y: float | None = None,
) -> LineageNode:
    """Persist a new lineage node through the node repository.

    Args:
        name: Node name.
        node_type: Node type (source, transform, sink).
        source_id: Optional ID of the data source the node is linked to.
        metadata: Optional extra metadata.
        position_x: X coordinate for visualization.
        position_y: Y coordinate for visualization.

    Returns:
        The node returned by the repository's ``create`` call.
    """
    # The ``metadata`` argument is handed to the repository as ``metadata_json``.
    attributes = {
        "name": name,
        "node_type": node_type,
        "source_id": source_id,
        "metadata_json": metadata,
        "position_x": position_x,
        "position_y": position_y,
    }
    return await self.node_repo.create(**attributes)
283
+
284
async def get_node(self, node_id: str) -> LineageNode | None:
    """Fetch a single lineage node.

    Args:
        node_id: Node ID.

    Returns:
        The result of the node repository's ``get_by_id`` lookup
        (LineageNode or None).
    """
    found = await self.node_repo.get_by_id(node_id)
    return found
294
+
295
async def update_node(
    self,
    node_id: str,
    *,
    name: str | None = None,
    metadata: dict[str, Any] | None = None,
    position_x: float | None = None,
    position_y: float | None = None,
) -> LineageNode | None:
    """Apply the supplied field changes to an existing node.

    Arguments left as None are treated as "leave unchanged", so this method
    cannot reset a field to None.

    Args:
        node_id: Node ID.
        name: New name.
        metadata: New metadata (stored on ``metadata_json``).
        position_x: New X coordinate.
        position_y: New Y coordinate.

    Returns:
        The refreshed node, or None when the node does not exist.
    """
    node = await self.node_repo.get_by_id(node_id)
    if node is None:
        return None

    requested = {
        "name": name,
        "metadata_json": metadata,
        "position_x": position_x,
        "position_y": position_y,
    }
    for attribute, value in requested.items():
        if value is not None:
            setattr(node, attribute, value)

    # Flush the pending changes, then reload the node from the session.
    await self.session.flush()
    await self.session.refresh(node)
    return node
332
+
333
async def delete_node(self, node_id: str) -> bool:
    """Delete a node through the node repository.

    NOTE(review): no edges are removed here explicitly; edges attached to
    the node are presumably cleaned up by a database/ORM cascade -- confirm
    against the model definitions.

    Args:
        node_id: Node ID.

    Returns:
        The result of the repository's ``delete`` call (True if deleted).
    """
    was_deleted = await self.node_repo.delete(node_id)
    return was_deleted
343
+
344
+ # =========================================================================
345
+ # Edge Operations
346
+ # =========================================================================
347
+
348
async def create_edge(
    self,
    *,
    source_node_id: str,
    target_node_id: str,
    edge_type: str = "derives_from",
    metadata: dict[str, Any] | None = None,
) -> LineageEdge:
    """Persist a new edge after validating its endpoints and uniqueness.

    Args:
        source_node_id: Source node ID.
        target_node_id: Target node ID.
        edge_type: Edge type.
        metadata: Optional extra metadata (passed on as ``metadata_json``).

    Returns:
        The edge returned by the repository's ``create`` call.

    Raises:
        ValueError: If either endpoint does not exist, or an edge with the
            same endpoints and type is already stored.
    """
    # Endpoints are checked source first, so a missing source is reported
    # before the target is even looked up.
    if await self.node_repo.get_by_id(source_node_id) is None:
        raise ValueError(f"Source node '{source_node_id}' not found")
    if await self.node_repo.get_by_id(target_node_id) is None:
        raise ValueError(f"Target node '{target_node_id}' not found")

    already_present = await self.edge_repo.edge_exists(
        source_node_id, target_node_id, edge_type
    )
    if already_present:
        raise ValueError("Edge already exists")

    return await self.edge_repo.create(
        source_node_id=source_node_id,
        target_node_id=target_node_id,
        edge_type=edge_type,
        metadata_json=metadata,
    )
390
+
391
async def get_edge(self, edge_id: str) -> LineageEdge | None:
    """Fetch a single lineage edge.

    Args:
        edge_id: Edge ID.

    Returns:
        The result of the edge repository's ``get_by_id`` lookup
        (LineageEdge or None).
    """
    found = await self.edge_repo.get_by_id(edge_id)
    return found
401
+
402
async def delete_edge(self, edge_id: str) -> bool:
    """Delete an edge through the edge repository.

    Args:
        edge_id: Edge ID.

    Returns:
        The result of the repository's ``delete`` call (True if deleted).
    """
    was_deleted = await self.edge_repo.delete(edge_id)
    return was_deleted
412
+
413
+ # =========================================================================
414
+ # Impact Analysis
415
+ # =========================================================================
416
+
417
async def analyze_impact(
    self,
    node_id: str,
    direction: Literal["upstream", "downstream", "both"] = "both",
    max_depth: int = 10,
) -> dict[str, Any]:
    """Summarise the nodes reachable upstream and/or downstream of a node.

    Args:
        node_id: Starting node ID.
        direction: Which side(s) of the graph to traverse.
        max_depth: Maximum traversal depth passed to the traversal helpers.

    Returns:
        Dictionary with the root node's ID and name, the requested
        direction, node summaries per side, the distinct source IDs linked
        to any reached node, and the per-side and combined counts.

    Raises:
        ValueError: If node not found.
    """
    root_node = await self.node_repo.get_by_id(node_id)
    if root_node is None:
        raise ValueError(f"Node '{node_id}' not found")

    # A side that was not requested is reported as an empty list.
    upstream: list[Any] = []
    downstream: list[Any] = []
    if direction in ("upstream", "both"):
        upstream = await self._traverse_upstream(node_id, max_depth)
    if direction in ("downstream", "both"):
        downstream = await self._traverse_downstream(node_id, max_depth)

    upstream_nodes = [self._node_summary(n) for n in upstream]
    downstream_nodes = [self._node_summary(n) for n in downstream]
    affected_sources = {
        n.source_id for n in (*upstream, *downstream) if n.source_id
    }

    upstream_count = len(upstream_nodes)
    downstream_count = len(downstream_nodes)
    return {
        "root_node_id": node_id,
        "root_node_name": root_node.name,
        "direction": direction,
        "upstream_nodes": upstream_nodes,
        "downstream_nodes": downstream_nodes,
        "affected_sources": list(affected_sources),
        "upstream_count": upstream_count,
        "downstream_count": downstream_count,
        "total_affected": upstream_count + downstream_count,
    }
469
+
470
+ async def _traverse_upstream(
471
+ self,
472
+ node_id: str,
473
+ max_depth: int,
474
+ visited: set[str] | None = None,
475
+ depth: int = 0,
476
+ ) -> list[LineageNode]:
477
+ """Traverse upstream (parents) from a node."""
478
+ if visited is None:
479
+ visited = set()
480
+
481
+ if depth >= max_depth or node_id in visited:
482
+ return []
483
+
484
+ visited.add(node_id)
485
+ result: list[LineageNode] = []
486
+
487
+ incoming = await self.edge_repo.get_incoming_edges(node_id)
488
+ for edge in incoming:
489
+ parent = await self.node_repo.get_by_id(edge.source_node_id)
490
+ if parent and parent.id not in visited:
491
+ result.append(parent)
492
+ result.extend(
493
+ await self._traverse_upstream(
494
+ parent.id, max_depth, visited, depth + 1
495
+ )
496
+ )
497
+
498
+ return result
499
+
500
+ async def _traverse_downstream(
501
+ self,
502
+ node_id: str,
503
+ max_depth: int,
504
+ visited: set[str] | None = None,
505
+ depth: int = 0,
506
+ ) -> list[LineageNode]:
507
+ """Traverse downstream (children) from a node."""
508
+ if visited is None:
509
+ visited = set()
510
+
511
+ if depth >= max_depth or node_id in visited:
512
+ return []
513
+
514
+ visited.add(node_id)
515
+ result: list[LineageNode] = []
516
+
517
+ outgoing = await self.edge_repo.get_outgoing_edges(node_id)
518
+ for edge in outgoing:
519
+ child = await self.node_repo.get_by_id(edge.target_node_id)
520
+ if child and child.id not in visited:
521
+ result.append(child)
522
+ result.extend(
523
+ await self._traverse_downstream(
524
+ child.id, max_depth, visited, depth + 1
525
+ )
526
+ )
527
+
528
+ return result
529
+
530
+ # =========================================================================
531
+ # Auto-Discovery
532
+ # =========================================================================
533
+
534
async def auto_discover(
    self,
    source_id: str,
    include_fk_relations: bool = True,
    max_depth: int = 3,
) -> dict[str, Any]:
    """Ensure a lineage node exists for a data source and return its graph.

    This is a placeholder for more sophisticated discovery logic: it only
    creates a single ``source`` node for the data source when none exists.
    ``include_fk_relations`` and ``max_depth`` are accepted but not used yet.

    Args:
        source_id: Source ID to discover from.
        include_fk_relations: Include foreign key relationships (for DB
            sources). Currently unused.
        max_depth: Maximum discovery depth. Currently unused.

    Returns:
        When a node already exists for the source: the result of
        ``get_graph(source_id)``. Otherwise: a summary dictionary with
        ``source_id``, ``discovered_nodes``, ``discovered_edges`` and the
        resulting ``graph``.

    Raises:
        ValueError: If no source with ``source_id`` exists.
    """
    existing_node = await self.node_repo.get_by_source_id(source_id)
    if existing_node:
        # NOTE(review): this path returns the bare graph dictionary while
        # the creation path below wraps the graph in a discovery summary --
        # confirm that callers handle both shapes.
        return await self.get_graph(source_id)

    # The module-level ``Source`` import is used; no local re-import needed.
    result = await self.session.execute(
        select(Source).where(Source.id == source_id)
    )
    source = result.scalar_one_or_none()
    if source is None:
        raise ValueError(f"Source '{source_id}' not found")

    # Create a node for this source at a fixed default canvas position.
    await self.create_node(
        name=source.name,
        node_type="source",
        source_id=source_id,
        metadata={"auto_discovered": True, "source_type": source.type},
        position_x=100,
        position_y=100,
    )

    return {
        "source_id": source_id,
        "discovered_nodes": 1,
        "discovered_edges": 0,
        "graph": await self.get_graph(source_id),
    }
585
+
586
+ # =========================================================================
587
+ # Position Management
588
+ # =========================================================================
589
+
590
async def update_positions(
    self,
    positions: list[dict[str, Any]],
) -> int:
    """Batch update node positions.

    Only the coordinates actually present in an entry are written, so an
    entry such as ``{"id": ..., "x": 10}`` moves the node horizontally
    without wiping its stored ``position_y``. An explicit ``None`` value is
    still written as given.

    Args:
        positions: List of ``{id, x, y}`` dictionaries. ``id`` is required;
            ``x`` and ``y`` are each optional.

    Returns:
        Number of entries whose node was found.

    Raises:
        KeyError: If an entry has no ``"id"`` key.
    """
    updated = 0
    for pos in positions:
        node = await self.node_repo.get_by_id(pos["id"])
        if not node:
            # Unknown IDs are skipped rather than treated as an error.
            continue
        if "x" in pos:
            node.position_x = pos["x"]
        if "y" in pos:
            node.position_y = pos["y"]
        updated += 1

    await self.session.flush()
    return updated
612
+
613
+ # =========================================================================
614
+ # Anomaly Integration
615
+ # =========================================================================
616
+
617
async def get_nodes_with_anomaly_status(self) -> list[dict[str, Any]]:
    """Return every node dictionary extended with an ``anomaly_status`` entry.

    Nodes come from the repository's ``get_all_nodes`` call with its default
    paging.

    Returns:
        One dictionary per node. ``anomaly_status`` is the classified latest
        detection for the node's linked source, or an "unknown" placeholder
        when the node has no source or the source has no detection.
    """
    overlay: list[dict[str, Any]] = []

    for node in await self.node_repo.get_all_nodes():
        entry = self._node_to_dict(node)

        detection = None
        if node.source_id:
            detection = await self._get_latest_anomaly_for_source(node.source_id)

        if detection:
            status = self._classify_anomaly_status(detection)
        else:
            # Placeholder; "status" is one of unknown, clean, low, medium, high.
            status = dict.fromkeys(
                (
                    "status",
                    "anomaly_rate",
                    "anomaly_count",
                    "last_detection_at",
                    "algorithm",
                )
            )
            status["status"] = "unknown"

        entry["anomaly_status"] = status
        overlay.append(entry)

    return overlay
648
+
649
async def get_graph_with_anomalies(
    self,
    source_id: str | None = None,
) -> dict[str, Any]:
    """Return ``get_graph`` output with an anomaly status attached to each node.

    Args:
        source_id: Optional source ID forwarded to ``get_graph``.

    Returns:
        The graph dictionary with its ``nodes`` list replaced by node
        dictionaries that each carry an ``anomaly_status`` entry: the
        classified latest detection for the node's source, or an "unknown"
        placeholder when there is none.
    """
    graph = await self.get_graph(source_id=source_id)

    enhanced_nodes = []
    for node_dict in graph["nodes"]:
        linked_source = node_dict.get("source_id")
        detection = (
            await self._get_latest_anomaly_for_source(linked_source)
            if linked_source
            else None
        )

        if detection:
            status = self._classify_anomaly_status(detection)
        else:
            status = {
                "status": "unknown",
                "anomaly_rate": None,
                "anomaly_count": None,
                "last_detection_at": None,
                "algorithm": None,
            }

        # The node dictionaries from get_graph are extended in place.
        node_dict["anomaly_status"] = status
        enhanced_nodes.append(node_dict)

    result = dict(graph)
    result["nodes"] = enhanced_nodes
    return result
689
+
690
async def get_impacted_by_anomaly(
    self,
    source_id: str,
    max_depth: int = 10,
) -> dict[str, Any]:
    """Describe the downstream nodes exposed to anomalies in a source.

    Looks up the lineage node linked to the source, classifies the source's
    latest anomaly detection (if any), walks downstream from the node, and
    annotates every reached node with its own anomaly status and an impact
    severity.

    Args:
        source_id: Source ID to analyze.
        max_depth: Maximum traversal depth for the downstream walk.

    Returns:
        Dictionary with the source node's ID and name, the source ID, the
        source's anomaly status (None without a detection), the annotated
        impacted nodes and their count, the overall severity and the
        propagation path.

    Raises:
        ValueError: If no lineage node is linked to the source.
    """
    node = await self.node_repo.get_by_source_id(source_id)
    if node is None:
        raise ValueError(f"No lineage node found for source '{source_id}'")

    detection = await self._get_latest_anomaly_for_source(source_id)
    source_anomaly_status = None
    if detection:
        source_anomaly_status = self._classify_anomaly_status(detection)

    downstream = await self._traverse_downstream(node.id, max_depth)

    impacted_nodes = []
    for reached in downstream:
        summary = self._node_summary(reached)

        # Only nodes linked to a source can have a detection of their own.
        reached_status = None
        if reached.source_id:
            reached_detection = await self._get_latest_anomaly_for_source(
                reached.source_id
            )
            if reached_detection:
                reached_status = self._classify_anomaly_status(reached_detection)

        summary["anomaly_status"] = reached_status
        summary["impact_severity"] = self._calculate_impact_severity(
            source_anomaly_status, reached_status
        )
        impacted_nodes.append(summary)

    overall_severity = self._calculate_overall_severity(
        source_anomaly_status, impacted_nodes
    )
    propagation_path = await self._build_propagation_path(node.id, downstream)

    return {
        "source_node_id": node.id,
        "source_node_name": node.name,
        "source_id": source_id,
        "source_anomaly_status": source_anomaly_status,
        "impacted_nodes": impacted_nodes,
        "impacted_count": len(impacted_nodes),
        "overall_severity": overall_severity,
        "propagation_path": propagation_path,
    }
763
+
764
async def _get_latest_anomaly_for_source(
    self,
    source_id: str,
) -> AnomalyDetection | None:
    """Fetch the newest completed anomaly detection recorded for a source.

    Args:
        source_id: Source ID whose detections are searched.

    Returns:
        The detection with status ``"completed"`` that has the most recent
        ``created_at``, or ``None`` when the source has none.
    """
    latest_completed = (
        select(AnomalyDetection)
        .where(AnomalyDetection.source_id == source_id)
        .where(AnomalyDetection.status == "completed")
        .order_by(AnomalyDetection.created_at.desc())
        .limit(1)
    )
    rows = await self.session.execute(latest_completed)
    return rows.scalar_one_or_none()
777
+
778
+ def _classify_anomaly_status(
779
+ self,
780
+ detection: AnomalyDetection,
781
+ ) -> dict[str, Any]:
782
+ """Classify anomaly detection into status categories.
783
+
784
+ Args:
785
+ detection: Anomaly detection record.
786
+
787
+ Returns:
788
+ Anomaly status dictionary.
789
+ """
790
+ anomaly_rate = detection.anomaly_rate or 0.0
791
+
792
+ # Classify based on anomaly rate thresholds
793
+ if anomaly_rate >= 0.15: # 15%+ is high
794
+ status = "high"
795
+ elif anomaly_rate >= 0.05: # 5-15% is medium
796
+ status = "medium"
797
+ elif anomaly_rate > 0: # 0-5% is low
798
+ status = "low"
799
+ else:
800
+ status = "clean"
801
+
802
+ return {
803
+ "status": status,
804
+ "anomaly_rate": anomaly_rate,
805
+ "anomaly_count": detection.anomaly_count,
806
+ "last_detection_at": (
807
+ detection.completed_at.isoformat() if detection.completed_at else None
808
+ ),
809
+ "algorithm": detection.algorithm,
810
+ }
811
+
812
+ def _calculate_impact_severity(
813
+ self,
814
+ source_status: dict[str, Any] | None,
815
+ downstream_status: dict[str, Any] | None,
816
+ ) -> str:
817
+ """Calculate impact severity for a downstream node.
818
+
819
+ Args:
820
+ source_status: Anomaly status of source node.
821
+ downstream_status: Anomaly status of downstream node.
822
+
823
+ Returns:
824
+ Impact severity level.
825
+ """
826
+ if not source_status:
827
+ return "unknown"
828
+
829
+ source_level = source_status.get("status", "unknown")
830
+
831
+ # If downstream also has anomalies, amplify the severity
832
+ if downstream_status and downstream_status.get("status") in ("medium", "high"):
833
+ if source_level == "high":
834
+ return "critical"
835
+ elif source_level == "medium":
836
+ return "high"
837
+ else:
838
+ return "medium"
839
+
840
+ # Map source anomaly status to impact severity
841
+ severity_map = {
842
+ "high": "high",
843
+ "medium": "medium",
844
+ "low": "low",
845
+ "clean": "none",
846
+ "unknown": "unknown",
847
+ }
848
+ return severity_map.get(source_level, "unknown")
849
+
850
+ def _calculate_overall_severity(
851
+ self,
852
+ source_status: dict[str, Any] | None,
853
+ impacted_nodes: list[dict[str, Any]],
854
+ ) -> str:
855
+ """Calculate overall impact severity across all impacted nodes.
856
+
857
+ Args:
858
+ source_status: Source anomaly status.
859
+ impacted_nodes: List of impacted nodes with severity.
860
+
861
+ Returns:
862
+ Overall severity level.
863
+ """
864
+ if not source_status or source_status.get("status") == "clean":
865
+ return "none"
866
+
867
+ if not impacted_nodes:
868
+ return source_status.get("status", "unknown")
869
+
870
+ # Count severity levels
871
+ severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0}
872
+ for node in impacted_nodes:
873
+ severity = node.get("impact_severity", "unknown")
874
+ if severity in severity_counts:
875
+ severity_counts[severity] += 1
876
+
877
+ # Determine overall based on highest severity and count
878
+ if severity_counts["critical"] > 0:
879
+ return "critical"
880
+ elif severity_counts["high"] >= 3 or (
881
+ severity_counts["high"] > 0 and source_status.get("status") == "high"
882
+ ):
883
+ return "critical"
884
+ elif severity_counts["high"] > 0:
885
+ return "high"
886
+ elif severity_counts["medium"] >= 3:
887
+ return "high"
888
+ elif severity_counts["medium"] > 0:
889
+ return "medium"
890
+ elif severity_counts["low"] > 0:
891
+ return "low"
892
+ else:
893
+ return source_status.get("status", "unknown")
894
+
895
+ async def _build_propagation_path(
896
+ self,
897
+ root_node_id: str,
898
+ downstream_nodes: list[LineageNode],
899
+ ) -> list[dict[str, Any]]:
900
+ """Build a list of edges showing the propagation path.
901
+
902
+ Args:
903
+ root_node_id: Starting node ID.
904
+ downstream_nodes: List of downstream nodes.
905
+
906
+ Returns:
907
+ List of edges in the propagation path.
908
+ """
909
+ if not downstream_nodes:
910
+ return []
911
+
912
+ node_ids = {root_node_id} | {n.id for n in downstream_nodes}
913
+ all_edges = await self.edge_repo.get_all_edges(limit=1000)
914
+
915
+ path_edges = []
916
+ for edge in all_edges:
917
+ if (
918
+ edge.source_node_id in node_ids
919
+ and edge.target_node_id in node_ids
920
+ ):
921
+ path_edges.append({
922
+ "id": edge.id,
923
+ "source_node_id": edge.source_node_id,
924
+ "target_node_id": edge.target_node_id,
925
+ "edge_type": edge.edge_type,
926
+ })
927
+
928
+ return path_edges
929
+
930
+ # =========================================================================
931
+ # Helpers
932
+ # =========================================================================
933
+
934
+ def _node_to_dict(self, node: LineageNode) -> dict[str, Any]:
935
+ """Convert node to dictionary."""
936
+ return {
937
+ "id": node.id,
938
+ "name": node.name,
939
+ "node_type": node.node_type,
940
+ "source_id": node.source_id,
941
+ "source_name": node.source.name if node.source else None,
942
+ "metadata": node.metadata_json,
943
+ "position_x": node.position_x,
944
+ "position_y": node.position_y,
945
+ "upstream_count": node.upstream_count,
946
+ "downstream_count": node.downstream_count,
947
+ "created_at": node.created_at.isoformat() if node.created_at else None,
948
+ "updated_at": node.updated_at.isoformat() if node.updated_at else None,
949
+ }
950
+
951
+ def _edge_to_dict(self, edge: LineageEdge) -> dict[str, Any]:
952
+ """Convert edge to dictionary."""
953
+ return {
954
+ "id": edge.id,
955
+ "source_node_id": edge.source_node_id,
956
+ "target_node_id": edge.target_node_id,
957
+ "source_node_name": edge.source_node.name if edge.source_node else None,
958
+ "target_node_name": edge.target_node.name if edge.target_node else None,
959
+ "edge_type": edge.edge_type,
960
+ "metadata": edge.metadata_json,
961
+ "created_at": edge.created_at.isoformat() if edge.created_at else None,
962
+ }
963
+
964
+ def _node_summary(self, node: LineageNode) -> dict[str, Any]:
965
+ """Get minimal node summary."""
966
+ return {
967
+ "id": node.id,
968
+ "name": node.name,
969
+ "node_type": node.node_type,
970
+ "source_id": node.source_id,
971
+ }