truthound-dashboard 1.3.1__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +258 -0
- truthound_dashboard/api/anomaly.py +1302 -0
- truthound_dashboard/api/cross_alerts.py +352 -0
- truthound_dashboard/api/deps.py +143 -0
- truthound_dashboard/api/drift_monitor.py +540 -0
- truthound_dashboard/api/lineage.py +1151 -0
- truthound_dashboard/api/maintenance.py +363 -0
- truthound_dashboard/api/middleware.py +373 -1
- truthound_dashboard/api/model_monitoring.py +805 -0
- truthound_dashboard/api/notifications_advanced.py +2452 -0
- truthound_dashboard/api/plugins.py +2096 -0
- truthound_dashboard/api/profile.py +211 -14
- truthound_dashboard/api/reports.py +853 -0
- truthound_dashboard/api/router.py +147 -0
- truthound_dashboard/api/rule_suggestions.py +310 -0
- truthound_dashboard/api/schema_evolution.py +231 -0
- truthound_dashboard/api/sources.py +47 -3
- truthound_dashboard/api/triggers.py +190 -0
- truthound_dashboard/api/validations.py +13 -0
- truthound_dashboard/api/validators.py +333 -4
- truthound_dashboard/api/versioning.py +309 -0
- truthound_dashboard/api/websocket.py +301 -0
- truthound_dashboard/core/__init__.py +27 -0
- truthound_dashboard/core/anomaly.py +1395 -0
- truthound_dashboard/core/anomaly_explainer.py +633 -0
- truthound_dashboard/core/cache.py +206 -0
- truthound_dashboard/core/cached_services.py +422 -0
- truthound_dashboard/core/charts.py +352 -0
- truthound_dashboard/core/connections.py +1069 -42
- truthound_dashboard/core/cross_alerts.py +837 -0
- truthound_dashboard/core/drift_monitor.py +1477 -0
- truthound_dashboard/core/drift_sampling.py +669 -0
- truthound_dashboard/core/i18n/__init__.py +42 -0
- truthound_dashboard/core/i18n/detector.py +173 -0
- truthound_dashboard/core/i18n/messages.py +564 -0
- truthound_dashboard/core/lineage.py +971 -0
- truthound_dashboard/core/maintenance.py +443 -5
- truthound_dashboard/core/model_monitoring.py +1043 -0
- truthound_dashboard/core/notifications/channels.py +1020 -1
- truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
- truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
- truthound_dashboard/core/notifications/deduplication/service.py +400 -0
- truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
- truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
- truthound_dashboard/core/notifications/dispatcher.py +43 -0
- truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
- truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
- truthound_dashboard/core/notifications/escalation/engine.py +429 -0
- truthound_dashboard/core/notifications/escalation/models.py +336 -0
- truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
- truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
- truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
- truthound_dashboard/core/notifications/events.py +49 -0
- truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
- truthound_dashboard/core/notifications/metrics/base.py +528 -0
- truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
- truthound_dashboard/core/notifications/routing/__init__.py +169 -0
- truthound_dashboard/core/notifications/routing/combinators.py +184 -0
- truthound_dashboard/core/notifications/routing/config.py +375 -0
- truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
- truthound_dashboard/core/notifications/routing/engine.py +382 -0
- truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
- truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
- truthound_dashboard/core/notifications/routing/rules.py +625 -0
- truthound_dashboard/core/notifications/routing/validator.py +678 -0
- truthound_dashboard/core/notifications/service.py +2 -0
- truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
- truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
- truthound_dashboard/core/notifications/throttling/builder.py +311 -0
- truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
- truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
- truthound_dashboard/core/openlineage.py +1028 -0
- truthound_dashboard/core/plugins/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/extractor.py +703 -0
- truthound_dashboard/core/plugins/docs/renderers.py +804 -0
- truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
- truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
- truthound_dashboard/core/plugins/hooks/manager.py +403 -0
- truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
- truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
- truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
- truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
- truthound_dashboard/core/plugins/loader.py +504 -0
- truthound_dashboard/core/plugins/registry.py +810 -0
- truthound_dashboard/core/plugins/reporter_executor.py +588 -0
- truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
- truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
- truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
- truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
- truthound_dashboard/core/plugins/sandbox.py +617 -0
- truthound_dashboard/core/plugins/security/__init__.py +68 -0
- truthound_dashboard/core/plugins/security/analyzer.py +535 -0
- truthound_dashboard/core/plugins/security/policies.py +311 -0
- truthound_dashboard/core/plugins/security/protocols.py +296 -0
- truthound_dashboard/core/plugins/security/signing.py +842 -0
- truthound_dashboard/core/plugins/security.py +446 -0
- truthound_dashboard/core/plugins/validator_executor.py +401 -0
- truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
- truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
- truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
- truthound_dashboard/core/plugins/versioning/semver.py +266 -0
- truthound_dashboard/core/profile_comparison.py +601 -0
- truthound_dashboard/core/report_history.py +570 -0
- truthound_dashboard/core/reporters/__init__.py +57 -0
- truthound_dashboard/core/reporters/base.py +296 -0
- truthound_dashboard/core/reporters/csv_reporter.py +155 -0
- truthound_dashboard/core/reporters/html_reporter.py +598 -0
- truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
- truthound_dashboard/core/reporters/i18n/base.py +494 -0
- truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
- truthound_dashboard/core/reporters/json_reporter.py +160 -0
- truthound_dashboard/core/reporters/junit_reporter.py +233 -0
- truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
- truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
- truthound_dashboard/core/reporters/registry.py +272 -0
- truthound_dashboard/core/rule_generator.py +2088 -0
- truthound_dashboard/core/scheduler.py +822 -12
- truthound_dashboard/core/schema_evolution.py +858 -0
- truthound_dashboard/core/services.py +152 -9
- truthound_dashboard/core/statistics.py +718 -0
- truthound_dashboard/core/streaming_anomaly.py +883 -0
- truthound_dashboard/core/triggers/__init__.py +45 -0
- truthound_dashboard/core/triggers/base.py +226 -0
- truthound_dashboard/core/triggers/evaluators.py +609 -0
- truthound_dashboard/core/triggers/factory.py +363 -0
- truthound_dashboard/core/unified_alerts.py +870 -0
- truthound_dashboard/core/validation_limits.py +509 -0
- truthound_dashboard/core/versioning.py +709 -0
- truthound_dashboard/core/websocket/__init__.py +59 -0
- truthound_dashboard/core/websocket/manager.py +512 -0
- truthound_dashboard/core/websocket/messages.py +130 -0
- truthound_dashboard/db/__init__.py +30 -0
- truthound_dashboard/db/models.py +3375 -3
- truthound_dashboard/main.py +22 -0
- truthound_dashboard/schemas/__init__.py +396 -1
- truthound_dashboard/schemas/anomaly.py +1258 -0
- truthound_dashboard/schemas/base.py +4 -0
- truthound_dashboard/schemas/cross_alerts.py +334 -0
- truthound_dashboard/schemas/drift_monitor.py +890 -0
- truthound_dashboard/schemas/lineage.py +428 -0
- truthound_dashboard/schemas/maintenance.py +154 -0
- truthound_dashboard/schemas/model_monitoring.py +374 -0
- truthound_dashboard/schemas/notifications_advanced.py +1363 -0
- truthound_dashboard/schemas/openlineage.py +704 -0
- truthound_dashboard/schemas/plugins.py +1293 -0
- truthound_dashboard/schemas/profile.py +420 -34
- truthound_dashboard/schemas/profile_comparison.py +242 -0
- truthound_dashboard/schemas/reports.py +285 -0
- truthound_dashboard/schemas/rule_suggestion.py +434 -0
- truthound_dashboard/schemas/schema_evolution.py +164 -0
- truthound_dashboard/schemas/source.py +117 -2
- truthound_dashboard/schemas/triggers.py +511 -0
- truthound_dashboard/schemas/unified_alerts.py +223 -0
- truthound_dashboard/schemas/validation.py +25 -1
- truthound_dashboard/schemas/validators/__init__.py +11 -0
- truthound_dashboard/schemas/validators/base.py +151 -0
- truthound_dashboard/schemas/versioning.py +152 -0
- truthound_dashboard/static/index.html +2 -2
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/METADATA +147 -23
- truthound_dashboard-1.4.1.dist-info/RECORD +239 -0
- truthound_dashboard/static/assets/index-BZG20KuF.js +0 -586
- truthound_dashboard/static/assets/index-D_HyZ3pb.css +0 -1
- truthound_dashboard/static/assets/unmerged_dictionaries-CtpqQBm0.js +0 -1
- truthound_dashboard-1.3.1.dist-info/RECORD +0 -110
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,971 @@
|
|
|
1
|
+
"""Data lineage service.
|
|
2
|
+
|
|
3
|
+
This module provides services for managing data lineage graphs,
|
|
4
|
+
including node and edge CRUD, impact analysis, and auto-discovery.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from collections.abc import Sequence
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from typing import Any, Literal
|
|
12
|
+
|
|
13
|
+
from sqlalchemy import select
|
|
14
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
15
|
+
|
|
16
|
+
from truthound_dashboard.db import BaseRepository
|
|
17
|
+
from truthound_dashboard.db.models import AnomalyDetection, LineageEdge, LineageNode, Source
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class LineageNodeRepository(BaseRepository[LineageNode]):
    """Data-access layer for ``LineageNode`` rows."""

    model = LineageNode

    async def get_all_nodes(
        self,
        *,
        offset: int = 0,
        limit: int = 500,
    ) -> Sequence[LineageNode]:
        """Return up to *limit* lineage nodes, skipping the first *offset*."""
        return await self.list(offset=offset, limit=limit)

    async def get_by_source_id(self, source_id: str) -> LineageNode | None:
        """Return the node linked to the given data source, or ``None``."""
        stmt = select(LineageNode).where(LineageNode.source_id == source_id).limit(1)
        row = await self.session.execute(stmt)
        return row.scalar_one_or_none()

    async def get_nodes_by_type(
        self,
        node_type: str,
        *,
        limit: int = 100,
    ) -> Sequence[LineageNode]:
        """Return nodes of one ``node_type`` (source, transform, sink)."""
        type_filter = [LineageNode.node_type == node_type]
        return await self.list(limit=limit, filters=type_filter)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class LineageEdgeRepository(BaseRepository[LineageEdge]):
    """Data-access layer for ``LineageEdge`` rows."""

    model = LineageEdge

    async def get_all_edges(
        self,
        *,
        offset: int = 0,
        limit: int = 1000,
    ) -> Sequence[LineageEdge]:
        """Return up to *limit* lineage edges, skipping the first *offset*."""
        return await self.list(offset=offset, limit=limit)

    async def get_outgoing_edges(
        self,
        node_id: str,
        *,
        limit: int = 100,
    ) -> Sequence[LineageEdge]:
        """Return edges whose source endpoint is *node_id*."""
        outgoing_filter = [LineageEdge.source_node_id == node_id]
        return await self.list(limit=limit, filters=outgoing_filter)

    async def get_incoming_edges(
        self,
        node_id: str,
        *,
        limit: int = 100,
    ) -> Sequence[LineageEdge]:
        """Return edges whose target endpoint is *node_id*."""
        incoming_filter = [LineageEdge.target_node_id == node_id]
        return await self.list(limit=limit, filters=incoming_filter)

    async def edge_exists(
        self,
        source_node_id: str,
        target_node_id: str,
        edge_type: str = "derives_from",
    ) -> bool:
        """Report whether an identical (source, target, type) edge is stored."""
        stmt = (
            select(LineageEdge)
            .where(
                LineageEdge.source_node_id == source_node_id,
                LineageEdge.target_node_id == target_node_id,
                LineageEdge.edge_type == edge_type,
            )
            .limit(1)
        )
        row = await self.session.execute(stmt)
        return row.scalar_one_or_none() is not None
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class LineageService:
|
|
166
|
+
"""Service for managing data lineage graphs.
|
|
167
|
+
|
|
168
|
+
Provides functionality for:
|
|
169
|
+
- Node and edge CRUD operations
|
|
170
|
+
- Impact analysis (upstream/downstream)
|
|
171
|
+
- Auto-discovery from source metadata
|
|
172
|
+
- Position management for visualization
|
|
173
|
+
"""
|
|
174
|
+
|
|
175
|
+
def __init__(self, session: AsyncSession) -> None:
    """Bind the service and its node/edge repositories to *session*.

    Args:
        session: Async database session shared by all repositories.
    """
    self.session = session
    # Both repositories operate on the same session so that node and
    # edge writes share one transaction.
    self.node_repo = LineageNodeRepository(session)
    self.edge_repo = LineageEdgeRepository(session)
|
|
184
|
+
|
|
185
|
+
# =========================================================================
|
|
186
|
+
# Graph Operations
|
|
187
|
+
# =========================================================================
|
|
188
|
+
|
|
189
|
+
async def get_graph(
    self,
    source_id: str | None = None,
) -> dict[str, Any]:
    """Return the lineage graph as serialisable dictionaries.

    Args:
        source_id: When given, restrict the graph to the node linked to
            this source plus its direct neighbours.

    Returns:
        Mapping with ``nodes``, ``edges``, ``total_nodes``, ``total_edges``.
    """
    if not source_id:
        # No filter: return the whole stored graph (subject to repo limits).
        nodes = list(await self.node_repo.get_all_nodes())
        edges = list(await self.edge_repo.get_all_edges())
    else:
        root = await self.node_repo.get_by_source_id(source_id)
        if not root:
            return {"nodes": [], "edges": [], "total_nodes": 0, "total_edges": 0}

        # One-hop neighbourhood only; a BFS would be needed for deeper views.
        nodes = [root]
        seen_ids = {root.id}

        # Direct children via outgoing edges.
        for out_edge in await self.edge_repo.get_outgoing_edges(root.id):
            child_id = out_edge.target_node_id
            if child_id in seen_ids:
                continue
            child = await self.node_repo.get_by_id(child_id)
            if child:
                nodes.append(child)
                seen_ids.add(child.id)

        # Direct parents via incoming edges.
        for in_edge in await self.edge_repo.get_incoming_edges(root.id):
            parent_id = in_edge.source_node_id
            if parent_id in seen_ids:
                continue
            parent = await self.node_repo.get_by_id(parent_id)
            if parent:
                nodes.append(parent)
                seen_ids.add(parent.id)

        # Keep only edges with both endpoints inside the neighbourhood.
        edges = [
            candidate
            for candidate in await self.edge_repo.get_all_edges(limit=1000)
            if candidate.source_node_id in seen_ids
            and candidate.target_node_id in seen_ids
        ]

    return {
        "nodes": [self._node_to_dict(n) for n in nodes],
        "edges": [self._edge_to_dict(e) for e in edges],
        "total_nodes": len(nodes),
        "total_edges": len(edges),
    }
|
|
246
|
+
|
|
247
|
+
# =========================================================================
|
|
248
|
+
# Node Operations
|
|
249
|
+
# =========================================================================
|
|
250
|
+
|
|
251
|
+
async def create_node(
    self,
    *,
    name: str,
    node_type: str,
    source_id: str | None = None,
    metadata: dict[str, Any] | None = None,
    position_x: float | None = None,
    position_y: float | None = None,
) -> LineageNode:
    """Persist and return a new lineage node.

    Args:
        name: Display name of the node.
        node_type: One of ``source``, ``transform``, ``sink``.
        source_id: Data source the node represents, if any.
        metadata: Extra attributes, stored in the ``metadata_json`` column.
        position_x: Canvas X coordinate for visualization.
        position_y: Canvas Y coordinate for visualization.

    Returns:
        The freshly created node.
    """
    # The public parameter is ``metadata`` but the model column is
    # ``metadata_json`` — presumably because ``metadata`` clashes with the
    # ORM; confirm against the model definition.
    return await self.node_repo.create(
        name=name,
        node_type=node_type,
        source_id=source_id,
        metadata_json=metadata,
        position_x=position_x,
        position_y=position_y,
    )
|
|
283
|
+
|
|
284
|
+
async def get_node(self, node_id: str) -> LineageNode | None:
    """Fetch one node by primary key, or ``None`` when absent.

    Args:
        node_id: Node ID.

    Returns:
        The matching ``LineageNode`` or ``None``.
    """
    return await self.node_repo.get_by_id(node_id)
|
|
294
|
+
|
|
295
|
+
async def update_node(
    self,
    node_id: str,
    *,
    name: str | None = None,
    metadata: dict[str, Any] | None = None,
    position_x: float | None = None,
    position_y: float | None = None,
) -> LineageNode | None:
    """Apply a partial update to a lineage node.

    Only keyword arguments that are not ``None`` are written, so a field
    cannot be reset to ``None`` through this method.

    Args:
        node_id: Node ID.
        name: New display name.
        metadata: New metadata payload (stored as ``metadata_json``).
        position_x: New canvas X coordinate.
        position_y: New canvas Y coordinate.

    Returns:
        The refreshed node, or ``None`` when *node_id* is unknown.
    """
    target = await self.node_repo.get_by_id(node_id)
    if target is None:
        return None

    # Map public argument names onto model attributes, skipping unset ones.
    updates = {
        "name": name,
        "metadata_json": metadata,
        "position_x": position_x,
        "position_y": position_y,
    }
    for attr, value in updates.items():
        if value is not None:
            setattr(target, attr, value)

    await self.session.flush()
    await self.session.refresh(target)
    return target
|
|
332
|
+
|
|
333
|
+
async def delete_node(self, node_id: str) -> bool:
    """Remove a node; returns ``True`` when a row was deleted.

    NOTE(review): edge cleanup is delegated to the repository/DB layer
    (presumably a cascading delete) — confirm attached edges are removed.

    Args:
        node_id: Node ID.

    Returns:
        ``True`` if the node existed and was deleted.
    """
    return await self.node_repo.delete(node_id)
|
|
343
|
+
|
|
344
|
+
# =========================================================================
|
|
345
|
+
# Edge Operations
|
|
346
|
+
# =========================================================================
|
|
347
|
+
|
|
348
|
+
async def create_edge(
    self,
    *,
    source_node_id: str,
    target_node_id: str,
    edge_type: str = "derives_from",
    metadata: dict[str, Any] | None = None,
) -> LineageEdge:
    """Create a new lineage edge between two existing nodes.

    Args:
        source_node_id: Source node ID.
        target_node_id: Target node ID.
        edge_type: Edge type (defaults to ``derives_from``).
        metadata: Optional additional metadata (stored as ``metadata_json``).

    Returns:
        Created edge.

    Raises:
        ValueError: If either endpoint is missing, the edge would be a
            self-loop, or an identical edge already exists.
    """
    # Reject self-referential edges up front: a node deriving from itself
    # is meaningless in a lineage graph and only adds noise to traversals.
    if source_node_id == target_node_id:
        raise ValueError("Source and target nodes must differ")

    # Verify both endpoints exist before writing anything.
    source_node = await self.node_repo.get_by_id(source_node_id)
    if source_node is None:
        raise ValueError(f"Source node '{source_node_id}' not found")

    target_node = await self.node_repo.get_by_id(target_node_id)
    if target_node is None:
        raise ValueError(f"Target node '{target_node_id}' not found")

    # Reject duplicates of the same (source, target, type) triple.
    if await self.edge_repo.edge_exists(source_node_id, target_node_id, edge_type):
        raise ValueError("Edge already exists")

    return await self.edge_repo.create(
        source_node_id=source_node_id,
        target_node_id=target_node_id,
        edge_type=edge_type,
        metadata_json=metadata,
    )
|
|
390
|
+
|
|
391
|
+
async def get_edge(self, edge_id: str) -> LineageEdge | None:
    """Fetch one edge by primary key, or ``None`` when absent.

    Args:
        edge_id: Edge ID.

    Returns:
        The matching ``LineageEdge`` or ``None``.
    """
    return await self.edge_repo.get_by_id(edge_id)
|
|
401
|
+
|
|
402
|
+
async def delete_edge(self, edge_id: str) -> bool:
    """Remove an edge; returns ``True`` when a row was deleted.

    Args:
        edge_id: Edge ID.

    Returns:
        ``True`` if the edge existed and was deleted.
    """
    return await self.edge_repo.delete(edge_id)
|
|
412
|
+
|
|
413
|
+
# =========================================================================
|
|
414
|
+
# Impact Analysis
|
|
415
|
+
# =========================================================================
|
|
416
|
+
|
|
417
|
+
async def analyze_impact(
    self,
    node_id: str,
    direction: Literal["upstream", "downstream", "both"] = "both",
    max_depth: int = 10,
) -> dict[str, Any]:
    """Summarise which nodes and sources are reachable from *node_id*.

    Args:
        node_id: Starting node ID.
        direction: Which way to walk the graph.
        max_depth: Depth cap for the traversal.

    Returns:
        Summary dict with upstream/downstream node lists, affected source
        IDs, and aggregate counts.

    Raises:
        ValueError: If the starting node does not exist.
    """
    origin = await self.node_repo.get_by_id(node_id)
    if origin is None:
        raise ValueError(f"Node '{node_id}' not found")

    affected_sources: set[str] = set()

    def track_sources(found):
        # Record the distinct data sources behind the reachable nodes.
        for item in found:
            if item.source_id:
                affected_sources.add(item.source_id)

    upstream_nodes: list[dict[str, Any]] = []
    if direction in ("upstream", "both"):
        ancestors = await self._traverse_upstream(node_id, max_depth)
        upstream_nodes = [self._node_summary(a) for a in ancestors]
        track_sources(ancestors)

    downstream_nodes: list[dict[str, Any]] = []
    if direction in ("downstream", "both"):
        descendants = await self._traverse_downstream(node_id, max_depth)
        downstream_nodes = [self._node_summary(d) for d in descendants]
        track_sources(descendants)

    return {
        "root_node_id": node_id,
        "root_node_name": origin.name,
        "direction": direction,
        "upstream_nodes": upstream_nodes,
        "downstream_nodes": downstream_nodes,
        "affected_sources": list(affected_sources),
        "upstream_count": len(upstream_nodes),
        "downstream_count": len(downstream_nodes),
        "total_affected": len(upstream_nodes) + len(downstream_nodes),
    }
|
|
469
|
+
|
|
470
|
+
async def _traverse_upstream(
    self,
    node_id: str,
    max_depth: int,
    visited: set[str] | None = None,
    depth: int = 0,
) -> list[LineageNode]:
    """Recursively collect ancestors of *node_id*, depth-first.

    ``visited`` is shared across the recursion to break cycles; the
    starting node itself is never part of the result.
    """
    visited = set() if visited is None else visited

    # Stop at the depth cap or when re-entering an already-seen node.
    if depth >= max_depth or node_id in visited:
        return []
    visited.add(node_id)

    ancestors: list[LineageNode] = []
    for edge in await self.edge_repo.get_incoming_edges(node_id):
        parent = await self.node_repo.get_by_id(edge.source_node_id)
        if not parent or parent.id in visited:
            continue
        ancestors.append(parent)
        deeper = await self._traverse_upstream(parent.id, max_depth, visited, depth + 1)
        ancestors.extend(deeper)

    return ancestors
|
|
499
|
+
|
|
500
|
+
async def _traverse_downstream(
    self,
    node_id: str,
    max_depth: int,
    visited: set[str] | None = None,
    depth: int = 0,
) -> list[LineageNode]:
    """Recursively collect descendants of *node_id*, depth-first.

    ``visited`` is shared across the recursion to break cycles; the
    starting node itself is never part of the result.
    """
    visited = set() if visited is None else visited

    # Stop at the depth cap or when re-entering an already-seen node.
    if depth >= max_depth or node_id in visited:
        return []
    visited.add(node_id)

    descendants: list[LineageNode] = []
    for edge in await self.edge_repo.get_outgoing_edges(node_id):
        child = await self.node_repo.get_by_id(edge.target_node_id)
        if not child or child.id in visited:
            continue
        descendants.append(child)
        deeper = await self._traverse_downstream(child.id, max_depth, visited, depth + 1)
        descendants.extend(deeper)

    return descendants
|
|
529
|
+
|
|
530
|
+
# =========================================================================
|
|
531
|
+
# Auto-Discovery
|
|
532
|
+
# =========================================================================
|
|
533
|
+
|
|
534
|
+
async def auto_discover(
    self,
    source_id: str,
    include_fk_relations: bool = True,
    max_depth: int = 3,
) -> dict[str, Any]:
    """Auto-discover lineage from a data source.

    Currently a minimal implementation: it only ensures a lineage node
    exists for the source. ``include_fk_relations`` and ``max_depth`` are
    accepted for forward compatibility but are not used yet; a fuller
    implementation would analyze source metadata, SQL queries, or
    foreign-key relationships.

    Args:
        source_id: Source ID to discover from.
        include_fk_relations: Include foreign key relationships (for DB
            sources). Currently unused.
        max_depth: Maximum discovery depth. Currently unused.

    Returns:
        Dict with ``source_id``, ``discovered_nodes``, ``discovered_edges``
        and the resulting ``graph``.

    Raises:
        ValueError: If the source does not exist.
    """
    # If a node already exists for the source, nothing new is discovered.
    # Previously this branch returned the bare graph dict while the
    # discovery branch returned an envelope; both now share one shape so
    # callers can rely on a single contract.
    existing_node = await self.node_repo.get_by_source_id(source_id)
    if existing_node:
        return {
            "source_id": source_id,
            "discovered_nodes": 0,
            "discovered_edges": 0,
            "graph": await self.get_graph(source_id),
        }

    # Look up the source row (uses the module-level ``Source`` import).
    result = await self.session.execute(select(Source).where(Source.id == source_id))
    source = result.scalar_one_or_none()
    if source is None:
        raise ValueError(f"Source '{source_id}' not found")

    # Seed the graph with a single node representing the source.
    await self.create_node(
        name=source.name,
        node_type="source",
        source_id=source_id,
        metadata={"auto_discovered": True, "source_type": source.type},
        position_x=100,
        position_y=100,
    )

    return {
        "source_id": source_id,
        "discovered_nodes": 1,
        "discovered_edges": 0,
        "graph": await self.get_graph(source_id),
    }
|
|
585
|
+
|
|
586
|
+
# =========================================================================
|
|
587
|
+
# Position Management
|
|
588
|
+
# =========================================================================
|
|
589
|
+
|
|
590
|
+
async def update_positions(
    self,
    positions: list[dict[str, Any]],
) -> int:
    """Batch-write canvas coordinates for nodes.

    Args:
        positions: Items shaped like ``{"id": ..., "x": ..., "y": ...}``.
            NOTE(review): a missing ``x``/``y`` key writes ``None`` and
            clears the stored coordinate — confirm this is intended.

    Returns:
        How many nodes were found and updated.
    """
    changed = 0
    for entry in positions:
        target = await self.node_repo.get_by_id(entry["id"])
        if not target:
            continue
        target.position_x = entry.get("x")
        target.position_y = entry.get("y")
        changed += 1

    await self.session.flush()
    return changed
|
|
612
|
+
|
|
613
|
+
# =========================================================================
|
|
614
|
+
# Anomaly Integration
|
|
615
|
+
# =========================================================================
|
|
616
|
+
|
|
617
|
+
async def get_nodes_with_anomaly_status(self) -> list[dict[str, Any]]:
    """Get all nodes with their latest anomaly detection status.

    Returns:
        List of nodes with anomaly status overlay data.
    """
    overlaid: list[dict[str, Any]] = []
    for node in await self.node_repo.get_all_nodes():
        # Default overlay; replaced when a completed detection exists.
        status: dict[str, Any] = {
            "status": "unknown",  # unknown, clean, low, medium, high
            "anomaly_rate": None,
            "anomaly_count": None,
            "last_detection_at": None,
            "algorithm": None,
        }
        # Only nodes linked to a source can have detection data.
        if node.source_id:
            latest = await self._get_latest_anomaly_for_source(node.source_id)
            if latest:
                status = self._classify_anomaly_status(latest)

        entry = self._node_to_dict(node)
        entry["anomaly_status"] = status
        overlaid.append(entry)

    return overlaid
|
|
648
|
+
|
|
649
|
+
async def get_graph_with_anomalies(
    self,
    source_id: str | None = None,
) -> dict[str, Any]:
    """Get the lineage graph with anomaly status overlay.

    Args:
        source_id: Optional source ID to filter by.

    Returns:
        Graph with anomaly status for each node.
    """
    base = await self.get_graph(source_id=source_id)

    decorated: list[dict[str, Any]] = []
    for entry in base["nodes"]:
        # Default overlay, replaced when a detection exists for the node.
        status: dict[str, Any] = {
            "status": "unknown",
            "anomaly_rate": None,
            "anomaly_count": None,
            "last_detection_at": None,
            "algorithm": None,
        }
        linked_source = entry.get("source_id")
        if linked_source:
            latest = await self._get_latest_anomaly_for_source(linked_source)
            if latest:
                status = self._classify_anomaly_status(latest)

        entry["anomaly_status"] = status
        decorated.append(entry)

    # Keep edges and any other graph keys; swap in the enhanced node list.
    return {**base, "nodes": decorated}
|
|
689
|
+
|
|
690
|
+
async def get_impacted_by_anomaly(
    self,
    source_id: str,
    max_depth: int = 10,
) -> dict[str, Any]:
    """Get downstream nodes impacted by anomalies in a source.

    This analyzes the anomaly status of a source and identifies all
    downstream nodes that could be affected by data quality issues.

    Args:
        source_id: Source ID to analyze.
        max_depth: Maximum traversal depth.

    Returns:
        Impact analysis including impacted nodes and severity.

    Raises:
        ValueError: If source not found.
    """
    # The source must already be represented in the lineage graph.
    root = await self.node_repo.get_by_source_id(source_id)
    if root is None:
        raise ValueError(f"No lineage node found for source '{source_id}'")

    # Classify the root's latest detection, if any.
    root_detection = await self._get_latest_anomaly_for_source(source_id)
    root_status = (
        self._classify_anomaly_status(root_detection) if root_detection else None
    )

    # Everything reachable downstream is potentially impacted.
    downstream = await self._traverse_downstream(root.id, max_depth)

    impacted: list[dict[str, Any]] = []
    for child in downstream:
        # Downstream nodes with their own linked source get their own status.
        child_status = None
        if child.source_id:
            child_detection = await self._get_latest_anomaly_for_source(
                child.source_id
            )
            if child_detection:
                child_status = self._classify_anomaly_status(child_detection)

        summary = self._node_summary(child)
        summary["anomaly_status"] = child_status
        summary["impact_severity"] = self._calculate_impact_severity(
            root_status, child_status
        )
        impacted.append(summary)

    return {
        "source_node_id": root.id,
        "source_node_name": root.name,
        "source_id": source_id,
        "source_anomaly_status": root_status,
        "impacted_nodes": impacted,
        "impacted_count": len(impacted),
        "overall_severity": self._calculate_overall_severity(root_status, impacted),
        "propagation_path": await self._build_propagation_path(root.id, downstream),
    }
|
|
763
|
+
|
|
764
|
+
async def _get_latest_anomaly_for_source(
    self,
    source_id: str,
) -> AnomalyDetection | None:
    """Get the latest successful anomaly detection for a source."""
    # Most recent completed detection wins; incomplete runs are ignored.
    query = (
        select(AnomalyDetection)
        .where(AnomalyDetection.source_id == source_id)
        .where(AnomalyDetection.status == "completed")
        .order_by(AnomalyDetection.created_at.desc())
        .limit(1)
    )
    rows = await self.session.execute(query)
    return rows.scalar_one_or_none()
|
|
777
|
+
|
|
778
|
+
def _classify_anomaly_status(
|
|
779
|
+
self,
|
|
780
|
+
detection: AnomalyDetection,
|
|
781
|
+
) -> dict[str, Any]:
|
|
782
|
+
"""Classify anomaly detection into status categories.
|
|
783
|
+
|
|
784
|
+
Args:
|
|
785
|
+
detection: Anomaly detection record.
|
|
786
|
+
|
|
787
|
+
Returns:
|
|
788
|
+
Anomaly status dictionary.
|
|
789
|
+
"""
|
|
790
|
+
anomaly_rate = detection.anomaly_rate or 0.0
|
|
791
|
+
|
|
792
|
+
# Classify based on anomaly rate thresholds
|
|
793
|
+
if anomaly_rate >= 0.15: # 15%+ is high
|
|
794
|
+
status = "high"
|
|
795
|
+
elif anomaly_rate >= 0.05: # 5-15% is medium
|
|
796
|
+
status = "medium"
|
|
797
|
+
elif anomaly_rate > 0: # 0-5% is low
|
|
798
|
+
status = "low"
|
|
799
|
+
else:
|
|
800
|
+
status = "clean"
|
|
801
|
+
|
|
802
|
+
return {
|
|
803
|
+
"status": status,
|
|
804
|
+
"anomaly_rate": anomaly_rate,
|
|
805
|
+
"anomaly_count": detection.anomaly_count,
|
|
806
|
+
"last_detection_at": (
|
|
807
|
+
detection.completed_at.isoformat() if detection.completed_at else None
|
|
808
|
+
),
|
|
809
|
+
"algorithm": detection.algorithm,
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
def _calculate_impact_severity(
|
|
813
|
+
self,
|
|
814
|
+
source_status: dict[str, Any] | None,
|
|
815
|
+
downstream_status: dict[str, Any] | None,
|
|
816
|
+
) -> str:
|
|
817
|
+
"""Calculate impact severity for a downstream node.
|
|
818
|
+
|
|
819
|
+
Args:
|
|
820
|
+
source_status: Anomaly status of source node.
|
|
821
|
+
downstream_status: Anomaly status of downstream node.
|
|
822
|
+
|
|
823
|
+
Returns:
|
|
824
|
+
Impact severity level.
|
|
825
|
+
"""
|
|
826
|
+
if not source_status:
|
|
827
|
+
return "unknown"
|
|
828
|
+
|
|
829
|
+
source_level = source_status.get("status", "unknown")
|
|
830
|
+
|
|
831
|
+
# If downstream also has anomalies, amplify the severity
|
|
832
|
+
if downstream_status and downstream_status.get("status") in ("medium", "high"):
|
|
833
|
+
if source_level == "high":
|
|
834
|
+
return "critical"
|
|
835
|
+
elif source_level == "medium":
|
|
836
|
+
return "high"
|
|
837
|
+
else:
|
|
838
|
+
return "medium"
|
|
839
|
+
|
|
840
|
+
# Map source anomaly status to impact severity
|
|
841
|
+
severity_map = {
|
|
842
|
+
"high": "high",
|
|
843
|
+
"medium": "medium",
|
|
844
|
+
"low": "low",
|
|
845
|
+
"clean": "none",
|
|
846
|
+
"unknown": "unknown",
|
|
847
|
+
}
|
|
848
|
+
return severity_map.get(source_level, "unknown")
|
|
849
|
+
|
|
850
|
+
def _calculate_overall_severity(
|
|
851
|
+
self,
|
|
852
|
+
source_status: dict[str, Any] | None,
|
|
853
|
+
impacted_nodes: list[dict[str, Any]],
|
|
854
|
+
) -> str:
|
|
855
|
+
"""Calculate overall impact severity across all impacted nodes.
|
|
856
|
+
|
|
857
|
+
Args:
|
|
858
|
+
source_status: Source anomaly status.
|
|
859
|
+
impacted_nodes: List of impacted nodes with severity.
|
|
860
|
+
|
|
861
|
+
Returns:
|
|
862
|
+
Overall severity level.
|
|
863
|
+
"""
|
|
864
|
+
if not source_status or source_status.get("status") == "clean":
|
|
865
|
+
return "none"
|
|
866
|
+
|
|
867
|
+
if not impacted_nodes:
|
|
868
|
+
return source_status.get("status", "unknown")
|
|
869
|
+
|
|
870
|
+
# Count severity levels
|
|
871
|
+
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0}
|
|
872
|
+
for node in impacted_nodes:
|
|
873
|
+
severity = node.get("impact_severity", "unknown")
|
|
874
|
+
if severity in severity_counts:
|
|
875
|
+
severity_counts[severity] += 1
|
|
876
|
+
|
|
877
|
+
# Determine overall based on highest severity and count
|
|
878
|
+
if severity_counts["critical"] > 0:
|
|
879
|
+
return "critical"
|
|
880
|
+
elif severity_counts["high"] >= 3 or (
|
|
881
|
+
severity_counts["high"] > 0 and source_status.get("status") == "high"
|
|
882
|
+
):
|
|
883
|
+
return "critical"
|
|
884
|
+
elif severity_counts["high"] > 0:
|
|
885
|
+
return "high"
|
|
886
|
+
elif severity_counts["medium"] >= 3:
|
|
887
|
+
return "high"
|
|
888
|
+
elif severity_counts["medium"] > 0:
|
|
889
|
+
return "medium"
|
|
890
|
+
elif severity_counts["low"] > 0:
|
|
891
|
+
return "low"
|
|
892
|
+
else:
|
|
893
|
+
return source_status.get("status", "unknown")
|
|
894
|
+
|
|
895
|
+
async def _build_propagation_path(
|
|
896
|
+
self,
|
|
897
|
+
root_node_id: str,
|
|
898
|
+
downstream_nodes: list[LineageNode],
|
|
899
|
+
) -> list[dict[str, Any]]:
|
|
900
|
+
"""Build a list of edges showing the propagation path.
|
|
901
|
+
|
|
902
|
+
Args:
|
|
903
|
+
root_node_id: Starting node ID.
|
|
904
|
+
downstream_nodes: List of downstream nodes.
|
|
905
|
+
|
|
906
|
+
Returns:
|
|
907
|
+
List of edges in the propagation path.
|
|
908
|
+
"""
|
|
909
|
+
if not downstream_nodes:
|
|
910
|
+
return []
|
|
911
|
+
|
|
912
|
+
node_ids = {root_node_id} | {n.id for n in downstream_nodes}
|
|
913
|
+
all_edges = await self.edge_repo.get_all_edges(limit=1000)
|
|
914
|
+
|
|
915
|
+
path_edges = []
|
|
916
|
+
for edge in all_edges:
|
|
917
|
+
if (
|
|
918
|
+
edge.source_node_id in node_ids
|
|
919
|
+
and edge.target_node_id in node_ids
|
|
920
|
+
):
|
|
921
|
+
path_edges.append({
|
|
922
|
+
"id": edge.id,
|
|
923
|
+
"source_node_id": edge.source_node_id,
|
|
924
|
+
"target_node_id": edge.target_node_id,
|
|
925
|
+
"edge_type": edge.edge_type,
|
|
926
|
+
})
|
|
927
|
+
|
|
928
|
+
return path_edges
|
|
929
|
+
|
|
930
|
+
# =========================================================================
|
|
931
|
+
# Helpers
|
|
932
|
+
# =========================================================================
|
|
933
|
+
|
|
934
|
+
def _node_to_dict(self, node: LineageNode) -> dict[str, Any]:
|
|
935
|
+
"""Convert node to dictionary."""
|
|
936
|
+
return {
|
|
937
|
+
"id": node.id,
|
|
938
|
+
"name": node.name,
|
|
939
|
+
"node_type": node.node_type,
|
|
940
|
+
"source_id": node.source_id,
|
|
941
|
+
"source_name": node.source.name if node.source else None,
|
|
942
|
+
"metadata": node.metadata_json,
|
|
943
|
+
"position_x": node.position_x,
|
|
944
|
+
"position_y": node.position_y,
|
|
945
|
+
"upstream_count": node.upstream_count,
|
|
946
|
+
"downstream_count": node.downstream_count,
|
|
947
|
+
"created_at": node.created_at.isoformat() if node.created_at else None,
|
|
948
|
+
"updated_at": node.updated_at.isoformat() if node.updated_at else None,
|
|
949
|
+
}
|
|
950
|
+
|
|
951
|
+
def _edge_to_dict(self, edge: LineageEdge) -> dict[str, Any]:
|
|
952
|
+
"""Convert edge to dictionary."""
|
|
953
|
+
return {
|
|
954
|
+
"id": edge.id,
|
|
955
|
+
"source_node_id": edge.source_node_id,
|
|
956
|
+
"target_node_id": edge.target_node_id,
|
|
957
|
+
"source_node_name": edge.source_node.name if edge.source_node else None,
|
|
958
|
+
"target_node_name": edge.target_node.name if edge.target_node else None,
|
|
959
|
+
"edge_type": edge.edge_type,
|
|
960
|
+
"metadata": edge.metadata_json,
|
|
961
|
+
"created_at": edge.created_at.isoformat() if edge.created_at else None,
|
|
962
|
+
}
|
|
963
|
+
|
|
964
|
+
def _node_summary(self, node: LineageNode) -> dict[str, Any]:
|
|
965
|
+
"""Get minimal node summary."""
|
|
966
|
+
return {
|
|
967
|
+
"id": node.id,
|
|
968
|
+
"name": node.name,
|
|
969
|
+
"node_type": node.node_type,
|
|
970
|
+
"source_id": node.source_id,
|
|
971
|
+
}
|