truthound-dashboard 1.4.3__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +75 -86
- truthound_dashboard/api/anomaly.py +7 -13
- truthound_dashboard/api/cross_alerts.py +38 -52
- truthound_dashboard/api/drift.py +49 -59
- truthound_dashboard/api/drift_monitor.py +234 -79
- truthound_dashboard/api/enterprise_sampling.py +498 -0
- truthound_dashboard/api/history.py +57 -5
- truthound_dashboard/api/lineage.py +3 -48
- truthound_dashboard/api/maintenance.py +104 -49
- truthound_dashboard/api/mask.py +1 -2
- truthound_dashboard/api/middleware.py +2 -1
- truthound_dashboard/api/model_monitoring.py +435 -311
- truthound_dashboard/api/notifications.py +227 -191
- truthound_dashboard/api/notifications_advanced.py +21 -20
- truthound_dashboard/api/observability.py +586 -0
- truthound_dashboard/api/plugins.py +2 -433
- truthound_dashboard/api/profile.py +199 -37
- truthound_dashboard/api/quality_reporter.py +701 -0
- truthound_dashboard/api/reports.py +7 -16
- truthound_dashboard/api/router.py +66 -0
- truthound_dashboard/api/rule_suggestions.py +5 -5
- truthound_dashboard/api/scan.py +17 -19
- truthound_dashboard/api/schedules.py +85 -50
- truthound_dashboard/api/schema_evolution.py +6 -6
- truthound_dashboard/api/schema_watcher.py +667 -0
- truthound_dashboard/api/sources.py +98 -27
- truthound_dashboard/api/tiering.py +1323 -0
- truthound_dashboard/api/triggers.py +14 -11
- truthound_dashboard/api/validations.py +12 -11
- truthound_dashboard/api/versioning.py +1 -6
- truthound_dashboard/core/__init__.py +129 -3
- truthound_dashboard/core/actions/__init__.py +62 -0
- truthound_dashboard/core/actions/custom.py +426 -0
- truthound_dashboard/core/actions/notifications.py +910 -0
- truthound_dashboard/core/actions/storage.py +472 -0
- truthound_dashboard/core/actions/webhook.py +281 -0
- truthound_dashboard/core/anomaly.py +262 -67
- truthound_dashboard/core/anomaly_explainer.py +4 -3
- truthound_dashboard/core/backends/__init__.py +67 -0
- truthound_dashboard/core/backends/base.py +299 -0
- truthound_dashboard/core/backends/errors.py +191 -0
- truthound_dashboard/core/backends/factory.py +423 -0
- truthound_dashboard/core/backends/mock_backend.py +451 -0
- truthound_dashboard/core/backends/truthound_backend.py +718 -0
- truthound_dashboard/core/checkpoint/__init__.py +87 -0
- truthound_dashboard/core/checkpoint/adapters.py +814 -0
- truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
- truthound_dashboard/core/checkpoint/runner.py +270 -0
- truthound_dashboard/core/connections.py +437 -10
- truthound_dashboard/core/converters/__init__.py +14 -0
- truthound_dashboard/core/converters/truthound.py +620 -0
- truthound_dashboard/core/cross_alerts.py +540 -320
- truthound_dashboard/core/datasource_factory.py +1672 -0
- truthound_dashboard/core/drift_monitor.py +216 -20
- truthound_dashboard/core/enterprise_sampling.py +1291 -0
- truthound_dashboard/core/interfaces/__init__.py +225 -0
- truthound_dashboard/core/interfaces/actions.py +652 -0
- truthound_dashboard/core/interfaces/base.py +247 -0
- truthound_dashboard/core/interfaces/checkpoint.py +676 -0
- truthound_dashboard/core/interfaces/protocols.py +664 -0
- truthound_dashboard/core/interfaces/reporters.py +650 -0
- truthound_dashboard/core/interfaces/routing.py +646 -0
- truthound_dashboard/core/interfaces/triggers.py +619 -0
- truthound_dashboard/core/lineage.py +407 -71
- truthound_dashboard/core/model_monitoring.py +431 -3
- truthound_dashboard/core/notifications/base.py +4 -0
- truthound_dashboard/core/notifications/channels.py +501 -1203
- truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
- truthound_dashboard/core/notifications/deduplication/service.py +131 -348
- truthound_dashboard/core/notifications/dispatcher.py +202 -11
- truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
- truthound_dashboard/core/notifications/escalation/engine.py +168 -358
- truthound_dashboard/core/notifications/routing/__init__.py +88 -128
- truthound_dashboard/core/notifications/routing/engine.py +90 -317
- truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
- truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
- truthound_dashboard/core/notifications/throttling/builder.py +117 -255
- truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
- truthound_dashboard/core/phase5/collaboration.py +1 -1
- truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
- truthound_dashboard/core/quality_reporter.py +1359 -0
- truthound_dashboard/core/report_history.py +0 -6
- truthound_dashboard/core/reporters/__init__.py +175 -14
- truthound_dashboard/core/reporters/adapters.py +943 -0
- truthound_dashboard/core/reporters/base.py +0 -3
- truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
- truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
- truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
- truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
- truthound_dashboard/core/reporters/compat.py +266 -0
- truthound_dashboard/core/reporters/csv_reporter.py +2 -35
- truthound_dashboard/core/reporters/factory.py +526 -0
- truthound_dashboard/core/reporters/interfaces.py +745 -0
- truthound_dashboard/core/reporters/registry.py +1 -10
- truthound_dashboard/core/scheduler.py +165 -0
- truthound_dashboard/core/schema_evolution.py +3 -3
- truthound_dashboard/core/schema_watcher.py +1528 -0
- truthound_dashboard/core/services.py +595 -76
- truthound_dashboard/core/store_manager.py +810 -0
- truthound_dashboard/core/streaming_anomaly.py +169 -4
- truthound_dashboard/core/tiering.py +1309 -0
- truthound_dashboard/core/triggers/evaluators.py +178 -8
- truthound_dashboard/core/truthound_adapter.py +2620 -197
- truthound_dashboard/core/unified_alerts.py +23 -20
- truthound_dashboard/db/__init__.py +8 -0
- truthound_dashboard/db/database.py +8 -2
- truthound_dashboard/db/models.py +944 -25
- truthound_dashboard/db/repository.py +2 -0
- truthound_dashboard/main.py +11 -0
- truthound_dashboard/schemas/__init__.py +177 -16
- truthound_dashboard/schemas/base.py +44 -23
- truthound_dashboard/schemas/collaboration.py +19 -6
- truthound_dashboard/schemas/cross_alerts.py +19 -3
- truthound_dashboard/schemas/drift.py +61 -55
- truthound_dashboard/schemas/drift_monitor.py +67 -23
- truthound_dashboard/schemas/enterprise_sampling.py +653 -0
- truthound_dashboard/schemas/lineage.py +0 -33
- truthound_dashboard/schemas/mask.py +10 -8
- truthound_dashboard/schemas/model_monitoring.py +89 -10
- truthound_dashboard/schemas/notifications_advanced.py +13 -0
- truthound_dashboard/schemas/observability.py +453 -0
- truthound_dashboard/schemas/plugins.py +0 -280
- truthound_dashboard/schemas/profile.py +154 -247
- truthound_dashboard/schemas/quality_reporter.py +403 -0
- truthound_dashboard/schemas/reports.py +2 -2
- truthound_dashboard/schemas/rule_suggestion.py +8 -1
- truthound_dashboard/schemas/scan.py +4 -24
- truthound_dashboard/schemas/schedule.py +11 -3
- truthound_dashboard/schemas/schema_watcher.py +727 -0
- truthound_dashboard/schemas/source.py +17 -2
- truthound_dashboard/schemas/tiering.py +822 -0
- truthound_dashboard/schemas/triggers.py +16 -0
- truthound_dashboard/schemas/unified_alerts.py +7 -0
- truthound_dashboard/schemas/validation.py +0 -13
- truthound_dashboard/schemas/validators/base.py +41 -21
- truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
- truthound_dashboard/schemas/validators/localization_validators.py +273 -0
- truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
- truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
- truthound_dashboard/schemas/validators/referential_validators.py +312 -0
- truthound_dashboard/schemas/validators/registry.py +93 -8
- truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
- truthound_dashboard/schemas/versioning.py +1 -6
- truthound_dashboard/static/index.html +2 -2
- truthound_dashboard-1.5.0.dist-info/METADATA +309 -0
- {truthound_dashboard-1.4.3.dist-info → truthound_dashboard-1.5.0.dist-info}/RECORD +149 -148
- truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
- truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
- truthound_dashboard/core/plugins/hooks/manager.py +0 -403
- truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
- truthound_dashboard/core/reporters/junit_reporter.py +0 -233
- truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
- truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
- truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
- truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
- truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
- truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
- truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
- truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
- truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
- truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
- truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
- truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
- truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
- truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
- truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
- truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
- truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
- truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
- truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
- truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
- truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
- truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
- truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
- truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
- truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
- truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
- truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
- truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
- truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
- truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
- truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
- truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
- truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
- truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
- truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
- truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
- truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
- truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
- truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
- truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
- truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
- truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
- truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
- truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
- truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
- truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
- truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
- truthound_dashboard-1.4.3.dist-info/METADATA +0 -505
- {truthound_dashboard-1.4.3.dist-info → truthound_dashboard-1.5.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.4.3.dist-info → truthound_dashboard-1.5.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.4.3.dist-info → truthound_dashboard-1.5.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,10 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
This module provides services for managing data lineage graphs,
|
|
4
4
|
including node and edge CRUD, impact analysis, and auto-discovery.
|
|
5
|
+
|
|
6
|
+
Uses truthound.lineage for advanced features when available:
|
|
7
|
+
- LineageTracker: Automatic operation tracking
|
|
8
|
+
- ImpactAnalyzer: What-if analysis and schema change impact
|
|
9
|
+
- OpenLineage integration for industry-standard lineage events
|
|
5
10
|
"""
|
|
6
11
|
|
|
7
12
|
from __future__ import annotations
|
|
8
13
|
|
|
14
|
+
import logging
|
|
9
15
|
from collections.abc import Sequence
|
|
10
16
|
from datetime import datetime
|
|
11
17
|
from typing import Any, Literal
|
|
@@ -16,6 +22,8 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
16
22
|
from truthound_dashboard.db import BaseRepository
|
|
17
23
|
from truthound_dashboard.db.models import AnomalyDetection, LineageEdge, LineageNode, Source
|
|
18
24
|
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
19
27
|
|
|
20
28
|
class LineageNodeRepository(BaseRepository[LineageNode]):
|
|
21
29
|
"""Repository for LineageNode model operations."""
|
|
@@ -73,6 +81,25 @@ class LineageNodeRepository(BaseRepository[LineageNode]):
|
|
|
73
81
|
filters=[LineageNode.node_type == node_type],
|
|
74
82
|
)
|
|
75
83
|
|
|
84
|
+
async def get_by_name_and_type(
|
|
85
|
+
self, name: str, node_type: str
|
|
86
|
+
) -> LineageNode | None:
|
|
87
|
+
"""Get a node by name and type combination.
|
|
88
|
+
|
|
89
|
+
Args:
|
|
90
|
+
name: Node name.
|
|
91
|
+
node_type: Node type (source, transform, sink).
|
|
92
|
+
|
|
93
|
+
Returns:
|
|
94
|
+
LineageNode or None if not found.
|
|
95
|
+
"""
|
|
96
|
+
result = await self.session.execute(
|
|
97
|
+
select(LineageNode)
|
|
98
|
+
.where(LineageNode.name == name, LineageNode.node_type == node_type)
|
|
99
|
+
.limit(1)
|
|
100
|
+
)
|
|
101
|
+
return result.scalar_one_or_none()
|
|
102
|
+
|
|
76
103
|
|
|
77
104
|
class LineageEdgeRepository(BaseRepository[LineageEdge]):
|
|
78
105
|
"""Repository for LineageEdge model operations."""
|
|
@@ -167,9 +194,9 @@ class LineageService:
|
|
|
167
194
|
|
|
168
195
|
Provides functionality for:
|
|
169
196
|
- Node and edge CRUD operations
|
|
170
|
-
- Impact analysis (upstream/downstream)
|
|
171
|
-
- Auto-discovery from source metadata
|
|
197
|
+
- Impact analysis (upstream/downstream) using truthound.lineage.ImpactAnalyzer
|
|
172
198
|
- Position management for visualization
|
|
199
|
+
- Integration with truthound.lineage.LineageTracker for automatic tracking
|
|
173
200
|
"""
|
|
174
201
|
|
|
175
202
|
def __init__(self, session: AsyncSession) -> None:
|
|
@@ -182,6 +209,44 @@ class LineageService:
|
|
|
182
209
|
self.node_repo = LineageNodeRepository(session)
|
|
183
210
|
self.edge_repo = LineageEdgeRepository(session)
|
|
184
211
|
|
|
212
|
+
# Initialize truthound lineage components if available
|
|
213
|
+
self._tracker = None
|
|
214
|
+
self._impact_analyzer = None
|
|
215
|
+
self._init_truthound_lineage()
|
|
216
|
+
|
|
217
|
+
def _init_truthound_lineage(self) -> None:
|
|
218
|
+
"""Initialize truthound lineage components if available."""
|
|
219
|
+
try:
|
|
220
|
+
from truthound.lineage import LineageTracker, ImpactAnalyzer, LineageConfig
|
|
221
|
+
|
|
222
|
+
# Create lineage tracker with default configuration
|
|
223
|
+
config = LineageConfig(
|
|
224
|
+
auto_track=True,
|
|
225
|
+
track_column_level=True,
|
|
226
|
+
)
|
|
227
|
+
self._tracker = LineageTracker(config)
|
|
228
|
+
logger.info("truthound.lineage.LineageTracker initialized")
|
|
229
|
+
|
|
230
|
+
except ImportError:
|
|
231
|
+
logger.debug("truthound.lineage not available, using SQLAlchemy-based implementation")
|
|
232
|
+
self._tracker = None
|
|
233
|
+
self._impact_analyzer = None
|
|
234
|
+
|
|
235
|
+
def _sync_tracker_with_db(self) -> None:
|
|
236
|
+
"""Sync the truthound tracker with database state."""
|
|
237
|
+
if self._tracker is None:
|
|
238
|
+
return
|
|
239
|
+
|
|
240
|
+
try:
|
|
241
|
+
from truthound.lineage import ImpactAnalyzer
|
|
242
|
+
|
|
243
|
+
# Sync nodes and edges from DB to tracker
|
|
244
|
+
# This ensures truthound analyzer has current state
|
|
245
|
+
self._impact_analyzer = ImpactAnalyzer(self._tracker.graph)
|
|
246
|
+
|
|
247
|
+
except Exception as e:
|
|
248
|
+
logger.warning(f"Failed to sync lineage tracker: {e}")
|
|
249
|
+
|
|
185
250
|
# =========================================================================
|
|
186
251
|
# Graph Operations
|
|
187
252
|
# =========================================================================
|
|
@@ -248,6 +313,20 @@ class LineageService:
|
|
|
248
313
|
# Node Operations
|
|
249
314
|
# =========================================================================
|
|
250
315
|
|
|
316
|
+
async def get_node_by_name_and_type(
|
|
317
|
+
self, name: str, node_type: str
|
|
318
|
+
) -> LineageNode | None:
|
|
319
|
+
"""Get a node by name and type.
|
|
320
|
+
|
|
321
|
+
Args:
|
|
322
|
+
name: Node name.
|
|
323
|
+
node_type: Node type (source, transform, sink).
|
|
324
|
+
|
|
325
|
+
Returns:
|
|
326
|
+
LineageNode or None if not found.
|
|
327
|
+
"""
|
|
328
|
+
return await self.node_repo.get_by_name_and_type(name, node_type)
|
|
329
|
+
|
|
251
330
|
async def create_node(
|
|
252
331
|
self,
|
|
253
332
|
*,
|
|
@@ -270,7 +349,17 @@ class LineageService:
|
|
|
270
349
|
|
|
271
350
|
Returns:
|
|
272
351
|
Created node.
|
|
352
|
+
|
|
353
|
+
Raises:
|
|
354
|
+
ValueError: If a node with same name and type already exists.
|
|
273
355
|
"""
|
|
356
|
+
# Check for existing node with same name and type
|
|
357
|
+
existing = await self.get_node_by_name_and_type(name, node_type)
|
|
358
|
+
if existing:
|
|
359
|
+
raise ValueError(
|
|
360
|
+
f"Node with name '{name}' and type '{node_type}' already exists"
|
|
361
|
+
)
|
|
362
|
+
|
|
274
363
|
node = await self.node_repo.create(
|
|
275
364
|
name=name,
|
|
276
365
|
node_type=node_type,
|
|
@@ -281,6 +370,43 @@ class LineageService:
|
|
|
281
370
|
)
|
|
282
371
|
return node
|
|
283
372
|
|
|
373
|
+
async def get_or_create_node(
|
|
374
|
+
self,
|
|
375
|
+
*,
|
|
376
|
+
name: str,
|
|
377
|
+
node_type: str,
|
|
378
|
+
source_id: str | None = None,
|
|
379
|
+
metadata: dict[str, Any] | None = None,
|
|
380
|
+
position_x: float | None = None,
|
|
381
|
+
position_y: float | None = None,
|
|
382
|
+
) -> tuple[LineageNode, bool]:
|
|
383
|
+
"""Get an existing node or create a new one.
|
|
384
|
+
|
|
385
|
+
Args:
|
|
386
|
+
name: Node name.
|
|
387
|
+
node_type: Node type (source, transform, sink).
|
|
388
|
+
source_id: Optional linked data source ID.
|
|
389
|
+
metadata: Optional additional metadata.
|
|
390
|
+
position_x: X coordinate for visualization.
|
|
391
|
+
position_y: Y coordinate for visualization.
|
|
392
|
+
|
|
393
|
+
Returns:
|
|
394
|
+
Tuple of (node, created) where created is True if new node was created.
|
|
395
|
+
"""
|
|
396
|
+
existing = await self.get_node_by_name_and_type(name, node_type)
|
|
397
|
+
if existing:
|
|
398
|
+
return existing, False
|
|
399
|
+
|
|
400
|
+
node = await self.node_repo.create(
|
|
401
|
+
name=name,
|
|
402
|
+
node_type=node_type,
|
|
403
|
+
source_id=source_id,
|
|
404
|
+
metadata_json=metadata,
|
|
405
|
+
position_x=position_x,
|
|
406
|
+
position_y=position_y,
|
|
407
|
+
)
|
|
408
|
+
return node, True
|
|
409
|
+
|
|
284
410
|
async def get_node(self, node_id: str) -> LineageNode | None:
|
|
285
411
|
"""Get a node by ID.
|
|
286
412
|
|
|
@@ -352,7 +478,7 @@ class LineageService:
|
|
|
352
478
|
target_node_id: str,
|
|
353
479
|
edge_type: str = "derives_from",
|
|
354
480
|
metadata: dict[str, Any] | None = None,
|
|
355
|
-
) -> LineageEdge:
|
|
481
|
+
) -> tuple[LineageEdge, LineageNode, LineageNode]:
|
|
356
482
|
"""Create a new lineage edge.
|
|
357
483
|
|
|
358
484
|
Args:
|
|
@@ -362,7 +488,7 @@ class LineageService:
|
|
|
362
488
|
metadata: Optional additional metadata.
|
|
363
489
|
|
|
364
490
|
Returns:
|
|
365
|
-
|
|
491
|
+
Tuple of (created edge, source node, target node).
|
|
366
492
|
|
|
367
493
|
Raises:
|
|
368
494
|
ValueError: If source or target node not found, or edge already exists.
|
|
@@ -386,7 +512,7 @@ class LineageService:
|
|
|
386
512
|
edge_type=edge_type,
|
|
387
513
|
metadata_json=metadata,
|
|
388
514
|
)
|
|
389
|
-
return edge
|
|
515
|
+
return edge, source_node, target_node
|
|
390
516
|
|
|
391
517
|
async def get_edge(self, edge_id: str) -> LineageEdge | None:
|
|
392
518
|
"""Get an edge by ID.
|
|
@@ -411,7 +537,7 @@ class LineageService:
|
|
|
411
537
|
return await self.edge_repo.delete(edge_id)
|
|
412
538
|
|
|
413
539
|
# =========================================================================
|
|
414
|
-
# Impact Analysis
|
|
540
|
+
# Impact Analysis (using truthound.lineage.ImpactAnalyzer when available)
|
|
415
541
|
# =========================================================================
|
|
416
542
|
|
|
417
543
|
async def analyze_impact(
|
|
@@ -422,6 +548,9 @@ class LineageService:
|
|
|
422
548
|
) -> dict[str, Any]:
|
|
423
549
|
"""Analyze upstream/downstream impact from a node.
|
|
424
550
|
|
|
551
|
+
Uses truthound.lineage.ImpactAnalyzer when available for advanced analysis
|
|
552
|
+
including schema change impact and what-if scenarios.
|
|
553
|
+
|
|
425
554
|
Args:
|
|
426
555
|
node_id: Starting node ID.
|
|
427
556
|
direction: Analysis direction.
|
|
@@ -437,20 +566,72 @@ class LineageService:
|
|
|
437
566
|
if root_node is None:
|
|
438
567
|
raise ValueError(f"Node '{node_id}' not found")
|
|
439
568
|
|
|
440
|
-
|
|
441
|
-
|
|
569
|
+
# Try to use truthound's ImpactAnalyzer for enhanced analysis
|
|
570
|
+
try:
|
|
571
|
+
from truthound.lineage import ImpactAnalyzer, LineageGraph
|
|
572
|
+
|
|
573
|
+
# Build lineage graph from database
|
|
574
|
+
graph = await self._build_truthound_graph()
|
|
575
|
+
|
|
576
|
+
# Create impact analyzer
|
|
577
|
+
analyzer = ImpactAnalyzer(graph)
|
|
578
|
+
|
|
579
|
+
# Perform impact analysis
|
|
580
|
+
impact_result = analyzer.analyze_impact(
|
|
581
|
+
node_id=node_id,
|
|
582
|
+
max_depth=max_depth,
|
|
583
|
+
)
|
|
584
|
+
|
|
585
|
+
# Convert truthound result to our format
|
|
586
|
+
# ImpactResult has: source_node, affected_nodes (downstream), total_affected, max_depth
|
|
587
|
+
downstream_nodes = [
|
|
588
|
+
{
|
|
589
|
+
"id": getattr(an.node, 'id', None),
|
|
590
|
+
"name": getattr(an.node, 'name', None),
|
|
591
|
+
"node_type": getattr(an.node, 'node_type', None),
|
|
592
|
+
"source_id": None,
|
|
593
|
+
"depth": getattr(an, 'depth', 0),
|
|
594
|
+
"impact_level": str(getattr(an, 'impact_level', '')),
|
|
595
|
+
}
|
|
596
|
+
for an in impact_result.affected_nodes
|
|
597
|
+
]
|
|
598
|
+
|
|
599
|
+
# For upstream, use fallback traversal (ImpactAnalyzer only does downstream)
|
|
600
|
+
upstream_nodes_list: list[dict[str, Any]] = []
|
|
601
|
+
if direction in ("upstream", "both"):
|
|
602
|
+
upstream = await self._traverse_upstream(node_id, max_depth)
|
|
603
|
+
upstream_nodes_list = [self._node_summary(n) for n in upstream]
|
|
604
|
+
|
|
605
|
+
return {
|
|
606
|
+
"root_node_id": node_id,
|
|
607
|
+
"root_node_name": root_node.name,
|
|
608
|
+
"direction": direction,
|
|
609
|
+
"upstream_nodes": upstream_nodes_list,
|
|
610
|
+
"downstream_nodes": downstream_nodes,
|
|
611
|
+
"affected_sources": [],
|
|
612
|
+
"upstream_count": len(upstream_nodes_list),
|
|
613
|
+
"downstream_count": len(downstream_nodes),
|
|
614
|
+
"total_affected": len(upstream_nodes_list) + len(downstream_nodes),
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
except ImportError:
|
|
618
|
+
logger.debug("truthound.lineage not available, using fallback implementation")
|
|
619
|
+
|
|
620
|
+
# Fallback to SQLAlchemy-based implementation
|
|
621
|
+
upstream_nodes_list: list[dict[str, Any]] = []
|
|
622
|
+
downstream_nodes_list: list[dict[str, Any]] = []
|
|
442
623
|
affected_sources: set[str] = set()
|
|
443
624
|
|
|
444
625
|
if direction in ("upstream", "both"):
|
|
445
626
|
upstream = await self._traverse_upstream(node_id, max_depth)
|
|
446
|
-
|
|
627
|
+
upstream_nodes_list = [self._node_summary(n) for n in upstream]
|
|
447
628
|
for n in upstream:
|
|
448
629
|
if n.source_id:
|
|
449
630
|
affected_sources.add(n.source_id)
|
|
450
631
|
|
|
451
632
|
if direction in ("downstream", "both"):
|
|
452
633
|
downstream = await self._traverse_downstream(node_id, max_depth)
|
|
453
|
-
|
|
634
|
+
downstream_nodes_list = [self._node_summary(n) for n in downstream]
|
|
454
635
|
for n in downstream:
|
|
455
636
|
if n.source_id:
|
|
456
637
|
affected_sources.add(n.source_id)
|
|
@@ -459,12 +640,223 @@ class LineageService:
|
|
|
459
640
|
"root_node_id": node_id,
|
|
460
641
|
"root_node_name": root_node.name,
|
|
461
642
|
"direction": direction,
|
|
462
|
-
"upstream_nodes":
|
|
463
|
-
"downstream_nodes":
|
|
643
|
+
"upstream_nodes": upstream_nodes_list,
|
|
644
|
+
"downstream_nodes": downstream_nodes_list,
|
|
464
645
|
"affected_sources": list(affected_sources),
|
|
465
|
-
"upstream_count": len(
|
|
466
|
-
"downstream_count": len(
|
|
467
|
-
"total_affected": len(
|
|
646
|
+
"upstream_count": len(upstream_nodes_list),
|
|
647
|
+
"downstream_count": len(downstream_nodes_list),
|
|
648
|
+
"total_affected": len(upstream_nodes_list) + len(downstream_nodes_list),
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
async def _build_truthound_graph(self) -> Any:
|
|
652
|
+
"""Build a truthound LineageGraph from database state."""
|
|
653
|
+
try:
|
|
654
|
+
from truthound.lineage import LineageGraph, LineageNode as TruthoundNode, LineageEdge as TruthoundEdge
|
|
655
|
+
from truthound.lineage.base import NodeType, EdgeType
|
|
656
|
+
|
|
657
|
+
# Map dashboard node types to truthound NodeType enum
|
|
658
|
+
node_type_map = {
|
|
659
|
+
"source": NodeType.SOURCE,
|
|
660
|
+
"transform": NodeType.TRANSFORMATION,
|
|
661
|
+
"sink": NodeType.EXTERNAL,
|
|
662
|
+
}
|
|
663
|
+
|
|
664
|
+
# Map dashboard edge types to truthound EdgeType enum
|
|
665
|
+
edge_type_map = {
|
|
666
|
+
"derives_from": EdgeType.DERIVED_FROM,
|
|
667
|
+
"transforms_to": EdgeType.TRANSFORMED_TO,
|
|
668
|
+
"joins_with": EdgeType.JOINED_WITH,
|
|
669
|
+
"filters_from": EdgeType.FILTERED_TO,
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
graph = LineageGraph()
|
|
673
|
+
|
|
674
|
+
# Add all nodes
|
|
675
|
+
nodes = await self.node_repo.get_all_nodes()
|
|
676
|
+
for node in nodes:
|
|
677
|
+
th_node_type = node_type_map.get(node.node_type, NodeType.EXTERNAL)
|
|
678
|
+
th_node = TruthoundNode(
|
|
679
|
+
id=node.id,
|
|
680
|
+
name=node.name,
|
|
681
|
+
node_type=th_node_type,
|
|
682
|
+
)
|
|
683
|
+
graph.add_node(th_node)
|
|
684
|
+
|
|
685
|
+
# Add all edges
|
|
686
|
+
edges = await self.edge_repo.get_all_edges()
|
|
687
|
+
for edge in edges:
|
|
688
|
+
th_edge_type = edge_type_map.get(edge.edge_type, EdgeType.DERIVED_FROM)
|
|
689
|
+
th_edge = TruthoundEdge(
|
|
690
|
+
source=edge.source_node_id,
|
|
691
|
+
target=edge.target_node_id,
|
|
692
|
+
edge_type=th_edge_type,
|
|
693
|
+
)
|
|
694
|
+
graph.add_edge(th_edge)
|
|
695
|
+
|
|
696
|
+
return graph
|
|
697
|
+
|
|
698
|
+
except ImportError:
|
|
699
|
+
return None
|
|
700
|
+
|
|
701
|
+
def _truthound_node_to_summary(self, node: Any) -> dict[str, Any]:
|
|
702
|
+
"""Convert truthound LineageNode to summary dict."""
|
|
703
|
+
node_type = getattr(node, 'node_type', None)
|
|
704
|
+
return {
|
|
705
|
+
"id": getattr(node, 'id', None),
|
|
706
|
+
"name": getattr(node, 'name', None),
|
|
707
|
+
"node_type": str(node_type.value) if hasattr(node_type, 'value') else str(node_type),
|
|
708
|
+
"source_id": None,
|
|
709
|
+
}
|
|
710
|
+
|
|
711
|
+
async def analyze_schema_change_impact(
|
|
712
|
+
self,
|
|
713
|
+
node_id: str,
|
|
714
|
+
schema_changes: list[dict[str, Any]],
|
|
715
|
+
) -> dict[str, Any]:
|
|
716
|
+
"""Analyze impact of schema changes on downstream nodes.
|
|
717
|
+
|
|
718
|
+
Uses truthound.lineage.ImpactAnalyzer for what-if analysis.
|
|
719
|
+
|
|
720
|
+
Args:
|
|
721
|
+
node_id: Node ID where schema change occurs.
|
|
722
|
+
schema_changes: List of schema changes (e.g., column removal, type change).
|
|
723
|
+
|
|
724
|
+
Returns:
|
|
725
|
+
Schema change impact analysis.
|
|
726
|
+
"""
|
|
727
|
+
try:
|
|
728
|
+
from truthound.lineage import ImpactAnalyzer
|
|
729
|
+
|
|
730
|
+
# Build lineage graph
|
|
731
|
+
graph = await self._build_truthound_graph()
|
|
732
|
+
if graph is None:
|
|
733
|
+
raise ImportError("truthound.lineage not available")
|
|
734
|
+
|
|
735
|
+
analyzer = ImpactAnalyzer(graph)
|
|
736
|
+
|
|
737
|
+
# Analyze schema change impact
|
|
738
|
+
impact_result = analyzer.analyze_schema_change_impact(
|
|
739
|
+
node_id=node_id,
|
|
740
|
+
changes=schema_changes,
|
|
741
|
+
)
|
|
742
|
+
|
|
743
|
+
return {
|
|
744
|
+
"node_id": node_id,
|
|
745
|
+
"schema_changes": schema_changes,
|
|
746
|
+
"breaking_changes": impact_result.breaking_changes,
|
|
747
|
+
"affected_downstream_nodes": [
|
|
748
|
+
self._truthound_node_to_summary(n)
|
|
749
|
+
for n in impact_result.affected_nodes
|
|
750
|
+
],
|
|
751
|
+
"impact_severity": impact_result.severity,
|
|
752
|
+
"recommendations": impact_result.recommendations,
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
except ImportError:
|
|
756
|
+
# Fallback: basic downstream analysis without schema-aware logic
|
|
757
|
+
downstream = await self._traverse_downstream(node_id, max_depth=10)
|
|
758
|
+
return {
|
|
759
|
+
"node_id": node_id,
|
|
760
|
+
"schema_changes": schema_changes,
|
|
761
|
+
"breaking_changes": [],
|
|
762
|
+
"affected_downstream_nodes": [self._node_summary(n) for n in downstream],
|
|
763
|
+
"impact_severity": "unknown",
|
|
764
|
+
"recommendations": ["Truthound lineage module not available for detailed analysis"],
|
|
765
|
+
}
|
|
766
|
+
|
|
767
|
+
async def track_operation(
|
|
768
|
+
self,
|
|
769
|
+
operation_type: str,
|
|
770
|
+
source_nodes: list[str],
|
|
771
|
+
target_nodes: list[str],
|
|
772
|
+
metadata: dict[str, Any] | None = None,
|
|
773
|
+
) -> dict[str, Any]:
|
|
774
|
+
"""Track a data operation for automatic lineage recording.
|
|
775
|
+
|
|
776
|
+
Uses truthound.lineage.LineageTracker for automatic operation tracking.
|
|
777
|
+
|
|
778
|
+
Args:
|
|
779
|
+
operation_type: Type of operation (e.g., 'transform', 'aggregate', 'join').
|
|
780
|
+
source_nodes: Source node IDs.
|
|
781
|
+
target_nodes: Target node IDs.
|
|
782
|
+
metadata: Optional operation metadata.
|
|
783
|
+
|
|
784
|
+
Returns:
|
|
785
|
+
Tracking result with created edges.
|
|
786
|
+
"""
|
|
787
|
+
try:
|
|
788
|
+
from truthound.lineage import LineageTracker, OperationType
|
|
789
|
+
|
|
790
|
+
if self._tracker is None:
|
|
791
|
+
self._init_truthound_lineage()
|
|
792
|
+
|
|
793
|
+
if self._tracker is not None:
|
|
794
|
+
# Map string to enum
|
|
795
|
+
op_type_map = {
|
|
796
|
+
"transform": OperationType.TRANSFORM,
|
|
797
|
+
"aggregate": OperationType.AGGREGATE,
|
|
798
|
+
"join": OperationType.JOIN,
|
|
799
|
+
"filter": OperationType.FILTER,
|
|
800
|
+
"derive": OperationType.DERIVE,
|
|
801
|
+
}
|
|
802
|
+
op_type = op_type_map.get(operation_type, OperationType.TRANSFORM)
|
|
803
|
+
|
|
804
|
+
# Track operation
|
|
805
|
+
result = self._tracker.track_operation(
|
|
806
|
+
operation_type=op_type,
|
|
807
|
+
source_nodes=source_nodes,
|
|
808
|
+
target_nodes=target_nodes,
|
|
809
|
+
metadata=metadata or {},
|
|
810
|
+
)
|
|
811
|
+
|
|
812
|
+
# Also persist to database
|
|
813
|
+
created_edges = []
|
|
814
|
+
for source_id in source_nodes:
|
|
815
|
+
for target_id in target_nodes:
|
|
816
|
+
try:
|
|
817
|
+
edge = await self.create_edge(
|
|
818
|
+
source_node_id=source_id,
|
|
819
|
+
target_node_id=target_id,
|
|
820
|
+
edge_type=operation_type,
|
|
821
|
+
metadata=metadata,
|
|
822
|
+
)
|
|
823
|
+
created_edges.append(self._edge_to_dict(edge))
|
|
824
|
+
except ValueError:
|
|
825
|
+
# Edge might already exist
|
|
826
|
+
pass
|
|
827
|
+
|
|
828
|
+
return {
|
|
829
|
+
"operation_type": operation_type,
|
|
830
|
+
"source_nodes": source_nodes,
|
|
831
|
+
"target_nodes": target_nodes,
|
|
832
|
+
"created_edges": created_edges,
|
|
833
|
+
"tracking_id": getattr(result, 'tracking_id', None),
|
|
834
|
+
}
|
|
835
|
+
|
|
836
|
+
except ImportError:
|
|
837
|
+
pass
|
|
838
|
+
|
|
839
|
+
# Fallback: just create edges in database
|
|
840
|
+
created_edges = []
|
|
841
|
+
for source_id in source_nodes:
|
|
842
|
+
for target_id in target_nodes:
|
|
843
|
+
try:
|
|
844
|
+
edge = await self.create_edge(
|
|
845
|
+
source_node_id=source_id,
|
|
846
|
+
target_node_id=target_id,
|
|
847
|
+
edge_type=operation_type,
|
|
848
|
+
metadata=metadata,
|
|
849
|
+
)
|
|
850
|
+
created_edges.append(self._edge_to_dict(edge))
|
|
851
|
+
except ValueError:
|
|
852
|
+
pass
|
|
853
|
+
|
|
854
|
+
return {
|
|
855
|
+
"operation_type": operation_type,
|
|
856
|
+
"source_nodes": source_nodes,
|
|
857
|
+
"target_nodes": target_nodes,
|
|
858
|
+
"created_edges": created_edges,
|
|
859
|
+
"tracking_id": None,
|
|
468
860
|
}
|
|
469
861
|
|
|
470
862
|
async def _traverse_upstream(
|
|
@@ -527,62 +919,6 @@ class LineageService:
|
|
|
527
919
|
|
|
528
920
|
return result
|
|
529
921
|
|
|
530
|
-
# =========================================================================
|
|
531
|
-
# Auto-Discovery
|
|
532
|
-
# =========================================================================
|
|
533
|
-
|
|
534
|
-
async def auto_discover(
    self,
    source_id: str,
    include_fk_relations: bool = True,
    max_depth: int = 3,
) -> dict[str, Any]:
    """Register a lineage node for a data source and return its graph.

    Placeholder discovery: no source metadata, SQL or foreign keys are
    analysed here. ``include_fk_relations`` and ``max_depth`` are accepted
    but not read by this implementation.

    Args:
        source_id: Source ID to discover from.
        include_fk_relations: Include foreign key relationships (unused).
        max_depth: Maximum discovery depth (unused).

    Returns:
        The result of ``get_graph(source_id)`` when a node for the source
        already exists; otherwise a summary dict with ``source_id``,
        ``discovered_nodes``, ``discovered_edges`` and ``graph``.

    Raises:
        ValueError: If no source with ``source_id`` is found.
    """
    # A node already registered for this source short-circuits discovery.
    if await self.node_repo.get_by_source_id(source_id):
        return await self.get_graph(source_id)

    from truthound_dashboard.db import Source as SourceModel

    # Look up the source record itself.
    lookup = select(SourceModel).where(SourceModel.id == source_id)
    source_row = (await self.session.execute(lookup)).scalar_one_or_none()
    if source_row is None:
        raise ValueError(f"Source '{source_id}' not found")

    # Create the single node that represents this source.
    await self.create_node(
        name=source_row.name,
        node_type="source",
        source_id=source_id,
        metadata={"auto_discovered": True, "source_type": source_row.type},
        position_x=100,
        position_y=100,
    )

    graph = await self.get_graph(source_id)
    return {
        "source_id": source_id,
        "discovered_nodes": 1,
        "discovered_edges": 0,
        "graph": graph,
    }
|
|
585
|
-
|
|
586
922
|
# =========================================================================
|
|
587
923
|
# Position Management
|
|
588
924
|
# =========================================================================
|