truthound-dashboard 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. truthound_dashboard/api/alerts.py +258 -0
  2. truthound_dashboard/api/anomaly.py +1302 -0
  3. truthound_dashboard/api/cross_alerts.py +352 -0
  4. truthound_dashboard/api/deps.py +143 -0
  5. truthound_dashboard/api/drift_monitor.py +540 -0
  6. truthound_dashboard/api/lineage.py +1151 -0
  7. truthound_dashboard/api/maintenance.py +363 -0
  8. truthound_dashboard/api/middleware.py +373 -1
  9. truthound_dashboard/api/model_monitoring.py +805 -0
  10. truthound_dashboard/api/notifications_advanced.py +2452 -0
  11. truthound_dashboard/api/plugins.py +2096 -0
  12. truthound_dashboard/api/profile.py +211 -14
  13. truthound_dashboard/api/reports.py +853 -0
  14. truthound_dashboard/api/router.py +147 -0
  15. truthound_dashboard/api/rule_suggestions.py +310 -0
  16. truthound_dashboard/api/schema_evolution.py +231 -0
  17. truthound_dashboard/api/sources.py +47 -3
  18. truthound_dashboard/api/triggers.py +190 -0
  19. truthound_dashboard/api/validations.py +13 -0
  20. truthound_dashboard/api/validators.py +333 -4
  21. truthound_dashboard/api/versioning.py +309 -0
  22. truthound_dashboard/api/websocket.py +301 -0
  23. truthound_dashboard/core/__init__.py +27 -0
  24. truthound_dashboard/core/anomaly.py +1395 -0
  25. truthound_dashboard/core/anomaly_explainer.py +633 -0
  26. truthound_dashboard/core/cache.py +206 -0
  27. truthound_dashboard/core/cached_services.py +422 -0
  28. truthound_dashboard/core/charts.py +352 -0
  29. truthound_dashboard/core/connections.py +1069 -42
  30. truthound_dashboard/core/cross_alerts.py +837 -0
  31. truthound_dashboard/core/drift_monitor.py +1477 -0
  32. truthound_dashboard/core/drift_sampling.py +669 -0
  33. truthound_dashboard/core/i18n/__init__.py +42 -0
  34. truthound_dashboard/core/i18n/detector.py +173 -0
  35. truthound_dashboard/core/i18n/messages.py +564 -0
  36. truthound_dashboard/core/lineage.py +971 -0
  37. truthound_dashboard/core/maintenance.py +443 -5
  38. truthound_dashboard/core/model_monitoring.py +1043 -0
  39. truthound_dashboard/core/notifications/channels.py +1020 -1
  40. truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
  41. truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
  42. truthound_dashboard/core/notifications/deduplication/service.py +400 -0
  43. truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
  44. truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
  45. truthound_dashboard/core/notifications/dispatcher.py +43 -0
  46. truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
  47. truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
  48. truthound_dashboard/core/notifications/escalation/engine.py +429 -0
  49. truthound_dashboard/core/notifications/escalation/models.py +336 -0
  50. truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
  51. truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
  52. truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
  53. truthound_dashboard/core/notifications/events.py +49 -0
  54. truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
  55. truthound_dashboard/core/notifications/metrics/base.py +528 -0
  56. truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
  57. truthound_dashboard/core/notifications/routing/__init__.py +169 -0
  58. truthound_dashboard/core/notifications/routing/combinators.py +184 -0
  59. truthound_dashboard/core/notifications/routing/config.py +375 -0
  60. truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
  61. truthound_dashboard/core/notifications/routing/engine.py +382 -0
  62. truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
  63. truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
  64. truthound_dashboard/core/notifications/routing/rules.py +625 -0
  65. truthound_dashboard/core/notifications/routing/validator.py +678 -0
  66. truthound_dashboard/core/notifications/service.py +2 -0
  67. truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
  68. truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
  69. truthound_dashboard/core/notifications/throttling/builder.py +311 -0
  70. truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
  71. truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
  72. truthound_dashboard/core/openlineage.py +1028 -0
  73. truthound_dashboard/core/plugins/__init__.py +39 -0
  74. truthound_dashboard/core/plugins/docs/__init__.py +39 -0
  75. truthound_dashboard/core/plugins/docs/extractor.py +703 -0
  76. truthound_dashboard/core/plugins/docs/renderers.py +804 -0
  77. truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
  78. truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
  79. truthound_dashboard/core/plugins/hooks/manager.py +403 -0
  80. truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
  81. truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
  82. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
  83. truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
  84. truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
  85. truthound_dashboard/core/plugins/loader.py +504 -0
  86. truthound_dashboard/core/plugins/registry.py +810 -0
  87. truthound_dashboard/core/plugins/reporter_executor.py +588 -0
  88. truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
  89. truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
  90. truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
  91. truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
  92. truthound_dashboard/core/plugins/sandbox.py +617 -0
  93. truthound_dashboard/core/plugins/security/__init__.py +68 -0
  94. truthound_dashboard/core/plugins/security/analyzer.py +535 -0
  95. truthound_dashboard/core/plugins/security/policies.py +311 -0
  96. truthound_dashboard/core/plugins/security/protocols.py +296 -0
  97. truthound_dashboard/core/plugins/security/signing.py +842 -0
  98. truthound_dashboard/core/plugins/security.py +446 -0
  99. truthound_dashboard/core/plugins/validator_executor.py +401 -0
  100. truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
  101. truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
  102. truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
  103. truthound_dashboard/core/plugins/versioning/semver.py +266 -0
  104. truthound_dashboard/core/profile_comparison.py +601 -0
  105. truthound_dashboard/core/report_history.py +570 -0
  106. truthound_dashboard/core/reporters/__init__.py +57 -0
  107. truthound_dashboard/core/reporters/base.py +296 -0
  108. truthound_dashboard/core/reporters/csv_reporter.py +155 -0
  109. truthound_dashboard/core/reporters/html_reporter.py +598 -0
  110. truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
  111. truthound_dashboard/core/reporters/i18n/base.py +494 -0
  112. truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
  113. truthound_dashboard/core/reporters/json_reporter.py +160 -0
  114. truthound_dashboard/core/reporters/junit_reporter.py +233 -0
  115. truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
  116. truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
  117. truthound_dashboard/core/reporters/registry.py +272 -0
  118. truthound_dashboard/core/rule_generator.py +2088 -0
  119. truthound_dashboard/core/scheduler.py +822 -12
  120. truthound_dashboard/core/schema_evolution.py +858 -0
  121. truthound_dashboard/core/services.py +152 -9
  122. truthound_dashboard/core/statistics.py +718 -0
  123. truthound_dashboard/core/streaming_anomaly.py +883 -0
  124. truthound_dashboard/core/triggers/__init__.py +45 -0
  125. truthound_dashboard/core/triggers/base.py +226 -0
  126. truthound_dashboard/core/triggers/evaluators.py +609 -0
  127. truthound_dashboard/core/triggers/factory.py +363 -0
  128. truthound_dashboard/core/unified_alerts.py +870 -0
  129. truthound_dashboard/core/validation_limits.py +509 -0
  130. truthound_dashboard/core/versioning.py +709 -0
  131. truthound_dashboard/core/websocket/__init__.py +59 -0
  132. truthound_dashboard/core/websocket/manager.py +512 -0
  133. truthound_dashboard/core/websocket/messages.py +130 -0
  134. truthound_dashboard/db/__init__.py +30 -0
  135. truthound_dashboard/db/models.py +3375 -3
  136. truthound_dashboard/main.py +22 -0
  137. truthound_dashboard/schemas/__init__.py +396 -1
  138. truthound_dashboard/schemas/anomaly.py +1258 -0
  139. truthound_dashboard/schemas/base.py +4 -0
  140. truthound_dashboard/schemas/cross_alerts.py +334 -0
  141. truthound_dashboard/schemas/drift_monitor.py +890 -0
  142. truthound_dashboard/schemas/lineage.py +428 -0
  143. truthound_dashboard/schemas/maintenance.py +154 -0
  144. truthound_dashboard/schemas/model_monitoring.py +374 -0
  145. truthound_dashboard/schemas/notifications_advanced.py +1363 -0
  146. truthound_dashboard/schemas/openlineage.py +704 -0
  147. truthound_dashboard/schemas/plugins.py +1293 -0
  148. truthound_dashboard/schemas/profile.py +420 -34
  149. truthound_dashboard/schemas/profile_comparison.py +242 -0
  150. truthound_dashboard/schemas/reports.py +285 -0
  151. truthound_dashboard/schemas/rule_suggestion.py +434 -0
  152. truthound_dashboard/schemas/schema_evolution.py +164 -0
  153. truthound_dashboard/schemas/source.py +117 -2
  154. truthound_dashboard/schemas/triggers.py +511 -0
  155. truthound_dashboard/schemas/unified_alerts.py +223 -0
  156. truthound_dashboard/schemas/validation.py +25 -1
  157. truthound_dashboard/schemas/validators/__init__.py +11 -0
  158. truthound_dashboard/schemas/validators/base.py +151 -0
  159. truthound_dashboard/schemas/versioning.py +152 -0
  160. truthound_dashboard/static/index.html +2 -2
  161. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -18
  162. truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
  163. truthound_dashboard/static/assets/index-BCA8H1hO.js +0 -574
  164. truthound_dashboard/static/assets/index-BNsSQ2fN.css +0 -1
  165. truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +0 -1
  166. truthound_dashboard-1.3.0.dist-info/RECORD +0 -110
  167. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
  168. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
  169. {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1028 @@
1
+ """OpenLineage emitter service.
2
+
3
+ This module provides services for converting dashboard lineage to OpenLineage events
4
+ and emitting them to external systems.
5
+
6
+ The OpenLineage emitter follows the Protocol-based design for extensibility:
7
+ - IOpenLineageEmitter: Core emission interface
8
+ - IEventTransformer: Transform lineage to OpenLineage events
9
+ - ITransport: Send events to external systems
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import logging
16
+ import time
17
+ from abc import ABC, abstractmethod
18
+ from datetime import datetime
19
+ from typing import Any, Protocol, runtime_checkable
20
+ from uuid import uuid4
21
+
22
+ import httpx
23
+ from sqlalchemy import select
24
+ from sqlalchemy.ext.asyncio import AsyncSession
25
+
26
+ from truthound_dashboard.core.lineage import LineageService
27
+ from truthound_dashboard.db.models import LineageNode, LineageEdge, Source, OpenLineageWebhook
28
+ from truthound_dashboard.schemas.openlineage import (
29
+ OpenLineageDataset,
30
+ OpenLineageEvent,
31
+ OpenLineageJob,
32
+ OpenLineageRun,
33
+ RunState,
34
+ SchemaDatasetFacet,
35
+ SchemaField,
36
+ build_dataset_namespace,
37
+ )
38
+
39
+ logger = logging.getLogger(__name__)
40
+
41
+
42
+ # =============================================================================
43
+ # Protocols for Extensibility
44
+ # =============================================================================
45
+
46
+
47
@runtime_checkable
class IEventTransformer(Protocol):
    """Structural interface for lineage-graph-to-OpenLineage converters.

    Any object exposing a compatible ``transform`` method satisfies this
    protocol; because it is ``runtime_checkable``, ``isinstance`` checks
    against it are supported.
    """

    def transform(
        self,
        nodes: list[dict[str, Any]],
        edges: list[dict[str, Any]],
        job_namespace: str,
        job_name: str,
        include_schema: bool = True,
    ) -> list[OpenLineageEvent]:
        """Convert a lineage graph into OpenLineage events.

        Args:
            nodes: Lineage nodes, one dictionary per node.
            edges: Lineage edges, one dictionary per edge.
            job_namespace: Namespace assigned to the emitted job.
            job_name: Name assigned to the emitted job.
            include_schema: When true, schema facets are attached.

        Returns:
            The OpenLineage events describing the graph.
        """
        ...
72
+
73
+
74
@runtime_checkable
class ITransport(Protocol):
    """Structural interface for objects that deliver OpenLineage events.

    Any object exposing a compatible asynchronous ``send`` method satisfies
    this protocol; because it is ``runtime_checkable``, ``isinstance``
    checks against it are supported.
    """

    async def send(
        self,
        events: list[OpenLineageEvent],
        url: str,
        api_key: str | None = None,
        headers: dict[str, str] | None = None,
        timeout: int = 30,
    ) -> tuple[int, int]:
        """Deliver events to an external system.

        Args:
            events: The events to deliver.
            url: Destination URL.
            api_key: API key to authenticate with, if any.
            headers: Extra headers to include, if any.
            timeout: Request timeout, in seconds.

        Returns:
            A ``(sent_count, failed_count)`` pair.
        """
        ...
99
+
100
+
101
+ # =============================================================================
102
+ # Event Transformer Implementation
103
+ # =============================================================================
104
+
105
+
106
class LineageToOpenLineageTransformer:
    """Transforms dashboard lineage graph to OpenLineage events.

    Two granularities are offered:

    - ``transform``: one run for the whole graph (START + COMPLETE), with
      source nodes as inputs and sink/transform nodes as outputs.
    - ``transform_per_node``: one run per non-source node, whose inputs are
      the nodes that have an edge pointing at it.
    """

    def __init__(self, producer: str = "https://github.com/truthound/truthound-dashboard"):
        """Initialize transformer.

        Args:
            producer: Producer URI for OpenLineage events.
        """
        self.producer = producer

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as an ISO-8601 string ending in ``Z``.

        ``datetime.utcnow()`` is deprecated since Python 3.12, so an aware
        timestamp is taken and its tzinfo dropped before formatting; the
        resulting string has the same shape as ``utcnow().isoformat() + "Z"``.
        """
        # Local import: only ``datetime`` is imported at module level.
        from datetime import timezone

        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    def transform(
        self,
        nodes: list[dict[str, Any]],
        edges: list[dict[str, Any]],
        job_namespace: str,
        job_name: str,
        include_schema: bool = True,
    ) -> list[OpenLineageEvent]:
        """Transform lineage graph to OpenLineage events.

        Creates events representing the complete data flow:
        1. START event with all inputs
        2. COMPLETE event with all inputs and outputs

        Args:
            nodes: List of lineage nodes.
            edges: List of lineage edges (only counted for the job facet).
            job_namespace: Namespace for the job.
            job_name: Name of the job.
            include_schema: Whether to include schema facets.

        Returns:
            List of OpenLineage events; empty when ``nodes`` is empty.
        """
        if not nodes:
            return []

        run_id = str(uuid4())

        # Categorize nodes by type
        source_nodes = [n for n in nodes if n.get("node_type") == "source"]
        transform_nodes = [n for n in nodes if n.get("node_type") == "transform"]
        sink_nodes = [n for n in nodes if n.get("node_type") == "sink"]

        # Source nodes are the run's inputs
        inputs = [
            self._node_to_dataset(node, include_schema)
            for node in source_nodes
        ]

        # Sink nodes first, then transform nodes, are the run's outputs
        outputs = [
            self._node_to_dataset(node, include_schema)
            for node in sink_nodes + transform_nodes
        ]

        # Create job with facets
        job = OpenLineageJob(
            namespace=job_namespace,
            name=job_name,
            facets={
                "truthound": {
                    "_producer": self.producer,
                    "_schemaURL": "https://truthound.io/spec/facets/1-0-0/TruthoundJobFacet.json",
                    "total_nodes": len(nodes),
                    "total_edges": len(edges),
                    "source_count": len(source_nodes),
                    "transform_count": len(transform_nodes),
                    "sink_count": len(sink_nodes),
                }
            },
        )

        # Create run (shared by both events so they pair up by run_id)
        run = OpenLineageRun(
            run_id=run_id,
            facets={
                "processing_engine": {
                    "_producer": self.producer,
                    "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/ProcessingEngineRunFacet.json",
                    "version": "1.0.0",
                    "name": "truthound-dashboard",
                }
            },
        )

        start_event = OpenLineageEvent(
            event_time=self._utc_timestamp(),
            event_type=RunState.START,
            producer=self.producer,
            run=run,
            job=job,
            inputs=inputs,
            outputs=[],  # No outputs at start
        )

        complete_event = OpenLineageEvent(
            event_time=self._utc_timestamp(),
            event_type=RunState.COMPLETE,
            producer=self.producer,
            run=run,
            job=job,
            inputs=inputs,
            outputs=outputs,
        )

        return [start_event, complete_event]

    def transform_per_node(
        self,
        nodes: list[dict[str, Any]],
        edges: list[dict[str, Any]],
        job_namespace: str,
        include_schema: bool = True,
    ) -> list[OpenLineageEvent]:
        """Transform each non-source node to a separate OpenLineage run.

        This creates finer-grained events where each non-source node
        becomes its own job with explicit input/output relationships.

        Args:
            nodes: List of lineage nodes; each must carry an ``"id"`` key.
            edges: List of lineage edges.
            job_namespace: Namespace for jobs.
            include_schema: Whether to include schema facets.

        Returns:
            List of OpenLineage events (2 per non-source node:
            START + COMPLETE).
        """
        events: list[OpenLineageEvent] = []

        # Build maps for lookups
        node_map = {n["id"]: n for n in nodes}
        incoming_edges = self._build_incoming_edge_map(edges)

        for node in nodes:
            if node.get("node_type") == "source":
                continue  # Source nodes are only ever used as inputs

            node_id = node["id"]
            run_id = str(uuid4())

            # Inputs are the nodes pointing at this node; edges that refer
            # to nodes missing from ``nodes`` are ignored.
            inputs = [
                self._node_to_dataset(node_map[nid], include_schema)
                for nid in incoming_edges.get(node_id, [])
                if nid in node_map
            ]

            # This node is the output
            outputs = [self._node_to_dataset(node, include_schema)]

            # Fall back to the node id when the name is missing or empty.
            job_name = f"process_{node.get('name') or node_id}"
            job = OpenLineageJob(
                namespace=job_namespace,
                name=job_name,
                facets={
                    "truthound": {
                        "_producer": self.producer,
                        "_schemaURL": "https://truthound.io/spec/facets/1-0-0/TruthoundJobFacet.json",
                        "node_id": node_id,
                        "node_type": node.get("node_type"),
                    }
                },
            )

            run = OpenLineageRun(run_id=run_id)

            # START event
            events.append(
                OpenLineageEvent(
                    event_time=self._utc_timestamp(),
                    event_type=RunState.START,
                    producer=self.producer,
                    run=run,
                    job=job,
                    inputs=inputs,
                    outputs=[],
                )
            )

            # COMPLETE event
            events.append(
                OpenLineageEvent(
                    event_time=self._utc_timestamp(),
                    event_type=RunState.COMPLETE,
                    producer=self.producer,
                    run=run,
                    job=job,
                    inputs=inputs,
                    outputs=outputs,
                )
            )

        return events

    def _node_to_dataset(
        self,
        node: dict[str, Any],
        include_schema: bool,
    ) -> OpenLineageDataset:
        """Convert a lineage node to an OpenLineage dataset.

        Args:
            node: Lineage node dictionary.
            include_schema: Whether to include schema.

        Returns:
            OpenLineageDataset instance.
        """
        metadata = node.get("metadata") or {}
        source_type = metadata.get("source_type", "unknown")

        # Build namespace based on source type
        namespace = build_dataset_namespace(source_type, metadata.get("config"))

        facets: dict[str, Any] = {
            "truthound": {
                "_producer": self.producer,
                "_schemaURL": "https://truthound.io/spec/facets/1-0-0/TruthoundDatasetFacet.json",
                "node_id": node.get("id"),
                "node_type": node.get("node_type"),
                "source_id": node.get("source_id"),
            }
        }

        # Add schema facet if available
        if include_schema and metadata.get("schema_fields"):
            facets["schema"] = {
                "_producer": self.producer,
                "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json",
                "fields": [
                    {"name": f.get("name"), "type": f.get("type", "string")}
                    for f in metadata["schema_fields"]
                ],
            }

        return OpenLineageDataset(
            namespace=namespace,
            # ``or`` also covers a name that is present but None/empty.
            name=node.get("name") or "unknown",
            facets=facets,
        )

    def _build_edge_map(
        self,
        edges: list[dict[str, Any]],
    ) -> dict[str, list[str]]:
        """Build map from source_node_id to target_node_ids.

        Edges missing either endpoint are skipped.
        """
        edge_map: dict[str, list[str]] = {}
        for edge in edges:
            source_id = edge.get("source_node_id")
            target_id = edge.get("target_node_id")
            if source_id and target_id:
                edge_map.setdefault(source_id, []).append(target_id)
        return edge_map

    def _build_incoming_edge_map(
        self,
        edges: list[dict[str, Any]],
    ) -> dict[str, list[str]]:
        """Build map from target_node_id to source_node_ids.

        Edges missing either endpoint are skipped.
        """
        edge_map: dict[str, list[str]] = {}
        for edge in edges:
            source_id = edge.get("source_node_id")
            target_id = edge.get("target_node_id")
            if source_id and target_id:
                edge_map.setdefault(target_id, []).append(source_id)
        return edge_map

    def _build_outgoing_edge_map(
        self,
        edges: list[dict[str, Any]],
    ) -> dict[str, list[str]]:
        """Build map from source_node_id to target_node_ids."""
        return self._build_edge_map(edges)
401
+
402
+
403
+ # =============================================================================
404
+ # Transport Implementation
405
+ # =============================================================================
406
+
407
+
408
class HttpTransport:
    """Delivers OpenLineage events one at a time over HTTP POST."""

    def __init__(self, client: httpx.AsyncClient | None = None):
        """Set up the transport.

        Args:
            client: An existing httpx client to reuse. When omitted, a client
                is created for each ``send`` call and closed afterwards.
        """
        self._client = client
        # Only a client created by this transport is closed by it.
        self._owns_client = client is None

    async def send(
        self,
        events: list[OpenLineageEvent],
        url: str,
        api_key: str | None = None,
        headers: dict[str, str] | None = None,
        timeout: int = 30,
    ) -> tuple[int, int]:
        """POST every event to ``url`` individually.

        Args:
            events: The events to deliver.
            url: Destination URL.
            api_key: If given, sent as a ``Bearer`` token.
            headers: Extra headers; these override the defaults on clash.
            timeout: Request timeout, in seconds.

        Returns:
            A ``(sent_count, failed_count)`` pair.
        """
        if not events:
            return 0, 0

        http = self._client or httpx.AsyncClient()
        delivered, rejected = 0, 0

        try:
            merged_headers = {"Content-Type": "application/json"}
            if api_key:
                merged_headers["Authorization"] = f"Bearer {api_key}"
            if headers:
                merged_headers.update(headers)

            for event in events:
                if await self._post_event(http, event, url, merged_headers, timeout):
                    delivered += 1
                else:
                    rejected += 1
        finally:
            if self._owns_client and http:
                await http.aclose()

        return delivered, rejected

    async def _post_event(
        self,
        http: Any,
        event: OpenLineageEvent,
        url: str,
        request_headers: dict[str, str],
        timeout: int,
    ) -> bool:
        """POST a single event; return True only for a 200/201/202 reply.

        Any exception raised while posting is logged and counted as a
        failure rather than propagated.
        """
        try:
            response = await http.post(
                url,
                json=event.model_dump(by_alias=True),
                headers=request_headers,
                timeout=timeout,
            )
            if response.status_code in (200, 201, 202):
                return True
            logger.warning(
                f"Failed to send event: {response.status_code} {response.text}"
            )
            return False
        except Exception as e:
            logger.error(f"Error sending event: {e}")
            return False
480
+
481
+
482
class BatchHttpTransport:
    """Batched HTTP transport for efficient bulk sending.

    Events are split into consecutive chunks of ``batch_size`` and each chunk
    is POSTed as a single JSON array.
    """

    def __init__(
        self,
        batch_size: int = 100,
        client: httpx.AsyncClient | None = None,
    ):
        """Initialize transport.

        Args:
            batch_size: Number of events per batch; must be at least 1.
            client: Optional pre-configured httpx client. When omitted, a
                client is created for each ``send`` call and closed afterwards.

        Raises:
            ValueError: If ``batch_size`` is less than 1.
        """
        # Fail fast: a zero step makes range() raise at send time, and a
        # negative step yields no batches, silently dropping every event.
        if batch_size < 1:
            raise ValueError(
                f"batch_size must be a positive integer, got {batch_size!r}"
            )
        self.batch_size = batch_size
        self._client = client
        # Only a client created by this transport is closed by it.
        self._owns_client = client is None

    async def send(
        self,
        events: list[OpenLineageEvent],
        url: str,
        api_key: str | None = None,
        headers: dict[str, str] | None = None,
        timeout: int = 30,
    ) -> tuple[int, int]:
        """Send events in batches.

        A batch counts as sent only when the response status is 200, 201 or
        202; otherwise (or if the request raises) every event in that batch
        is counted as failed and the next batch is still attempted.

        Args:
            events: List of events to send.
            url: Target URL.
            api_key: Optional API key, sent as a ``Bearer`` token.
            headers: Additional headers; these override the defaults on clash.
            timeout: Request timeout in seconds.

        Returns:
            Tuple of (sent_count, failed_count).
        """
        if not events:
            return 0, 0

        client = self._client or httpx.AsyncClient()
        sent_count = 0
        failed_count = 0

        try:
            request_headers = {
                "Content-Type": "application/json",
            }
            if api_key:
                request_headers["Authorization"] = f"Bearer {api_key}"
            if headers:
                request_headers.update(headers)

            # Process in batches
            for i in range(0, len(events), self.batch_size):
                batch = events[i : i + self.batch_size]
                batch_payload = [e.model_dump(by_alias=True) for e in batch]

                try:
                    response = await client.post(
                        url,
                        json=batch_payload,
                        headers=request_headers,
                        timeout=timeout,
                    )
                    if response.status_code in (200, 201, 202):
                        sent_count += len(batch)
                    else:
                        logger.warning(
                            "Failed to send batch: %s %s",
                            response.status_code,
                            response.text,
                        )
                        failed_count += len(batch)
                except Exception as e:
                    # Best-effort delivery: record the failure and move on.
                    logger.error("Error sending batch: %s", e)
                    failed_count += len(batch)

        finally:
            if self._owns_client and client:
                await client.aclose()

        return sent_count, failed_count
564
+
565
+
566
+ # =============================================================================
567
+ # OpenLineage Emitter Service
568
+ # =============================================================================
569
+
570
+
571
+ class OpenLineageEmitterService:
572
+ """Service for emitting OpenLineage events from dashboard lineage.
573
+
574
+ This service provides:
575
+ - Export lineage graph as OpenLineage events (JSON)
576
+ - Emit events to external OpenLineage consumers (Marquez, DataHub, etc.)
577
+ - Support for granular (per-node) or aggregated (full-graph) export
578
+ """
579
+
580
+ def __init__(
581
+ self,
582
+ session: AsyncSession,
583
+ transformer: LineageToOpenLineageTransformer | None = None,
584
+ transport: HttpTransport | BatchHttpTransport | None = None,
585
+ ):
586
+ """Initialize service.
587
+
588
+ Args:
589
+ session: Database session.
590
+ transformer: Optional custom transformer.
591
+ transport: Optional custom transport.
592
+ """
593
+ self.session = session
594
+ self.lineage_service = LineageService(session)
595
+ self.transformer = transformer or LineageToOpenLineageTransformer()
596
+ self.transport = transport or HttpTransport()
597
+
598
+ async def export_as_openlineage(
599
+ self,
600
+ job_namespace: str = "truthound-dashboard",
601
+ job_name: str = "lineage_export",
602
+ source_id: str | None = None,
603
+ include_schema: bool = True,
604
+ granular: bool = False,
605
+ ) -> dict[str, Any]:
606
+ """Export lineage graph as OpenLineage events.
607
+
608
+ Args:
609
+ job_namespace: Namespace for the job.
610
+ job_name: Name of the job.
611
+ source_id: Optional source ID to filter.
612
+ include_schema: Include schema in facets.
613
+ granular: If True, create events per node.
614
+
615
+ Returns:
616
+ Dictionary with events and metadata.
617
+ """
618
+ # Get lineage graph
619
+ graph = await self.lineage_service.get_graph(source_id=source_id)
620
+ nodes = graph.get("nodes", [])
621
+ edges = graph.get("edges", [])
622
+
623
+ # Transform to OpenLineage events
624
+ if granular:
625
+ events = self.transformer.transform_per_node(
626
+ nodes=nodes,
627
+ edges=edges,
628
+ job_namespace=job_namespace,
629
+ include_schema=include_schema,
630
+ )
631
+ else:
632
+ events = self.transformer.transform(
633
+ nodes=nodes,
634
+ edges=edges,
635
+ job_namespace=job_namespace,
636
+ job_name=job_name,
637
+ include_schema=include_schema,
638
+ )
639
+
640
+ # Count unique datasets
641
+ dataset_names = set()
642
+ for event in events:
643
+ for ds in event.inputs:
644
+ dataset_names.add(f"{ds.namespace}:{ds.name}")
645
+ for ds in event.outputs:
646
+ dataset_names.add(f"{ds.namespace}:{ds.name}")
647
+
648
+ # Count unique jobs
649
+ job_names = set()
650
+ for event in events:
651
+ job_names.add(f"{event.job.namespace}:{event.job.name}")
652
+
653
+ return {
654
+ "events": [e.model_dump(by_alias=True) for e in events],
655
+ "total_events": len(events),
656
+ "total_datasets": len(dataset_names),
657
+ "total_jobs": len(job_names),
658
+ "export_time": datetime.utcnow().isoformat() + "Z",
659
+ }
660
+
661
async def emit_to_endpoint(
    self,
    url: str,
    api_key: str | None = None,
    headers: dict[str, str] | None = None,
    job_namespace: str = "truthound-dashboard",
    job_name: str = "lineage_export",
    source_id: str | None = None,
    timeout: int = 30,
) -> dict[str, Any]:
    """Export lineage as OpenLineage events and push them to ``url``.

    The events are produced by ``export_as_openlineage`` (always with
    schema included), rebuilt as ``OpenLineageEvent`` objects and handed
    to ``self.transport.send``. This method never raises: any exception
    is logged and reported through the returned dictionary.

    Args:
        url: Target URL (e.g., Marquez API).
        api_key: Optional API key forwarded to the transport.
        headers: Additional headers forwarded to the transport.
        job_namespace: Namespace used for the exported job.
        job_name: Name used for the exported job.
        source_id: Optional source ID to restrict the export.
        timeout: Request timeout forwarded to the transport.

    Returns:
        Dictionary with the keys ``success``, ``events_sent``,
        ``failed_events`` and ``error_message``.
    """
    try:
        exported = await self.export_as_openlineage(
            job_namespace=job_namespace,
            job_name=job_name,
            source_id=source_id,
            include_schema=True,
        )

        # The export yields plain dictionaries; the transport is given
        # OpenLineageEvent objects, so rebuild them here.
        events = [OpenLineageEvent(**payload) for payload in exported["events"]]

        sent_count, failed_count = await self.transport.send(
            events=events,
            url=url,
            api_key=api_key,
            headers=headers,
            timeout=timeout,
        )

        all_delivered = failed_count == 0
        if all_delivered:
            error_message = None
        else:
            error_message = f"{failed_count} events failed to send"

        return {
            "success": all_delivered,
            "events_sent": sent_count,
            "failed_events": failed_count,
            "error_message": error_message,
        }

    except Exception as exc:
        # Best-effort boundary: report the failure instead of propagating it.
        logger.exception("Failed to emit OpenLineage events")
        return {
            "success": False,
            "events_sent": 0,
            "failed_events": 0,
            "error_message": str(exc),
        }
724
+
725
def export_as_ndjson(
    self,
    events: list[dict[str, Any]],
) -> str:
    """Serialize events as newline-delimited JSON (NDJSON).

    Args:
        events: List of event dictionaries.

    Returns:
        One JSON document per event, separated by ``"\\n"`` with no
        trailing newline; an empty string when ``events`` is empty.
    """
    serialized = [json.dumps(event) for event in events]
    return "\n".join(serialized)
738
+
739
+
740
+ # =============================================================================
741
+ # Webhook Management Service
742
+ # =============================================================================
743
+
744
+
745
+ class OpenLineageWebhookService:
746
+ """Service for managing OpenLineage webhook configurations.
747
+
748
+ Provides CRUD operations for webhook configurations and
749
+ testing webhook connectivity.
750
+ """
751
+
752
def __init__(self, session: AsyncSession):
    """Bind the service to a database session.

    Args:
        session: Async database session used by every query and commit
            issued by this service.
    """
    self.session = session
759
+
760
async def list_webhooks(self, active_only: bool = False) -> list[OpenLineageWebhook]:
    """Return the configured webhooks, newest first.

    Args:
        active_only: If True, restrict the result to webhooks whose
            ``is_active`` flag is set.

    Returns:
        Webhook configurations ordered by ``created_at`` descending.
    """
    newest_first = OpenLineageWebhook.created_at.desc()
    stmt = select(OpenLineageWebhook).order_by(newest_first)

    if active_only:
        # ``== True`` is intentional: it builds a SQL expression rather
        # than evaluating a Python boolean.
        only_active = OpenLineageWebhook.is_active == True  # noqa: E712
        stmt = stmt.where(only_active)

    rows = await self.session.execute(stmt)
    webhooks = rows.scalars().all()
    return list(webhooks)
775
+
776
async def get_webhook(self, webhook_id: str) -> OpenLineageWebhook | None:
    """Look up a single webhook by its ID.

    Args:
        webhook_id: Webhook unique identifier.

    Returns:
        The matching webhook, or None when no row has that ID.
    """
    by_id = select(OpenLineageWebhook).where(OpenLineageWebhook.id == webhook_id)
    rows = await self.session.execute(by_id)
    return rows.scalar_one_or_none()
789
+
790
async def create_webhook(
    self,
    name: str,
    url: str,
    is_active: bool = True,
    headers: dict[str, str] | None = None,
    api_key: str | None = None,
    event_types: str = "all",
    batch_size: int = 100,
    timeout_seconds: int = 30,
) -> OpenLineageWebhook:
    """Persist a new webhook configuration.

    Args:
        name: Human-readable name.
        url: Target URL.
        is_active: Whether the webhook is enabled.
        headers: Custom headers; stored as an empty dict when omitted
            or empty.
        api_key: API key for authentication.
        event_types: Types of events to emit.
        batch_size: Events per batch.
        timeout_seconds: Request timeout.

    Returns:
        The created webhook, refreshed from the session after commit.
    """
    stored_headers = headers if headers else {}

    new_webhook = OpenLineageWebhook(
        name=name,
        url=url,
        is_active=is_active,
        headers_json=stored_headers,
        api_key=api_key,
        event_types=event_types,
        batch_size=batch_size,
        timeout_seconds=timeout_seconds,
    )

    db = self.session
    db.add(new_webhook)
    await db.commit()
    # Reload the row after the commit before handing it back.
    await db.refresh(new_webhook)
    return new_webhook
830
+
831
async def update_webhook(
    self,
    webhook_id: str,
    name: str | None = None,
    url: str | None = None,
    is_active: bool | None = None,
    headers: dict[str, str] | None = None,
    api_key: str | None = None,
    event_types: str | None = None,
    batch_size: int | None = None,
    timeout_seconds: int | None = None,
) -> OpenLineageWebhook | None:
    """Apply a partial update to an existing webhook.

    Only arguments that are not None are written; passing None for a
    field leaves its stored value untouched.

    Args:
        webhook_id: Webhook to update.
        name: New name (if provided).
        url: New URL (if provided).
        is_active: New active status (if provided).
        headers: New headers (if provided); written to ``headers_json``.
        api_key: New API key (if provided).
        event_types: New event types (if provided).
        batch_size: New batch size (if provided).
        timeout_seconds: New timeout (if provided).

    Returns:
        The updated webhook, or None if no webhook has that ID.
    """
    webhook = await self.get_webhook(webhook_id)
    if not webhook:
        return None

    # Model attribute -> requested value, in the order they are applied.
    requested_changes = {
        "name": name,
        "url": url,
        "is_active": is_active,
        "headers_json": headers,
        "api_key": api_key,
        "event_types": event_types,
        "batch_size": batch_size,
        "timeout_seconds": timeout_seconds,
    }
    for attribute, new_value in requested_changes.items():
        if new_value is not None:
            setattr(webhook, attribute, new_value)

    await self.session.commit()
    await self.session.refresh(webhook)
    return webhook
883
+
884
async def delete_webhook(self, webhook_id: str) -> bool:
    """Remove a webhook configuration.

    Args:
        webhook_id: Webhook to delete.

    Returns:
        True when a webhook was found and deleted, False when no
        webhook has that ID (nothing is committed in that case).
    """
    target = await self.get_webhook(webhook_id)
    if target:
        await self.session.delete(target)
        await self.session.commit()
        return True
    return False
900
+
901
async def test_webhook(
    self,
    url: str,
    headers: dict[str, str] | None = None,
    api_key: str | None = None,
    timeout_seconds: int = 10,
) -> dict[str, Any]:
    """Test webhook connectivity.

    POSTs a minimal OpenLineage ``START`` event to ``url`` and reports
    whether the endpoint accepted it. HTTP 200, 201, 202 and 204 count
    as success. This method never raises: timeouts, connection errors
    and any other exception are reported in the returned dictionary.

    Args:
        url: URL to test.
        headers: Custom headers. They are applied last, so they override
            the default ``Content-Type`` / ``X-OpenLineage-Test`` headers
            and the ``Authorization`` header derived from ``api_key``.
        api_key: API key, sent as ``Authorization: Bearer <api_key>``.
        timeout_seconds: Request timeout.

    Returns:
        Dictionary with the keys ``success``, ``status_code``,
        ``response_time_ms``, ``error_message`` and ``response_body``
        (the response text truncated to 500 characters, or None).
    """
    # Imported locally so this method does not rely on ``timezone``
    # being imported at module level.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated since Python 3.12. Take an aware
    # UTC timestamp and drop the tzinfo so the rendered value keeps the
    # same "<naive ISO timestamp>Z" shape as before.
    event_time = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    # Create a minimal test event
    test_event = {
        "eventTime": event_time,
        "eventType": "START",
        "producer": "https://github.com/truthound/truthound-dashboard",
        "schemaURL": "https://openlineage.io/spec/1-0-5/OpenLineage.json#/definitions/RunEvent",
        "run": {"runId": str(uuid4()), "facets": {}},
        "job": {
            "namespace": "truthound-dashboard",
            "name": "webhook_test",
            "facets": {},
        },
        "inputs": [],
        "outputs": [],
    }

    request_headers = {
        "Content-Type": "application/json",
        "X-OpenLineage-Test": "true",
    }
    if api_key:
        request_headers["Authorization"] = f"Bearer {api_key}"
    if headers:
        request_headers.update(headers)

    # perf_counter() is monotonic: unlike time.time(), the measured
    # duration cannot be skewed (or go negative) by system clock changes.
    started = time.perf_counter()

    def elapsed_ms() -> int:
        """Return whole milliseconds elapsed since the request started."""
        return int((time.perf_counter() - started) * 1000)

    def failure(response_time_ms: int, error_message: str) -> dict[str, Any]:
        """Build the result for a request that produced no HTTP response."""
        return {
            "success": False,
            "status_code": None,
            "response_time_ms": response_time_ms,
            "error_message": error_message,
            "response_body": None,
        }

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                url,
                json=test_event,
                headers=request_headers,
                timeout=timeout_seconds,
            )

        response_time_ms = elapsed_ms()

        # Truncate response body
        response_body = response.text[:500] if response.text else None

        accepted = response.status_code in (200, 201, 202, 204)
        return {
            "success": accepted,
            "status_code": response.status_code,
            "response_time_ms": response_time_ms,
            "error_message": (
                None
                if accepted
                else f"HTTP {response.status_code}: {response.reason_phrase}"
            ),
            "response_body": response_body,
        }

    except httpx.TimeoutException:
        return failure(
            elapsed_ms(),
            f"Request timed out after {timeout_seconds} seconds",
        )
    except httpx.ConnectError as e:
        return failure(elapsed_ms(), f"Connection failed: {str(e)}")
    except Exception as e:
        # Best-effort boundary: log the unexpected error and report it.
        response_time_ms = elapsed_ms()
        logger.exception("Webhook test failed")
        return failure(response_time_ms, str(e))
1008
+
1009
+ async def record_emission(
1010
+ self,
1011
+ webhook_id: str,
1012
+ success: bool,
1013
+ error: str | None = None,
1014
+ ) -> None:
1015
+ """Record an emission attempt for a webhook.
1016
+
1017
+ Args:
1018
+ webhook_id: Webhook ID.
1019
+ success: Whether the emission was successful.
1020
+ error: Error message if failed.
1021
+ """
1022
+ webhook = await self.get_webhook(webhook_id)
1023
+ if webhook:
1024
+ if success:
1025
+ webhook.record_success()
1026
+ else:
1027
+ webhook.record_failure(error or "Unknown error")
1028
+ await self.session.commit()