truthound-dashboard 1.3.1__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +258 -0
- truthound_dashboard/api/anomaly.py +1302 -0
- truthound_dashboard/api/cross_alerts.py +352 -0
- truthound_dashboard/api/deps.py +143 -0
- truthound_dashboard/api/drift_monitor.py +540 -0
- truthound_dashboard/api/lineage.py +1151 -0
- truthound_dashboard/api/maintenance.py +363 -0
- truthound_dashboard/api/middleware.py +373 -1
- truthound_dashboard/api/model_monitoring.py +805 -0
- truthound_dashboard/api/notifications_advanced.py +2452 -0
- truthound_dashboard/api/plugins.py +2096 -0
- truthound_dashboard/api/profile.py +211 -14
- truthound_dashboard/api/reports.py +853 -0
- truthound_dashboard/api/router.py +147 -0
- truthound_dashboard/api/rule_suggestions.py +310 -0
- truthound_dashboard/api/schema_evolution.py +231 -0
- truthound_dashboard/api/sources.py +47 -3
- truthound_dashboard/api/triggers.py +190 -0
- truthound_dashboard/api/validations.py +13 -0
- truthound_dashboard/api/validators.py +333 -4
- truthound_dashboard/api/versioning.py +309 -0
- truthound_dashboard/api/websocket.py +301 -0
- truthound_dashboard/core/__init__.py +27 -0
- truthound_dashboard/core/anomaly.py +1395 -0
- truthound_dashboard/core/anomaly_explainer.py +633 -0
- truthound_dashboard/core/cache.py +206 -0
- truthound_dashboard/core/cached_services.py +422 -0
- truthound_dashboard/core/charts.py +352 -0
- truthound_dashboard/core/connections.py +1069 -42
- truthound_dashboard/core/cross_alerts.py +837 -0
- truthound_dashboard/core/drift_monitor.py +1477 -0
- truthound_dashboard/core/drift_sampling.py +669 -0
- truthound_dashboard/core/i18n/__init__.py +42 -0
- truthound_dashboard/core/i18n/detector.py +173 -0
- truthound_dashboard/core/i18n/messages.py +564 -0
- truthound_dashboard/core/lineage.py +971 -0
- truthound_dashboard/core/maintenance.py +443 -5
- truthound_dashboard/core/model_monitoring.py +1043 -0
- truthound_dashboard/core/notifications/channels.py +1020 -1
- truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
- truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
- truthound_dashboard/core/notifications/deduplication/service.py +400 -0
- truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
- truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
- truthound_dashboard/core/notifications/dispatcher.py +43 -0
- truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
- truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
- truthound_dashboard/core/notifications/escalation/engine.py +429 -0
- truthound_dashboard/core/notifications/escalation/models.py +336 -0
- truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
- truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
- truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
- truthound_dashboard/core/notifications/events.py +49 -0
- truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
- truthound_dashboard/core/notifications/metrics/base.py +528 -0
- truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
- truthound_dashboard/core/notifications/routing/__init__.py +169 -0
- truthound_dashboard/core/notifications/routing/combinators.py +184 -0
- truthound_dashboard/core/notifications/routing/config.py +375 -0
- truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
- truthound_dashboard/core/notifications/routing/engine.py +382 -0
- truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
- truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
- truthound_dashboard/core/notifications/routing/rules.py +625 -0
- truthound_dashboard/core/notifications/routing/validator.py +678 -0
- truthound_dashboard/core/notifications/service.py +2 -0
- truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
- truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
- truthound_dashboard/core/notifications/throttling/builder.py +311 -0
- truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
- truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
- truthound_dashboard/core/openlineage.py +1028 -0
- truthound_dashboard/core/plugins/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/extractor.py +703 -0
- truthound_dashboard/core/plugins/docs/renderers.py +804 -0
- truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
- truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
- truthound_dashboard/core/plugins/hooks/manager.py +403 -0
- truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
- truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
- truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
- truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
- truthound_dashboard/core/plugins/loader.py +504 -0
- truthound_dashboard/core/plugins/registry.py +810 -0
- truthound_dashboard/core/plugins/reporter_executor.py +588 -0
- truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
- truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
- truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
- truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
- truthound_dashboard/core/plugins/sandbox.py +617 -0
- truthound_dashboard/core/plugins/security/__init__.py +68 -0
- truthound_dashboard/core/plugins/security/analyzer.py +535 -0
- truthound_dashboard/core/plugins/security/policies.py +311 -0
- truthound_dashboard/core/plugins/security/protocols.py +296 -0
- truthound_dashboard/core/plugins/security/signing.py +842 -0
- truthound_dashboard/core/plugins/security.py +446 -0
- truthound_dashboard/core/plugins/validator_executor.py +401 -0
- truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
- truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
- truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
- truthound_dashboard/core/plugins/versioning/semver.py +266 -0
- truthound_dashboard/core/profile_comparison.py +601 -0
- truthound_dashboard/core/report_history.py +570 -0
- truthound_dashboard/core/reporters/__init__.py +57 -0
- truthound_dashboard/core/reporters/base.py +296 -0
- truthound_dashboard/core/reporters/csv_reporter.py +155 -0
- truthound_dashboard/core/reporters/html_reporter.py +598 -0
- truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
- truthound_dashboard/core/reporters/i18n/base.py +494 -0
- truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
- truthound_dashboard/core/reporters/json_reporter.py +160 -0
- truthound_dashboard/core/reporters/junit_reporter.py +233 -0
- truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
- truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
- truthound_dashboard/core/reporters/registry.py +272 -0
- truthound_dashboard/core/rule_generator.py +2088 -0
- truthound_dashboard/core/scheduler.py +822 -12
- truthound_dashboard/core/schema_evolution.py +858 -0
- truthound_dashboard/core/services.py +152 -9
- truthound_dashboard/core/statistics.py +718 -0
- truthound_dashboard/core/streaming_anomaly.py +883 -0
- truthound_dashboard/core/triggers/__init__.py +45 -0
- truthound_dashboard/core/triggers/base.py +226 -0
- truthound_dashboard/core/triggers/evaluators.py +609 -0
- truthound_dashboard/core/triggers/factory.py +363 -0
- truthound_dashboard/core/unified_alerts.py +870 -0
- truthound_dashboard/core/validation_limits.py +509 -0
- truthound_dashboard/core/versioning.py +709 -0
- truthound_dashboard/core/websocket/__init__.py +59 -0
- truthound_dashboard/core/websocket/manager.py +512 -0
- truthound_dashboard/core/websocket/messages.py +130 -0
- truthound_dashboard/db/__init__.py +30 -0
- truthound_dashboard/db/models.py +3375 -3
- truthound_dashboard/main.py +22 -0
- truthound_dashboard/schemas/__init__.py +396 -1
- truthound_dashboard/schemas/anomaly.py +1258 -0
- truthound_dashboard/schemas/base.py +4 -0
- truthound_dashboard/schemas/cross_alerts.py +334 -0
- truthound_dashboard/schemas/drift_monitor.py +890 -0
- truthound_dashboard/schemas/lineage.py +428 -0
- truthound_dashboard/schemas/maintenance.py +154 -0
- truthound_dashboard/schemas/model_monitoring.py +374 -0
- truthound_dashboard/schemas/notifications_advanced.py +1363 -0
- truthound_dashboard/schemas/openlineage.py +704 -0
- truthound_dashboard/schemas/plugins.py +1293 -0
- truthound_dashboard/schemas/profile.py +420 -34
- truthound_dashboard/schemas/profile_comparison.py +242 -0
- truthound_dashboard/schemas/reports.py +285 -0
- truthound_dashboard/schemas/rule_suggestion.py +434 -0
- truthound_dashboard/schemas/schema_evolution.py +164 -0
- truthound_dashboard/schemas/source.py +117 -2
- truthound_dashboard/schemas/triggers.py +511 -0
- truthound_dashboard/schemas/unified_alerts.py +223 -0
- truthound_dashboard/schemas/validation.py +25 -1
- truthound_dashboard/schemas/validators/__init__.py +11 -0
- truthound_dashboard/schemas/validators/base.py +151 -0
- truthound_dashboard/schemas/versioning.py +152 -0
- truthound_dashboard/static/index.html +2 -2
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -22
- truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
- truthound_dashboard/static/assets/index-BZG20KuF.js +0 -586
- truthound_dashboard/static/assets/index-D_HyZ3pb.css +0 -1
- truthound_dashboard/static/assets/unmerged_dictionaries-CtpqQBm0.js +0 -1
- truthound_dashboard-1.3.1.dist-info/RECORD +0 -110
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,1028 @@
|
|
|
1
|
+
"""OpenLineage emitter service.
|
|
2
|
+
|
|
3
|
+
This module provides services for converting dashboard lineage to OpenLineage events
|
|
4
|
+
and emitting them to external systems.
|
|
5
|
+
|
|
6
|
+
The OpenLineage emitter follows the Protocol-based design for extensibility:
|
|
7
|
+
- IOpenLineageEmitter: Core emission interface
|
|
8
|
+
- IEventTransformer: Transform lineage to OpenLineage events
|
|
9
|
+
- ITransport: Send events to external systems
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations

import json
import logging
import time
from abc import ABC, abstractmethod
from datetime import datetime, timezone
from typing import Any, Protocol, runtime_checkable
from uuid import uuid4

import httpx
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from truthound_dashboard.core.lineage import LineageService
from truthound_dashboard.db.models import LineageNode, LineageEdge, Source, OpenLineageWebhook
from truthound_dashboard.schemas.openlineage import (
    OpenLineageDataset,
    OpenLineageEvent,
    OpenLineageJob,
    OpenLineageRun,
    RunState,
    SchemaDatasetFacet,
    SchemaField,
    build_dataset_namespace,
)
|
|
38
|
+
|
|
39
|
+
logger = logging.getLogger(__name__)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# =============================================================================
|
|
43
|
+
# Protocols for Extensibility
|
|
44
|
+
# =============================================================================
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@runtime_checkable
class IEventTransformer(Protocol):
    """Structural interface for objects that turn lineage into OpenLineage events.

    Any object exposing a compatible ``transform`` method satisfies this
    protocol; because it is ``runtime_checkable``, ``isinstance`` can be used
    to test for the method's presence.
    """

    def transform(
        self,
        nodes: list[dict[str, Any]],
        edges: list[dict[str, Any]],
        job_namespace: str,
        job_name: str,
        include_schema: bool = True,
    ) -> list[OpenLineageEvent]:
        """Convert a lineage graph into a list of OpenLineage events.

        Args:
            nodes: Lineage nodes, one dictionary per node.
            edges: Lineage edges, one dictionary per edge.
            job_namespace: Namespace under which the job is reported.
            job_name: Name under which the job is reported.
            include_schema: When true, schema facets should be attached.

        Returns:
            The OpenLineage events produced for the graph.
        """
        ...
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@runtime_checkable
class ITransport(Protocol):
    """Structural interface for objects that deliver OpenLineage events.

    Any object exposing a compatible asynchronous ``send`` method satisfies
    this protocol; because it is ``runtime_checkable``, ``isinstance`` can be
    used to test for the method's presence.
    """

    async def send(
        self,
        events: list[OpenLineageEvent],
        url: str,
        api_key: str | None = None,
        headers: dict[str, str] | None = None,
        timeout: int = 30,
    ) -> tuple[int, int]:
        """Deliver events to an external system.

        Args:
            events: Events to deliver.
            url: Destination URL.
            api_key: API key to authenticate with, if any.
            headers: Extra headers to include, if any.
            timeout: Request timeout, in seconds.

        Returns:
            A ``(sent_count, failed_count)`` pair.
        """
        ...
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# =============================================================================
|
|
102
|
+
# Event Transformer Implementation
|
|
103
|
+
# =============================================================================
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class LineageToOpenLineageTransformer:
    """Transforms dashboard lineage graph to OpenLineage events.

    Nodes and edges are plain dictionaries. Nodes are classified by their
    ``node_type`` value (``"source"``, ``"transform"`` or ``"sink"``); edges
    link nodes through their ``source_node_id`` and ``target_node_id`` keys.

    Two export shapes are supported:

    - :meth:`transform` emits one run for the whole graph.
    - :meth:`transform_per_node` emits one run per non-source node.
    """

    def __init__(self, producer: str = "https://github.com/truthound/truthound-dashboard"):
        """Initialize transformer.

        Args:
            producer: Producer URI stamped on every event and facet.
        """
        self.producer = producer

    def transform(
        self,
        nodes: list[dict[str, Any]],
        edges: list[dict[str, Any]],
        job_namespace: str,
        job_name: str,
        include_schema: bool = True,
    ) -> list[OpenLineageEvent]:
        """Transform the whole lineage graph into a single OpenLineage run.

        Creates two events that share one run and one job:

        1. START event carrying all inputs (source nodes) and no outputs.
        2. COMPLETE event carrying all inputs and all outputs (sink nodes
           followed by transform nodes).

        Args:
            nodes: List of lineage nodes.
            edges: List of lineage edges (only counted for the job facet).
            job_namespace: Namespace for the job.
            job_name: Name of the job.
            include_schema: Whether to include schema facets.

        Returns:
            ``[start_event, complete_event]``, or an empty list when ``nodes``
            is empty.
        """
        if not nodes:
            return []

        run_id = str(uuid4())

        # Categorize nodes by type.
        source_nodes = [n for n in nodes if n.get("node_type") == "source"]
        transform_nodes = [n for n in nodes if n.get("node_type") == "transform"]
        sink_nodes = [n for n in nodes if n.get("node_type") == "sink"]

        # Sources are the run's inputs; sinks and transforms are its outputs.
        inputs = [self._node_to_dataset(node, include_schema) for node in source_nodes]
        outputs = [
            self._node_to_dataset(node, include_schema)
            for node in sink_nodes + transform_nodes
        ]

        job = OpenLineageJob(
            namespace=job_namespace,
            name=job_name,
            facets={
                "truthound": {
                    "_producer": self.producer,
                    "_schemaURL": "https://truthound.io/spec/facets/1-0-0/TruthoundJobFacet.json",
                    "total_nodes": len(nodes),
                    "total_edges": len(edges),
                    "source_count": len(source_nodes),
                    "transform_count": len(transform_nodes),
                    "sink_count": len(sink_nodes),
                }
            },
        )

        run = OpenLineageRun(
            run_id=run_id,
            facets={
                "processing_engine": {
                    "_producer": self.producer,
                    "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/ProcessingEngineRunFacet.json",
                    "version": "1.0.0",
                    "name": "truthound-dashboard",
                }
            },
        )

        return self._run_events(run, job, inputs, outputs)

    def transform_per_node(
        self,
        nodes: list[dict[str, Any]],
        edges: list[dict[str, Any]],
        job_namespace: str,
        include_schema: bool = True,
    ) -> list[OpenLineageEvent]:
        """Transform each non-source node into its own OpenLineage run.

        This creates finer-grained events: every node whose ``node_type`` is
        not ``"source"`` becomes a job named ``process_<name>`` whose inputs
        are the nodes with an edge pointing at it and whose single output is
        the node itself.

        Args:
            nodes: List of lineage nodes; each must have an ``"id"`` key.
            edges: List of lineage edges.
            job_namespace: Namespace for jobs.
            include_schema: Whether to include schema facets.

        Returns:
            List of OpenLineage events (2 per non-source node:
            START + COMPLETE), in node order.

        Raises:
            KeyError: If a node dictionary has no ``"id"`` key.
        """
        events: list[OpenLineageEvent] = []

        # Lookup tables: node id -> node, and node id -> ids of its upstream nodes.
        node_map = {n["id"]: n for n in nodes}
        incoming_edges = self._build_incoming_edge_map(edges)

        for node in nodes:
            if node.get("node_type") == "source":
                continue  # Sources only ever appear as inputs of other nodes.

            node_id = node["id"]

            # Upstream nodes become inputs; edges referencing unknown nodes are ignored.
            inputs = [
                self._node_to_dataset(node_map[nid], include_schema)
                for nid in incoming_edges.get(node_id, [])
                if nid in node_map
            ]

            # This node is the output.
            outputs = [self._node_to_dataset(node, include_schema)]

            job = OpenLineageJob(
                namespace=job_namespace,
                name=f"process_{node.get('name', node_id)}",
                facets={
                    "truthound": {
                        "_producer": self.producer,
                        "_schemaURL": "https://truthound.io/spec/facets/1-0-0/TruthoundJobFacet.json",
                        "node_id": node_id,
                        "node_type": node.get("node_type"),
                    }
                },
            )
            run = OpenLineageRun(run_id=str(uuid4()))

            events.extend(self._run_events(run, job, inputs, outputs))

        return events

    @staticmethod
    def _event_time() -> str:
        """Return the current UTC time as an ISO-8601 string with a ``Z`` suffix.

        Uses an aware "now" instead of the deprecated ``datetime.utcnow()``;
        the tzinfo is dropped before formatting so the output keeps the
        ``YYYY-MM-DDTHH:MM:SS[.ffffff]Z`` shape.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    def _run_events(
        self,
        run: OpenLineageRun,
        job: OpenLineageJob,
        inputs: list[OpenLineageDataset],
        outputs: list[OpenLineageDataset],
    ) -> list[OpenLineageEvent]:
        """Build the START/COMPLETE event pair for one run.

        The START event carries the inputs and no outputs; the COMPLETE event
        carries both. Each event gets its own timestamp.
        """
        start_event = OpenLineageEvent(
            event_time=self._event_time(),
            event_type=RunState.START,
            producer=self.producer,
            run=run,
            job=job,
            inputs=inputs,
            outputs=[],  # No outputs at start
        )
        complete_event = OpenLineageEvent(
            event_time=self._event_time(),
            event_type=RunState.COMPLETE,
            producer=self.producer,
            run=run,
            job=job,
            inputs=inputs,
            outputs=outputs,
        )
        return [start_event, complete_event]

    def _node_to_dataset(
        self,
        node: dict[str, Any],
        include_schema: bool,
    ) -> OpenLineageDataset:
        """Convert a lineage node to an OpenLineage dataset.

        The dataset namespace is derived from the node metadata's
        ``source_type`` (``"unknown"`` when absent) and ``config``. A
        ``truthound`` facet is always attached; a ``schema`` facet is added
        only when requested and the metadata has non-empty ``schema_fields``.

        Args:
            node: Lineage node dictionary.
            include_schema: Whether to include schema.

        Returns:
            OpenLineageDataset instance.
        """
        metadata = node.get("metadata") or {}
        source_type = metadata.get("source_type", "unknown")

        # Build namespace based on source type.
        namespace = build_dataset_namespace(source_type, metadata.get("config"))

        facets: dict[str, Any] = {
            "truthound": {
                "_producer": self.producer,
                "_schemaURL": "https://truthound.io/spec/facets/1-0-0/TruthoundDatasetFacet.json",
                "node_id": node.get("id"),
                "node_type": node.get("node_type"),
                "source_id": node.get("source_id"),
            }
        }

        # Add schema facet if available; field type falls back to "string".
        if include_schema and metadata.get("schema_fields"):
            facets["schema"] = {
                "_producer": self.producer,
                "_schemaURL": "https://openlineage.io/spec/facets/1-0-0/SchemaDatasetFacet.json",
                "fields": [
                    {"name": f.get("name"), "type": f.get("type", "string")}
                    for f in metadata["schema_fields"]
                ],
            }

        return OpenLineageDataset(
            namespace=namespace,
            name=node.get("name", "unknown"),
            facets=facets,
        )

    def _build_edge_map(
        self,
        edges: list[dict[str, Any]],
    ) -> dict[str, list[str]]:
        """Build map from source_node_id to target_node_ids.

        Edges missing either endpoint (absent or falsy id) are skipped.
        """
        edge_map: dict[str, list[str]] = {}
        for edge in edges:
            source_id = edge.get("source_node_id")
            target_id = edge.get("target_node_id")
            if source_id and target_id:
                edge_map.setdefault(source_id, []).append(target_id)
        return edge_map

    def _build_incoming_edge_map(
        self,
        edges: list[dict[str, Any]],
    ) -> dict[str, list[str]]:
        """Build map from target_node_id to source_node_ids.

        Edges missing either endpoint (absent or falsy id) are skipped.
        """
        edge_map: dict[str, list[str]] = {}
        for edge in edges:
            source_id = edge.get("source_node_id")
            target_id = edge.get("target_node_id")
            if source_id and target_id:
                edge_map.setdefault(target_id, []).append(source_id)
        return edge_map

    def _build_outgoing_edge_map(
        self,
        edges: list[dict[str, Any]],
    ) -> dict[str, list[str]]:
        """Build map from source_node_id to target_node_ids."""
        return self._build_edge_map(edges)
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
# =============================================================================
|
|
404
|
+
# Transport Implementation
|
|
405
|
+
# =============================================================================
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
class HttpTransport:
    """HTTP transport for sending OpenLineage events.

    Each event is sent in its own POST request. If no client is supplied, a
    temporary ``httpx.AsyncClient`` is created for every ``send`` call and
    closed afterwards; a supplied client is never closed by this class.
    """

    def __init__(self, client: httpx.AsyncClient | None = None):
        """Initialize transport.

        Args:
            client: Optional pre-configured httpx client.
        """
        self._client = client
        self._owns_client = client is None

    async def send(
        self,
        events: list[OpenLineageEvent],
        url: str,
        api_key: str | None = None,
        headers: dict[str, str] | None = None,
        timeout: int = 30,
    ) -> tuple[int, int]:
        """Send events via HTTP POST, one request per event.

        Delivery is best-effort: a failing request is logged and counted, and
        the remaining events are still attempted.

        Args:
            events: List of events to send.
            url: Target URL.
            api_key: Optional API key, sent as a ``Bearer`` token.
            headers: Additional headers; these are applied last and therefore
                override the default ``Content-Type`` and ``Authorization``.
            timeout: Request timeout in seconds.

        Returns:
            Tuple of (sent_count, failed_count).
        """
        if not events:
            return 0, 0

        # Build headers before acquiring the client so nothing can leak if this fails.
        request_headers = {"Content-Type": "application/json"}
        if api_key:
            request_headers["Authorization"] = f"Bearer {api_key}"
        if headers:
            request_headers.update(headers)

        client = self._client or httpx.AsyncClient()
        sent_count = 0
        failed_count = 0

        try:
            for event in events:
                try:
                    response = await client.post(
                        url,
                        # mode="json" guarantees a JSON-serializable payload
                        # (enums, datetimes, ...) for httpx's ``json=`` encoder.
                        json=event.model_dump(mode="json", by_alias=True),
                        headers=request_headers,
                        timeout=timeout,
                    )
                    # Any 2xx status (including 204 No Content) means the event was accepted.
                    if 200 <= response.status_code < 300:
                        sent_count += 1
                    else:
                        logger.warning(
                            "Failed to send event: %s %s",
                            response.status_code,
                            response.text,
                        )
                        failed_count += 1
                except Exception as e:
                    # Deliberately broad: one bad event must not abort the rest.
                    logger.error("Error sending event: %s", e)
                    failed_count += 1
        finally:
            if self._owns_client and client:
                await client.aclose()

        return sent_count, failed_count
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
class BatchHttpTransport:
    """Batched HTTP transport for efficient bulk sending.

    Events are grouped into chunks of ``batch_size`` and each chunk is sent as
    a JSON array in a single POST request. If no client is supplied, a
    temporary ``httpx.AsyncClient`` is created for every ``send`` call and
    closed afterwards; a supplied client is never closed by this class.
    """

    def __init__(
        self,
        batch_size: int = 100,
        client: httpx.AsyncClient | None = None,
    ):
        """Initialize transport.

        Args:
            batch_size: Number of events per batch; must be at least 1.
            client: Optional pre-configured httpx client.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1. A non-positive
                size would otherwise make ``send`` either fail or silently
                skip every event.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
        self.batch_size = batch_size
        self._client = client
        self._owns_client = client is None

    async def send(
        self,
        events: list[OpenLineageEvent],
        url: str,
        api_key: str | None = None,
        headers: dict[str, str] | None = None,
        timeout: int = 30,
    ) -> tuple[int, int]:
        """Send events in batches.

        Delivery is best-effort: a failing batch is logged, all of its events
        are counted as failed, and the remaining batches are still attempted.

        Args:
            events: List of events to send.
            url: Target URL.
            api_key: Optional API key, sent as a ``Bearer`` token.
            headers: Additional headers; these are applied last and therefore
                override the default ``Content-Type`` and ``Authorization``.
            timeout: Request timeout in seconds.

        Returns:
            Tuple of (sent_count, failed_count), counted in events.
        """
        if not events:
            return 0, 0

        # Build headers before acquiring the client so nothing can leak if this fails.
        request_headers = {"Content-Type": "application/json"}
        if api_key:
            request_headers["Authorization"] = f"Bearer {api_key}"
        if headers:
            request_headers.update(headers)

        client = self._client or httpx.AsyncClient()
        sent_count = 0
        failed_count = 0

        try:
            # Process in batches.
            for start in range(0, len(events), self.batch_size):
                batch = events[start : start + self.batch_size]

                try:
                    # Serialization is inside the try so that one
                    # unserializable batch is counted as failed rather than
                    # aborting the whole send. mode="json" guarantees a
                    # JSON-serializable payload for httpx's ``json=`` encoder.
                    batch_payload = [
                        e.model_dump(mode="json", by_alias=True) for e in batch
                    ]
                    response = await client.post(
                        url,
                        json=batch_payload,
                        headers=request_headers,
                        timeout=timeout,
                    )
                    # Any 2xx status (including 204 No Content) means the batch was accepted.
                    if 200 <= response.status_code < 300:
                        sent_count += len(batch)
                    else:
                        logger.warning(
                            "Failed to send batch: %s %s",
                            response.status_code,
                            response.text,
                        )
                        failed_count += len(batch)
                except Exception as e:
                    # Deliberately broad: one bad batch must not abort the rest.
                    logger.error("Error sending batch: %s", e)
                    failed_count += len(batch)
        finally:
            if self._owns_client and client:
                await client.aclose()

        return sent_count, failed_count
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
# =============================================================================
|
|
567
|
+
# OpenLineage Emitter Service
|
|
568
|
+
# =============================================================================
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
class OpenLineageEmitterService:
    """Service for emitting OpenLineage events from dashboard lineage.

    This service provides:
    - Export lineage graph as OpenLineage events (JSON)
    - Emit events to external OpenLineage consumers (Marquez, DataHub, etc.)
    - Support for granular (per-node) or aggregated (full-graph) export
    """

    def __init__(
        self,
        session: AsyncSession,
        transformer: LineageToOpenLineageTransformer | None = None,
        transport: HttpTransport | BatchHttpTransport | None = None,
    ):
        """Initialize service.

        Args:
            session: Database session; also handed to the internal
                ``LineageService``.
            transformer: Optional custom transformer. A default
                ``LineageToOpenLineageTransformer`` is created when omitted.
            transport: Optional custom transport. A default ``HttpTransport``
                is created when omitted.
        """
        self.session = session
        self.lineage_service = LineageService(session)
        self.transformer = transformer or LineageToOpenLineageTransformer()
        self.transport = transport or HttpTransport()

    async def _build_events(
        self,
        job_namespace: str,
        job_name: str,
        source_id: str | None,
        include_schema: bool,
        granular: bool,
    ) -> list[Any]:
        """Load the lineage graph and transform it into event objects.

        Shared by the export and emit paths so both produce events the same
        way, and so emitting does not need to round-trip through dicts.
        """
        graph = await self.lineage_service.get_graph(source_id=source_id)
        nodes = graph.get("nodes", [])
        edges = graph.get("edges", [])

        if granular:
            # Per-node mode: the transformer is not given a job name here.
            return self.transformer.transform_per_node(
                nodes=nodes,
                edges=edges,
                job_namespace=job_namespace,
                include_schema=include_schema,
            )
        return self.transformer.transform(
            nodes=nodes,
            edges=edges,
            job_namespace=job_namespace,
            job_name=job_name,
            include_schema=include_schema,
        )

    async def export_as_openlineage(
        self,
        job_namespace: str = "truthound-dashboard",
        job_name: str = "lineage_export",
        source_id: str | None = None,
        include_schema: bool = True,
        granular: bool = False,
    ) -> dict[str, Any]:
        """Export lineage graph as OpenLineage events.

        Args:
            job_namespace: Namespace for the job.
            job_name: Name of the job (only used when ``granular`` is False).
            source_id: Optional source ID to filter.
            include_schema: Include schema in facets.
            granular: If True, create events per node.

        Returns:
            Dictionary with keys ``events`` (each event dumped with
            ``model_dump(by_alias=True)``), ``total_events``,
            ``total_datasets`` and ``total_jobs`` (counts of distinct
            ``namespace:name`` pairs), and ``export_time`` (UTC, ISO-8601
            with a trailing ``Z``).
        """
        # Function-scope import: ``timezone`` is the only name this block
        # needs beyond what the original code already used.
        from datetime import timezone

        events = await self._build_events(
            job_namespace=job_namespace,
            job_name=job_name,
            source_id=source_id,
            include_schema=include_schema,
            granular=granular,
        )

        # Collect distinct datasets and jobs in a single pass over the events.
        dataset_names: set[str] = set()
        job_names: set[str] = set()
        for event in events:
            job_names.add(f"{event.job.namespace}:{event.job.name}")
            for ds in (*event.inputs, *event.outputs):
                dataset_names.add(f"{ds.namespace}:{ds.name}")

        # datetime.utcnow() is deprecated; take an aware UTC timestamp and drop
        # the tzinfo so the rendered string keeps the original "...Z" shape.
        exported_at = datetime.now(timezone.utc).replace(tzinfo=None)

        return {
            "events": [e.model_dump(by_alias=True) for e in events],
            "total_events": len(events),
            "total_datasets": len(dataset_names),
            "total_jobs": len(job_names),
            "export_time": exported_at.isoformat() + "Z",
        }

    async def emit_to_endpoint(
        self,
        url: str,
        api_key: str | None = None,
        headers: dict[str, str] | None = None,
        job_namespace: str = "truthound-dashboard",
        job_name: str = "lineage_export",
        source_id: str | None = None,
        timeout: int = 30,
        include_schema: bool = True,
        granular: bool = False,
    ) -> dict[str, Any]:
        """Emit OpenLineage events to an external endpoint.

        Any exception raised while building or sending the events is logged
        and reported in the result instead of propagating to the caller.

        Args:
            url: Target URL (e.g., Marquez API).
            api_key: Optional API key.
            headers: Additional headers.
            job_namespace: Namespace for the job.
            job_name: Name of the job.
            source_id: Optional source ID to filter.
            timeout: Request timeout.
            include_schema: Include schema in facets (default keeps the
                previous behavior of always including it).
            granular: If True, emit one event per node instead of a single
                aggregated event.

        Returns:
            Result dictionary with ``success``, ``events_sent``,
            ``failed_events`` and ``error_message``.
        """
        try:
            # Send the event objects directly; there is no need to dump them
            # to dicts and rebuild models just to hand them to the transport.
            events = await self._build_events(
                job_namespace=job_namespace,
                job_name=job_name,
                source_id=source_id,
                include_schema=include_schema,
                granular=granular,
            )
            sent_count, failed_count = await self.transport.send(
                events=events,
                url=url,
                api_key=api_key,
                headers=headers,
                timeout=timeout,
            )
        except Exception as e:
            # Boundary handler: callers receive a structured failure result.
            logger.exception("Failed to emit OpenLineage events")
            return {
                "success": False,
                "events_sent": 0,
                "failed_events": 0,
                "error_message": str(e),
            }

        all_sent = failed_count == 0
        return {
            "success": all_sent,
            "events_sent": sent_count,
            "failed_events": failed_count,
            "error_message": None if all_sent else f"{failed_count} events failed to send",
        }

    def export_as_ndjson(
        self,
        events: list[dict[str, Any]],
    ) -> str:
        """Export events as newline-delimited JSON.

        Args:
            events: List of event dictionaries.

        Returns:
            NDJSON string: one JSON document per line, with no trailing
            newline; an empty list yields an empty string.
        """
        return "\n".join(json.dumps(e) for e in events)
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
# =============================================================================
|
|
741
|
+
# Webhook Management Service
|
|
742
|
+
# =============================================================================
|
|
743
|
+
|
|
744
|
+
|
|
745
|
+
class OpenLineageWebhookService:
|
|
746
|
+
"""Service for managing OpenLineage webhook configurations.
|
|
747
|
+
|
|
748
|
+
Provides CRUD operations for webhook configurations and
|
|
749
|
+
testing webhook connectivity.
|
|
750
|
+
"""
|
|
751
|
+
|
|
752
|
+
def __init__(self, session: AsyncSession):
    """Set up the webhook service around a database session.

    Args:
        session: Database session stored on the instance as ``self.session``
            and used by the other methods of this service.
    """
    self.session = session
|
|
759
|
+
|
|
760
|
+
async def list_webhooks(self, active_only: bool = False) -> list[OpenLineageWebhook]:
    """Return the configured webhooks, most recently created first.

    Args:
        active_only: When True, restrict the result to webhooks whose
            ``is_active`` column is true.

    Returns:
        A list of webhook configurations ordered by ``created_at``
        descending.
    """
    newest_first = OpenLineageWebhook.created_at.desc()
    stmt = select(OpenLineageWebhook).order_by(newest_first)

    if active_only:
        # The comparison builds a SQL expression, so "== True" is intentional.
        stmt = stmt.where(OpenLineageWebhook.is_active == True)  # noqa: E712

    rows = await self.session.execute(stmt)
    return list(rows.scalars().all())
|
|
775
|
+
|
|
776
|
+
async def get_webhook(self, webhook_id: str) -> OpenLineageWebhook | None:
    """Look up a single webhook by its identifier.

    Args:
        webhook_id: Webhook unique identifier.

    Returns:
        The matching webhook, or None when no row has that ID.
    """
    lookup = select(OpenLineageWebhook).where(OpenLineageWebhook.id == webhook_id)
    rows = await self.session.execute(lookup)
    return rows.scalar_one_or_none()
|
|
789
|
+
|
|
790
|
+
async def create_webhook(
    self,
    name: str,
    url: str,
    is_active: bool = True,
    headers: dict[str, str] | None = None,
    api_key: str | None = None,
    event_types: str = "all",
    batch_size: int = 100,
    timeout_seconds: int = 30,
) -> OpenLineageWebhook:
    """Add a webhook configuration, commit it, and return the refreshed object.

    Args:
        name: Human-readable name.
        url: Target URL.
        is_active: Whether the webhook is enabled.
        headers: Custom headers; stored as an empty dict when omitted or empty.
        api_key: API key for authentication.
        event_types: Types of events to emit.
        batch_size: Events per batch.
        timeout_seconds: Request timeout.

    Returns:
        The newly created webhook.
    """
    stored_headers = headers if headers else {}
    record = OpenLineageWebhook(
        name=name,
        url=url,
        is_active=is_active,
        headers_json=stored_headers,
        api_key=api_key,
        event_types=event_types,
        batch_size=batch_size,
        timeout_seconds=timeout_seconds,
    )

    db = self.session
    db.add(record)
    await db.commit()
    # Reload the instance after the commit before handing it back.
    await db.refresh(record)
    return record
|
|
830
|
+
|
|
831
|
+
async def update_webhook(
    self,
    webhook_id: str,
    name: str | None = None,
    url: str | None = None,
    is_active: bool | None = None,
    headers: dict[str, str] | None = None,
    api_key: str | None = None,
    event_types: str | None = None,
    batch_size: int | None = None,
    timeout_seconds: int | None = None,
) -> OpenLineageWebhook | None:
    """Apply a partial update to an existing webhook.

    Only arguments that are not None are written; None means "leave this
    field unchanged", so a field cannot be reset to None through this method.

    Args:
        webhook_id: Webhook to update.
        name: New name (if provided).
        url: New URL (if provided).
        is_active: New active status (if provided).
        headers: New headers (if provided); stored in ``headers_json``.
        api_key: New API key (if provided).
        event_types: New event types (if provided).
        batch_size: New batch size (if provided).
        timeout_seconds: New timeout (if provided).

    Returns:
        Updated webhook or None if not found.
    """
    target = await self.get_webhook(webhook_id)
    if not target:
        return None

    # Attribute name on the model -> requested value, in assignment order.
    requested = (
        ("name", name),
        ("url", url),
        ("is_active", is_active),
        ("headers_json", headers),
        ("api_key", api_key),
        ("event_types", event_types),
        ("batch_size", batch_size),
        ("timeout_seconds", timeout_seconds),
    )
    for attribute, value in requested:
        if value is not None:
            setattr(target, attribute, value)

    await self.session.commit()
    await self.session.refresh(target)
    return target
|
|
883
|
+
|
|
884
|
+
async def delete_webhook(self, webhook_id: str) -> bool:
    """Remove a webhook configuration if it exists.

    Args:
        webhook_id: Webhook to delete.

    Returns:
        True if deleted, False if not found.
    """
    target = await self.get_webhook(webhook_id)
    if target:
        await self.session.delete(target)
        await self.session.commit()
        return True
    return False
|
|
900
|
+
|
|
901
|
+
async def test_webhook(
    self,
    url: str,
    headers: dict[str, str] | None = None,
    api_key: str | None = None,
    timeout_seconds: int = 10,
) -> dict[str, Any]:
    """Test webhook connectivity.

    Sends a minimal OpenLineage ``START`` event (no inputs or outputs) to
    ``url`` and reports how the endpoint answered. Failures are returned in
    the result dictionary rather than raised.

    Args:
        url: URL to test.
        headers: Custom headers. Applied last, so they override the default
            headers, including ``Authorization``.
        api_key: API key for authentication; sent as a Bearer token.
        timeout_seconds: Request timeout.

    Returns:
        Dictionary with ``success``, ``status_code`` (None when no response
        was received), ``response_time_ms``, ``error_message`` and
        ``response_body`` (first 500 characters of the response text, or
        None).
    """
    # Function-scope import: ``timezone`` is the only name this block needs
    # beyond what the original code already used.
    from datetime import timezone

    # NOTE(review): ``url`` is posted to as given. If it can come from
    # untrusted users, confirm that callers restrict the allowed targets.

    # datetime.utcnow() is deprecated; take an aware UTC timestamp and drop
    # the tzinfo so the rendered string keeps the original "...Z" shape.
    event_time = datetime.now(timezone.utc).replace(tzinfo=None)

    # Create a minimal test event
    test_event = {
        "eventTime": event_time.isoformat() + "Z",
        "eventType": "START",
        "producer": "https://github.com/truthound/truthound-dashboard",
        "schemaURL": "https://openlineage.io/spec/1-0-5/OpenLineage.json#/definitions/RunEvent",
        "run": {"runId": str(uuid4()), "facets": {}},
        "job": {
            "namespace": "truthound-dashboard",
            "name": "webhook_test",
            "facets": {},
        },
        "inputs": [],
        "outputs": [],
    }

    request_headers = {
        "Content-Type": "application/json",
        "X-OpenLineage-Test": "true",
    }
    if api_key:
        request_headers["Authorization"] = f"Bearer {api_key}"
    if headers:
        request_headers.update(headers)

    # perf_counter is monotonic, so the measured latency cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    started = time.perf_counter()

    def elapsed_ms() -> int:
        return int((time.perf_counter() - started) * 1000)

    def outcome(
        success: bool,
        response_time_ms: int,
        status_code: int | None = None,
        error_message: str | None = None,
        response_body: str | None = None,
    ) -> dict[str, Any]:
        # Single place that defines the shape of the result dictionary.
        return {
            "success": success,
            "status_code": status_code,
            "response_time_ms": response_time_ms,
            "error_message": error_message,
            "response_body": response_body,
        }

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(
                url,
                json=test_event,
                headers=request_headers,
                timeout=timeout_seconds,
            )

        took = elapsed_ms()

        # Truncate response body
        response_body = response.text[:500] if response.text else None

        # NOTE(review): the batch transport earlier in this file counts only
        # 200/201/202 as delivered, so an endpoint answering 204 passes this
        # test but would be recorded as failed there - confirm the intent.
        accepted = response.status_code in (200, 201, 202, 204)
        return outcome(
            success=accepted,
            response_time_ms=took,
            status_code=response.status_code,
            error_message=(
                None
                if accepted
                else f"HTTP {response.status_code}: {response.reason_phrase}"
            ),
            response_body=response_body,
        )

    except httpx.TimeoutException:
        return outcome(
            success=False,
            response_time_ms=elapsed_ms(),
            error_message=f"Request timed out after {timeout_seconds} seconds",
        )
    except httpx.ConnectError as e:
        return outcome(
            success=False,
            response_time_ms=elapsed_ms(),
            error_message=f"Connection failed: {str(e)}",
        )
    except Exception as e:
        took = elapsed_ms()
        logger.exception("Webhook test failed")
        return outcome(
            success=False,
            response_time_ms=took,
            error_message=str(e),
        )
|
|
1008
|
+
|
|
1009
|
+
async def record_emission(
    self,
    webhook_id: str,
    success: bool,
    error: str | None = None,
) -> None:
    """Store the outcome of one emission attempt on a webhook.

    Does nothing when the webhook cannot be found.

    Args:
        webhook_id: Webhook ID.
        success: Whether the emission was successful.
        error: Error message if failed; "Unknown error" is recorded when it
            is missing or empty.
    """
    hook = await self.get_webhook(webhook_id)
    if not hook:
        return

    if success:
        hook.record_success()
    else:
        hook.record_failure(error or "Unknown error")
    await self.session.commit()
|