truthound-dashboard 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +258 -0
- truthound_dashboard/api/anomaly.py +1302 -0
- truthound_dashboard/api/cross_alerts.py +352 -0
- truthound_dashboard/api/deps.py +143 -0
- truthound_dashboard/api/drift_monitor.py +540 -0
- truthound_dashboard/api/lineage.py +1151 -0
- truthound_dashboard/api/maintenance.py +363 -0
- truthound_dashboard/api/middleware.py +373 -1
- truthound_dashboard/api/model_monitoring.py +805 -0
- truthound_dashboard/api/notifications_advanced.py +2452 -0
- truthound_dashboard/api/plugins.py +2096 -0
- truthound_dashboard/api/profile.py +211 -14
- truthound_dashboard/api/reports.py +853 -0
- truthound_dashboard/api/router.py +147 -0
- truthound_dashboard/api/rule_suggestions.py +310 -0
- truthound_dashboard/api/schema_evolution.py +231 -0
- truthound_dashboard/api/sources.py +47 -3
- truthound_dashboard/api/triggers.py +190 -0
- truthound_dashboard/api/validations.py +13 -0
- truthound_dashboard/api/validators.py +333 -4
- truthound_dashboard/api/versioning.py +309 -0
- truthound_dashboard/api/websocket.py +301 -0
- truthound_dashboard/core/__init__.py +27 -0
- truthound_dashboard/core/anomaly.py +1395 -0
- truthound_dashboard/core/anomaly_explainer.py +633 -0
- truthound_dashboard/core/cache.py +206 -0
- truthound_dashboard/core/cached_services.py +422 -0
- truthound_dashboard/core/charts.py +352 -0
- truthound_dashboard/core/connections.py +1069 -42
- truthound_dashboard/core/cross_alerts.py +837 -0
- truthound_dashboard/core/drift_monitor.py +1477 -0
- truthound_dashboard/core/drift_sampling.py +669 -0
- truthound_dashboard/core/i18n/__init__.py +42 -0
- truthound_dashboard/core/i18n/detector.py +173 -0
- truthound_dashboard/core/i18n/messages.py +564 -0
- truthound_dashboard/core/lineage.py +971 -0
- truthound_dashboard/core/maintenance.py +443 -5
- truthound_dashboard/core/model_monitoring.py +1043 -0
- truthound_dashboard/core/notifications/channels.py +1020 -1
- truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
- truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
- truthound_dashboard/core/notifications/deduplication/service.py +400 -0
- truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
- truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
- truthound_dashboard/core/notifications/dispatcher.py +43 -0
- truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
- truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
- truthound_dashboard/core/notifications/escalation/engine.py +429 -0
- truthound_dashboard/core/notifications/escalation/models.py +336 -0
- truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
- truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
- truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
- truthound_dashboard/core/notifications/events.py +49 -0
- truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
- truthound_dashboard/core/notifications/metrics/base.py +528 -0
- truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
- truthound_dashboard/core/notifications/routing/__init__.py +169 -0
- truthound_dashboard/core/notifications/routing/combinators.py +184 -0
- truthound_dashboard/core/notifications/routing/config.py +375 -0
- truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
- truthound_dashboard/core/notifications/routing/engine.py +382 -0
- truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
- truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
- truthound_dashboard/core/notifications/routing/rules.py +625 -0
- truthound_dashboard/core/notifications/routing/validator.py +678 -0
- truthound_dashboard/core/notifications/service.py +2 -0
- truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
- truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
- truthound_dashboard/core/notifications/throttling/builder.py +311 -0
- truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
- truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
- truthound_dashboard/core/openlineage.py +1028 -0
- truthound_dashboard/core/plugins/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/__init__.py +39 -0
- truthound_dashboard/core/plugins/docs/extractor.py +703 -0
- truthound_dashboard/core/plugins/docs/renderers.py +804 -0
- truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
- truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
- truthound_dashboard/core/plugins/hooks/manager.py +403 -0
- truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
- truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
- truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
- truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
- truthound_dashboard/core/plugins/loader.py +504 -0
- truthound_dashboard/core/plugins/registry.py +810 -0
- truthound_dashboard/core/plugins/reporter_executor.py +588 -0
- truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
- truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
- truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
- truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
- truthound_dashboard/core/plugins/sandbox.py +617 -0
- truthound_dashboard/core/plugins/security/__init__.py +68 -0
- truthound_dashboard/core/plugins/security/analyzer.py +535 -0
- truthound_dashboard/core/plugins/security/policies.py +311 -0
- truthound_dashboard/core/plugins/security/protocols.py +296 -0
- truthound_dashboard/core/plugins/security/signing.py +842 -0
- truthound_dashboard/core/plugins/security.py +446 -0
- truthound_dashboard/core/plugins/validator_executor.py +401 -0
- truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
- truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
- truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
- truthound_dashboard/core/plugins/versioning/semver.py +266 -0
- truthound_dashboard/core/profile_comparison.py +601 -0
- truthound_dashboard/core/report_history.py +570 -0
- truthound_dashboard/core/reporters/__init__.py +57 -0
- truthound_dashboard/core/reporters/base.py +296 -0
- truthound_dashboard/core/reporters/csv_reporter.py +155 -0
- truthound_dashboard/core/reporters/html_reporter.py +598 -0
- truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
- truthound_dashboard/core/reporters/i18n/base.py +494 -0
- truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
- truthound_dashboard/core/reporters/json_reporter.py +160 -0
- truthound_dashboard/core/reporters/junit_reporter.py +233 -0
- truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
- truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
- truthound_dashboard/core/reporters/registry.py +272 -0
- truthound_dashboard/core/rule_generator.py +2088 -0
- truthound_dashboard/core/scheduler.py +822 -12
- truthound_dashboard/core/schema_evolution.py +858 -0
- truthound_dashboard/core/services.py +152 -9
- truthound_dashboard/core/statistics.py +718 -0
- truthound_dashboard/core/streaming_anomaly.py +883 -0
- truthound_dashboard/core/triggers/__init__.py +45 -0
- truthound_dashboard/core/triggers/base.py +226 -0
- truthound_dashboard/core/triggers/evaluators.py +609 -0
- truthound_dashboard/core/triggers/factory.py +363 -0
- truthound_dashboard/core/unified_alerts.py +870 -0
- truthound_dashboard/core/validation_limits.py +509 -0
- truthound_dashboard/core/versioning.py +709 -0
- truthound_dashboard/core/websocket/__init__.py +59 -0
- truthound_dashboard/core/websocket/manager.py +512 -0
- truthound_dashboard/core/websocket/messages.py +130 -0
- truthound_dashboard/db/__init__.py +30 -0
- truthound_dashboard/db/models.py +3375 -3
- truthound_dashboard/main.py +22 -0
- truthound_dashboard/schemas/__init__.py +396 -1
- truthound_dashboard/schemas/anomaly.py +1258 -0
- truthound_dashboard/schemas/base.py +4 -0
- truthound_dashboard/schemas/cross_alerts.py +334 -0
- truthound_dashboard/schemas/drift_monitor.py +890 -0
- truthound_dashboard/schemas/lineage.py +428 -0
- truthound_dashboard/schemas/maintenance.py +154 -0
- truthound_dashboard/schemas/model_monitoring.py +374 -0
- truthound_dashboard/schemas/notifications_advanced.py +1363 -0
- truthound_dashboard/schemas/openlineage.py +704 -0
- truthound_dashboard/schemas/plugins.py +1293 -0
- truthound_dashboard/schemas/profile.py +420 -34
- truthound_dashboard/schemas/profile_comparison.py +242 -0
- truthound_dashboard/schemas/reports.py +285 -0
- truthound_dashboard/schemas/rule_suggestion.py +434 -0
- truthound_dashboard/schemas/schema_evolution.py +164 -0
- truthound_dashboard/schemas/source.py +117 -2
- truthound_dashboard/schemas/triggers.py +511 -0
- truthound_dashboard/schemas/unified_alerts.py +223 -0
- truthound_dashboard/schemas/validation.py +25 -1
- truthound_dashboard/schemas/validators/__init__.py +11 -0
- truthound_dashboard/schemas/validators/base.py +151 -0
- truthound_dashboard/schemas/versioning.py +152 -0
- truthound_dashboard/static/index.html +2 -2
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -18
- truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
- truthound_dashboard/static/assets/index-BCA8H1hO.js +0 -574
- truthound_dashboard/static/assets/index-BNsSQ2fN.css +0 -1
- truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +0 -1
- truthound_dashboard-1.3.0.dist-info/RECORD +0 -110
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.3.0.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,704 @@
|
|
|
1
|
+
"""OpenLineage schema definitions.
|
|
2
|
+
|
|
3
|
+
This module implements the OpenLineage specification for data lineage interoperability.
|
|
4
|
+
See: https://openlineage.io/spec/
|
|
5
|
+
|
|
6
|
+
The OpenLineage spec defines a standard for lineage metadata, enabling interoperability
|
|
7
|
+
between data tools like Airflow, Spark, dbt, and Truthound.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from datetime import datetime, timezone
|
|
13
|
+
from enum import Enum
|
|
14
|
+
from typing import Any, Literal
|
|
15
|
+
from uuid import UUID, uuid4
|
|
16
|
+
|
|
17
|
+
from pydantic import Field, field_validator
|
|
18
|
+
|
|
19
|
+
from .base import BaseSchema
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# =============================================================================
|
|
23
|
+
# OpenLineage Enums
|
|
24
|
+
# =============================================================================
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class RunState(str, Enum):
    """States a job run can report through an OpenLineage event.

    Every member is also a ``str``, so it compares equal to its wire value.
    """

    START = "START"
    RUNNING = "RUNNING"
    COMPLETE = "COMPLETE"
    FAIL = "FAIL"
    ABORT = "ABORT"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class DatasetType(str, Enum):
    """Categories used to label a dataset.

    Members are ``str`` subclasses whose values are the lowercase type names.
    """

    TABLE = "table"
    FILE = "file"
    STREAM = "stream"
    VIEW = "view"
    QUERY = "query"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# =============================================================================
|
|
51
|
+
# OpenLineage Facets
|
|
52
|
+
# =============================================================================
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class BaseFacet(BaseSchema):
    """Common parent of the facet models defined in this module.

    Contributes two bookkeeping fields, ``producer`` and ``schemaURL``, which
    are read from and written to the underscore-prefixed keys ``_producer``
    and ``_schemaURL``.
    """

    # Allow the Python attribute names as input in addition to the aliases.
    model_config = {"populate_by_name": True}

    producer: str = Field(
        description="URI identifying the producer of this metadata",
        validation_alias="_producer",
        serialization_alias="_producer",
        default="truthound-dashboard",
    )
    schemaURL: str = Field(
        description="URL to the JSON schema for this facet",
        validation_alias="_schemaURL",
        serialization_alias="_schemaURL",
        default="https://openlineage.io/spec/facets/1-0-0/",
    )
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class SchemaField(BaseSchema):
    """A single column entry listed inside a ``SchemaDatasetFacet``."""

    name: str = Field(
        ...,
        description="Field name",
    )
    type: str = Field(
        description="Field data type",
        default="string",
    )
    description: str | None = Field(
        description="Field description",
        default=None,
    )
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class SchemaDatasetFacet(BaseFacet):
    """Facet carrying a dataset's column definitions.

    ``fields`` holds one ``SchemaField`` per column and defaults to an
    empty list.
    """

    fields: list[SchemaField] = Field(
        description="List of schema fields", default_factory=list
    )
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class DataQualityMetricsInputDatasetFacet(BaseFacet):
    """Facet with size and per-column quality metrics for an input dataset.

    All three metrics are optional and default to ``None``.
    """

    row_count: int | None = Field(
        description="Total row count",
        default=None,
    )
    bytes: int | None = Field(
        description="Total bytes",
        default=None,
    )
    # Outer key: column name; inner mapping: metric name -> value.
    column_metrics: dict[str, dict[str, Any]] | None = Field(
        description="Per-column metrics (null_count, distinct_count, etc.)",
        default=None,
    )
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class DataQualityAssertionsDatasetFacet(BaseFacet):
    """Facet holding the outcome of data quality checks.

    Each assertion is stored as a free-form dictionary; the list is empty
    by default.
    """

    assertions: list[dict[str, Any]] = Field(
        description="List of quality assertions", default_factory=list
    )
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class ColumnLineageDatasetFacet(BaseFacet):
    """Facet describing lineage at column granularity.

    ``fields`` is a mapping of string keys to free-form dictionaries and
    defaults to an empty dict.
    """

    fields: dict[str, dict[str, Any]] = Field(
        description="Column-level lineage mapping", default_factory=dict
    )
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class DocumentationDatasetFacet(BaseFacet):
    """Facet attaching a required free-text description to a dataset."""

    description: str = Field(
        ...,
        description="Dataset description",
    )
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class OwnershipDatasetFacet(BaseFacet):
    """Facet listing who owns a dataset.

    Each owner is a string-to-string dictionary; the list is empty by default.
    """

    owners: list[dict[str, str]] = Field(
        description="List of owners with name and type", default_factory=list
    )
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class LifecycleStateChangeDatasetFacet(BaseFacet):
    """Facet recording a change in a dataset's lifecycle state.

    The state change itself is required; the previous identifier is optional.
    """

    lifecycle_state_change: str = Field(
        ..., description="State change type (CREATE, DROP, TRUNCATE, ALTER, etc.)"
    )
    previous_identifier: dict[str, str] | None = Field(
        description="Previous dataset identifier if renamed", default=None
    )
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
class SourceCodeJobFacet(BaseFacet):
    """Facet describing the source code behind a job.

    ``language`` defaults to ``"python"``; the snippet and URL are optional.
    """

    language: str = Field(
        description="Programming language",
        default="python",
    )
    source_code: str | None = Field(
        description="Source code snippet",
        default=None,
    )
    source_code_url: str | None = Field(
        description="URL to source code",
        default=None,
    )
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class SQLJobFacet(BaseFacet):
    """Facet carrying the required SQL text associated with a job."""

    query: str = Field(
        ...,
        description="SQL query text",
    )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class ErrorMessageRunFacet(BaseFacet):
    """Facet describing the error attached to a failed run.

    Only ``message`` is required; the language defaults to ``"python"`` and
    the stack trace is optional.
    """

    message: str = Field(
        ...,
        description="Error message",
    )
    programming_language: str = Field(
        description="Language",
        default="python",
    )
    stack_trace: str | None = Field(
        description="Stack trace",
        default=None,
    )
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class ParentRunFacet(BaseFacet):
    """Facet pointing a nested run at its parent run and parent job.

    Both references are required string-to-string dictionaries.
    """

    run: dict[str, str] = Field(
        ...,
        description="Parent run ID reference",
    )
    job: dict[str, str] = Field(
        ...,
        description="Parent job reference",
    )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class NominalTimeRunFacet(BaseFacet):
    """Facet holding the scheduled (nominal) time window of a run.

    Both values are plain strings; the end time may be omitted.
    """

    nominal_start_time: str = Field(
        ...,
        description="Scheduled start time (ISO 8601)",
    )
    nominal_end_time: str | None = Field(
        description="Scheduled end time",
        default=None,
    )
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
class ProcessingEngineRunFacet(BaseFacet):
    """Facet identifying the engine that executed a run.

    ``version`` is required; the engine name defaults to ``"truthound"`` and
    the adapter version to ``"1.0.0"``.
    """

    version: str = Field(
        ...,
        description="Engine version",
    )
    name: str = Field(
        description="Engine name",
        default="truthound",
    )
    openlineage_adapter_version: str = Field(
        description="OpenLineage adapter version", default="1.0.0"
    )
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
# =============================================================================
|
|
208
|
+
# OpenLineage Core Objects
|
|
209
|
+
# =============================================================================
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
class OpenLineageDataset(BaseSchema):
    """A dataset as referenced from OpenLineage events.

    A dataset is addressed by ``namespace`` plus ``name``. Any further
    metadata travels in the free-form ``facets`` mapping. The same model is
    used for both the inputs and the outputs of a job.
    """

    namespace: str = Field(
        ...,
        examples=["file://local", "postgresql://localhost:5432/mydb"],
        description="Namespace (e.g., 'file://', 'postgresql://host:5432')",
    )
    name: str = Field(
        ...,
        examples=["customers", "/data/sales.csv"],
        description="Dataset name (table name, file path, etc.)",
    )
    facets: dict[str, Any] = Field(
        description="Dataset facets (schema, quality, etc.)", default_factory=dict
    )

    @classmethod
    def from_source(
        cls,
        source_id: str,
        source_name: str,
        source_type: str,
        namespace: str,
        schema_fields: list[dict[str, Any]] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> "OpenLineageDataset":
        """Build a dataset from the attributes of a dashboard source.

        Args:
            source_id: Identifier of the source; stored in the ``truthound``
                facet.
            source_name: Name of the source; becomes the dataset ``name``.
            source_type: Type of the source; stored in the ``truthound`` facet.
            namespace: Namespace assigned to the dataset.
            schema_fields: Optional column dictionaries. ``name`` falls back
                to ``""`` and ``type`` to ``"string"`` when missing. A
                ``schema`` facet is added only for a non-empty list.
            metadata: Optional mapping. A ``documentation`` facet is added
                only when it holds a truthy ``description``.

        Returns:
            A new ``OpenLineageDataset`` whose facets are alias-keyed dicts.
        """
        built: dict[str, Any] = {}

        # Schema facet: only when at least one column was supplied.
        if schema_fields:
            columns = []
            for entry in schema_fields:
                columns.append(
                    SchemaField(
                        name=entry.get("name", ""),
                        type=entry.get("type", "string"),
                        description=entry.get("description"),
                    )
                )
            schema_facet = SchemaDatasetFacet(fields=columns)
            built["schema"] = schema_facet.model_dump(by_alias=True)

        # Documentation facet: only when a non-empty description exists.
        if metadata and metadata.get("description"):
            doc_facet = DocumentationDatasetFacet(description=metadata["description"])
            built["documentation"] = doc_facet.model_dump(by_alias=True)

        # Custom truthound facet: always present, written as a plain dict.
        built["truthound"] = {
            "_producer": "truthound-dashboard",
            "_schemaURL": "https://truthound.io/spec/facets/1-0-0/TruthoundDatasetFacet.json",
            "source_id": source_id,
            "source_type": source_type,
        }

        return cls(namespace=namespace, name=source_name, facets=built)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
class OpenLineageJob(BaseSchema):
    """A job as referenced from OpenLineage events.

    A job is addressed by ``namespace`` plus ``name``; optional metadata is
    kept in the free-form ``facets`` mapping.
    """

    namespace: str = Field(
        ...,
        examples=["truthound-dashboard", "airflow://prod"],
        description="Job namespace (typically the orchestrator or system)",
    )
    name: str = Field(
        ...,
        examples=["data_validation", "etl_pipeline"],
        description="Job name",
    )
    facets: dict[str, Any] = Field(
        description="Job facets (source code, documentation, etc.)",
        default_factory=dict,
    )
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
class OpenLineageRun(BaseSchema):
    """One execution of a job.

    When no ``run_id`` is supplied, a fresh random UUID4 string is generated
    for each instance.
    """

    run_id: str = Field(
        description="Unique run identifier (UUID)",
        default_factory=lambda: str(uuid4()),
    )
    facets: dict[str, Any] = Field(
        description="Run facets (parent, error, timing, etc.)",
        default_factory=dict,
    )
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
class OpenLineageEvent(BaseSchema):
    """A single OpenLineage run event.

    An event ties a run-state change (``event_type``) to a run, a job and the
    datasets the job consumed and produced. ``event_type`` and ``schema_url``
    use the camelCase aliases ``eventType`` and ``schemaURL``; the Python
    field names are accepted as input as well.
    """

    # Same setting the former inner ``class Config`` carried, in the
    # pydantic v2 form already used by ``BaseFacet`` in this module.
    model_config = {"populate_by_name": True}

    # NOTE(review): unlike ``event_type``, this field has no camelCase alias;
    # the OpenLineage spec names it ``eventTime`` - confirm how the exporter
    # serializes it before adding one.
    event_time: str = Field(
        # ``datetime.utcnow()`` is deprecated since Python 3.12. Take an aware
        # UTC "now" and drop tzinfo so the text keeps its historical shape:
        # naive ISO 8601 followed by a literal "Z".
        default_factory=lambda: (
            datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
        ),
        description="Event timestamp (ISO 8601 with timezone)",
    )
    event_type: RunState = Field(
        ...,
        alias="eventType",
        description="Event type (START, RUNNING, COMPLETE, FAIL, ABORT)",
    )
    producer: str = Field(
        default="https://github.com/truthound/truthound-dashboard",
        description="URI identifying the producer",
    )
    schema_url: str = Field(
        default="https://openlineage.io/spec/1-0-5/OpenLineage.json#/definitions/RunEvent",
        alias="schemaURL",
        description="URL to the OpenLineage schema",
    )
    run: OpenLineageRun = Field(..., description="Run information")
    job: OpenLineageJob = Field(..., description="Job information")
    inputs: list[OpenLineageDataset] = Field(
        default_factory=list,
        description="Input datasets consumed by the job",
    )
    outputs: list[OpenLineageDataset] = Field(
        default_factory=list,
        description="Output datasets produced by the job",
    )
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
# =============================================================================
|
|
372
|
+
# Export Request/Response Schemas
|
|
373
|
+
# =============================================================================
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
class OpenLineageExportFormat(str, Enum):
    """Output encodings selectable for an OpenLineage export.

    Members are ``str`` subclasses whose values are the lowercase format names.
    """

    JSON = "json"
    # Newline-delimited JSON (for streaming).
    NDJSON = "ndjson"
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
class OpenLineageExportRequest(BaseSchema):
    """Options for exporting lineage as OpenLineage events.

    Every field has a default, so an empty request is valid.
    """

    job_namespace: str = Field(
        description="Namespace for the job", default="truthound-dashboard"
    )
    job_name: str = Field(description="Name for the job", default="lineage_export")
    source_id: str | None = Field(
        description="Optional source ID to filter lineage", default=None
    )
    include_schema: bool = Field(
        description="Include schema information in dataset facets", default=True
    )
    include_quality_metrics: bool = Field(
        description="Include data quality metrics if available", default=False
    )
    format: OpenLineageExportFormat = Field(
        description="Export format", default=OpenLineageExportFormat.JSON
    )
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
class OpenLineageExportResponse(BaseSchema):
    """Result of an OpenLineage export.

    Carries the exported events, three caller-supplied summary counters and
    the time the export was produced.
    """

    events: list[OpenLineageEvent] = Field(
        ...,
        description="List of OpenLineage events",
    )
    total_events: int = Field(..., description="Total number of events")
    total_datasets: int = Field(..., description="Total unique datasets")
    total_jobs: int = Field(..., description="Total jobs represented")
    export_time: str = Field(
        # ``datetime.utcnow()`` is deprecated since Python 3.12. Take an aware
        # UTC "now" and drop tzinfo so the text keeps its historical shape:
        # naive ISO 8601 followed by a literal "Z".
        default_factory=lambda: (
            datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
        ),
        description="Export timestamp",
    )
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
class OpenLineageWebhookConfig(BaseSchema):
    """Connection settings for sending OpenLineage events to a webhook.

    Only ``url`` is required. ``batch_size`` must lie in 1..1000 and
    ``timeout_seconds`` in 1..300.
    """

    url: str = Field(
        ...,
        examples=["https://api.openlineage.io/v1/lineage"],
        description="Webhook URL to send events to",
    )
    api_key: str | None = Field(
        description="Optional API key for authentication", default=None
    )
    headers: dict[str, str] = Field(
        description="Additional headers to include", default_factory=dict
    )
    batch_size: int = Field(
        description="Number of events to send per batch",
        default=100,
        ge=1,
        le=1000,
    )
    timeout_seconds: int = Field(
        description="Request timeout in seconds",
        default=30,
        ge=1,
        le=300,
    )
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
class OpenLineageEmitRequest(BaseSchema):
    """Request to push OpenLineage events to an external endpoint.

    The webhook configuration is required; the source filter and job
    identity fall back to defaults.
    """

    webhook: OpenLineageWebhookConfig = Field(
        ..., description="Webhook configuration"
    )
    source_id: str | None = Field(
        description="Optional source ID to filter lineage", default=None
    )
    job_namespace: str = Field(
        description="Namespace for the job", default="truthound-dashboard"
    )
    job_name: str = Field(description="Name for the job", default="lineage_export")
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
class OpenLineageEmitResponse(BaseSchema):
    """Outcome of an emit operation.

    ``success`` and ``events_sent`` are required; ``failed_events`` defaults
    to ``0`` and ``error_message`` to ``None``.
    """

    success: bool = Field(
        ...,
        description="Whether emission was successful",
    )
    events_sent: int = Field(
        ...,
        description="Number of events sent",
    )
    failed_events: int = Field(
        description="Number of failed events",
        default=0,
    )
    error_message: str | None = Field(
        description="Error message if emission failed", default=None
    )
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
# =============================================================================
|
|
492
|
+
# Webhook Configuration Schemas
|
|
493
|
+
# =============================================================================
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
class WebhookEventType(str, Enum):
    """Event categories a webhook can be configured to receive.

    Members are ``str`` subclasses whose values are the lowercase names.
    """

    JOB = "job"
    DATASET = "dataset"
    ALL = "all"
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
class WebhookCreate(BaseSchema):
    """Payload for registering a new OpenLineage webhook.

    ``name`` (1-255 characters) and ``url`` are required. Everything else has
    a default; ``batch_size`` is limited to 1..1000 and ``timeout_seconds``
    to 1..300.
    """

    name: str = Field(
        ...,
        examples=["Marquez Production", "DataHub Dev"],
        description="Human-readable name for the webhook",
        min_length=1,
        max_length=255,
    )
    url: str = Field(
        ...,
        examples=["https://api.marquez.io/v1/lineage", "http://localhost:5000/api/v1/lineage"],
        description="Target URL for the webhook",
    )
    is_active: bool = Field(
        description="Whether the webhook is enabled", default=True
    )
    headers: dict[str, str] = Field(
        description="Custom headers to include (excluding Authorization)",
        default_factory=dict,
    )
    api_key: str | None = Field(
        description="API key for authentication (sent as Bearer token)",
        default=None,
    )
    event_types: WebhookEventType = Field(
        description="Types of events to emit", default=WebhookEventType.ALL
    )
    batch_size: int = Field(
        description="Number of events per batch",
        default=100,
        ge=1,
        le=1000,
    )
    timeout_seconds: int = Field(
        description="Request timeout in seconds",
        default=30,
        ge=1,
        le=300,
    )
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
class WebhookUpdate(BaseSchema):
    """Payload for modifying an existing webhook.

    Every field is optional and defaults to ``None``. Supplied values are
    validated with the same bounds as on creation (name 1-255 characters,
    ``batch_size`` 1..1000, ``timeout_seconds`` 1..300).
    """

    # NOTE(review): presumably a field left as None means "leave unchanged";
    # confirm against the update handler.
    name: str | None = Field(
        description="Human-readable name for the webhook",
        default=None,
        min_length=1,
        max_length=255,
    )
    url: str | None = Field(description="Target URL for the webhook", default=None)
    is_active: bool | None = Field(
        description="Whether the webhook is enabled", default=None
    )
    headers: dict[str, str] | None = Field(
        description="Custom headers to include", default=None
    )
    api_key: str | None = Field(description="API key for authentication", default=None)
    event_types: WebhookEventType | None = Field(
        description="Types of events to emit", default=None
    )
    batch_size: int | None = Field(
        description="Number of events per batch",
        default=None,
        ge=1,
        le=1000,
    )
    timeout_seconds: int | None = Field(
        description="Request timeout in seconds",
        default=None,
        ge=1,
        le=300,
    )
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
class WebhookResponse(BaseSchema):
    """Schema for webhook response."""

    # --- Identity and configuration (all required except headers) ------------
    id: str = Field(..., description="Unique webhook ID")
    name: str = Field(..., description="Webhook name")
    url: str = Field(..., description="Target URL")
    is_active: bool = Field(..., description="Whether webhook is enabled")
    headers: dict[str, str] = Field(
        default_factory=dict,
        description="Custom headers",
    )
    # Declared as a plain string here rather than as WebhookEventType.
    event_types: str = Field(..., description="Event types to emit")
    batch_size: int = Field(..., description="Batch size")
    timeout_seconds: int = Field(..., description="Timeout in seconds")

    # --- Delivery statistics --------------------------------------------------
    # Timestamps are carried as strings.
    # NOTE(review): presumably ISO 8601 — confirm with whatever builds this response.
    last_sent_at: str | None = Field(
        default=None,
        description="Last successful emission time",
    )
    success_count: int = Field(default=0, description="Total successful emissions")
    failure_count: int = Field(default=0, description="Total failed emissions")
    last_error: str | None = Field(default=None, description="Last error message")

    # --- Audit timestamps -----------------------------------------------------
    created_at: str = Field(..., description="Creation timestamp")
    updated_at: str | None = Field(
        default=None,
        description="Last update timestamp",
    )
|
|
609
|
+
|
|
610
|
+
|
|
611
|
+
class WebhookListResponse(BaseSchema):
    """Response for listing webhooks."""

    # Both fields are required.
    data: list[WebhookResponse] = Field(
        ...,
        description="List of webhooks",
    )
    # NOTE(review): whether this counts all webhooks or only those in `data`
    # is not visible here — confirm with the list endpoint.
    total: int = Field(
        ...,
        description="Total number of webhooks",
    )
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
class WebhookTestRequest(BaseSchema):
    """Request to test a webhook connection."""

    # The only required field: the endpoint to probe.
    url: str = Field(..., description="URL to test", examples=["https://api.marquez.io/v1/lineage"])

    # Optional request decoration; headers default to a fresh empty dict.
    headers: dict[str, str] = Field(
        default_factory=dict, description="Headers to include in test request"
    )
    api_key: str | None = Field(default=None, description="API key for authentication")

    # Bounded to 1..60 with a default of 10.
    timeout_seconds: int = Field(default=10, description="Test request timeout", ge=1, le=60)
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
class WebhookTestResult(BaseSchema):
    """Result of a webhook test."""

    # Only the overall outcome is required; every detail field defaults to None.
    success: bool = Field(..., description="Whether the test was successful")

    # Details of the HTTP exchange.
    status_code: int | None = Field(
        default=None,
        description="HTTP status code",
    )
    response_time_ms: int | None = Field(
        default=None,
        description="Response time in ms",
    )
    response_body: str | None = Field(
        default=None,
        description="Response body (truncated)",
    )

    # Failure detail.
    error_message: str | None = Field(
        default=None,
        description="Error message if failed",
    )
|
|
650
|
+
|
|
651
|
+
|
|
652
|
+
# =============================================================================
|
|
653
|
+
# Dataset Namespace Helpers
|
|
654
|
+
# =============================================================================
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
def build_dataset_namespace(source_type: str, config: dict[str, Any] | None = None) -> str:
|
|
658
|
+
"""Build a namespace URI from source configuration.
|
|
659
|
+
|
|
660
|
+
Args:
|
|
661
|
+
source_type: Type of data source.
|
|
662
|
+
config: Source configuration dictionary.
|
|
663
|
+
|
|
664
|
+
Returns:
|
|
665
|
+
Namespace URI string.
|
|
666
|
+
"""
|
|
667
|
+
config = config or {}
|
|
668
|
+
|
|
669
|
+
if source_type == "file":
|
|
670
|
+
return f"file://{config.get('base_path', 'local')}"
|
|
671
|
+
|
|
672
|
+
if source_type == "postgresql":
|
|
673
|
+
host = config.get("host", "localhost")
|
|
674
|
+
port = config.get("port", 5432)
|
|
675
|
+
database = config.get("database", "")
|
|
676
|
+
return f"postgresql://{host}:{port}/{database}"
|
|
677
|
+
|
|
678
|
+
if source_type == "mysql":
|
|
679
|
+
host = config.get("host", "localhost")
|
|
680
|
+
port = config.get("port", 3306)
|
|
681
|
+
database = config.get("database", "")
|
|
682
|
+
return f"mysql://{host}:{port}/{database}"
|
|
683
|
+
|
|
684
|
+
if source_type == "snowflake":
|
|
685
|
+
account = config.get("account", "")
|
|
686
|
+
database = config.get("database", "")
|
|
687
|
+
return f"snowflake://{account}/{database}"
|
|
688
|
+
|
|
689
|
+
if source_type == "bigquery":
|
|
690
|
+
project = config.get("project", "")
|
|
691
|
+
dataset = config.get("dataset", "")
|
|
692
|
+
return f"bigquery://{project}.{dataset}"
|
|
693
|
+
|
|
694
|
+
if source_type == "redshift":
|
|
695
|
+
host = config.get("host", "")
|
|
696
|
+
database = config.get("database", "")
|
|
697
|
+
return f"redshift://{host}/{database}"
|
|
698
|
+
|
|
699
|
+
if source_type == "databricks":
|
|
700
|
+
workspace = config.get("workspace_url", "")
|
|
701
|
+
return f"databricks://{workspace}"
|
|
702
|
+
|
|
703
|
+
# Default namespace
|
|
704
|
+
return f"{source_type}://truthound"
|