truthound-dashboard 1.3.1__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. truthound_dashboard/api/alerts.py +258 -0
  2. truthound_dashboard/api/anomaly.py +1302 -0
  3. truthound_dashboard/api/cross_alerts.py +352 -0
  4. truthound_dashboard/api/deps.py +143 -0
  5. truthound_dashboard/api/drift_monitor.py +540 -0
  6. truthound_dashboard/api/lineage.py +1151 -0
  7. truthound_dashboard/api/maintenance.py +363 -0
  8. truthound_dashboard/api/middleware.py +373 -1
  9. truthound_dashboard/api/model_monitoring.py +805 -0
  10. truthound_dashboard/api/notifications_advanced.py +2452 -0
  11. truthound_dashboard/api/plugins.py +2096 -0
  12. truthound_dashboard/api/profile.py +211 -14
  13. truthound_dashboard/api/reports.py +853 -0
  14. truthound_dashboard/api/router.py +147 -0
  15. truthound_dashboard/api/rule_suggestions.py +310 -0
  16. truthound_dashboard/api/schema_evolution.py +231 -0
  17. truthound_dashboard/api/sources.py +47 -3
  18. truthound_dashboard/api/triggers.py +190 -0
  19. truthound_dashboard/api/validations.py +13 -0
  20. truthound_dashboard/api/validators.py +333 -4
  21. truthound_dashboard/api/versioning.py +309 -0
  22. truthound_dashboard/api/websocket.py +301 -0
  23. truthound_dashboard/core/__init__.py +27 -0
  24. truthound_dashboard/core/anomaly.py +1395 -0
  25. truthound_dashboard/core/anomaly_explainer.py +633 -0
  26. truthound_dashboard/core/cache.py +206 -0
  27. truthound_dashboard/core/cached_services.py +422 -0
  28. truthound_dashboard/core/charts.py +352 -0
  29. truthound_dashboard/core/connections.py +1069 -42
  30. truthound_dashboard/core/cross_alerts.py +837 -0
  31. truthound_dashboard/core/drift_monitor.py +1477 -0
  32. truthound_dashboard/core/drift_sampling.py +669 -0
  33. truthound_dashboard/core/i18n/__init__.py +42 -0
  34. truthound_dashboard/core/i18n/detector.py +173 -0
  35. truthound_dashboard/core/i18n/messages.py +564 -0
  36. truthound_dashboard/core/lineage.py +971 -0
  37. truthound_dashboard/core/maintenance.py +443 -5
  38. truthound_dashboard/core/model_monitoring.py +1043 -0
  39. truthound_dashboard/core/notifications/channels.py +1020 -1
  40. truthound_dashboard/core/notifications/deduplication/__init__.py +143 -0
  41. truthound_dashboard/core/notifications/deduplication/policies.py +274 -0
  42. truthound_dashboard/core/notifications/deduplication/service.py +400 -0
  43. truthound_dashboard/core/notifications/deduplication/stores.py +2365 -0
  44. truthound_dashboard/core/notifications/deduplication/strategies.py +422 -0
  45. truthound_dashboard/core/notifications/dispatcher.py +43 -0
  46. truthound_dashboard/core/notifications/escalation/__init__.py +149 -0
  47. truthound_dashboard/core/notifications/escalation/backends.py +1384 -0
  48. truthound_dashboard/core/notifications/escalation/engine.py +429 -0
  49. truthound_dashboard/core/notifications/escalation/models.py +336 -0
  50. truthound_dashboard/core/notifications/escalation/scheduler.py +1187 -0
  51. truthound_dashboard/core/notifications/escalation/state_machine.py +330 -0
  52. truthound_dashboard/core/notifications/escalation/stores.py +2896 -0
  53. truthound_dashboard/core/notifications/events.py +49 -0
  54. truthound_dashboard/core/notifications/metrics/__init__.py +115 -0
  55. truthound_dashboard/core/notifications/metrics/base.py +528 -0
  56. truthound_dashboard/core/notifications/metrics/collectors.py +583 -0
  57. truthound_dashboard/core/notifications/routing/__init__.py +169 -0
  58. truthound_dashboard/core/notifications/routing/combinators.py +184 -0
  59. truthound_dashboard/core/notifications/routing/config.py +375 -0
  60. truthound_dashboard/core/notifications/routing/config_parser.py +867 -0
  61. truthound_dashboard/core/notifications/routing/engine.py +382 -0
  62. truthound_dashboard/core/notifications/routing/expression_engine.py +1269 -0
  63. truthound_dashboard/core/notifications/routing/jinja2_engine.py +774 -0
  64. truthound_dashboard/core/notifications/routing/rules.py +625 -0
  65. truthound_dashboard/core/notifications/routing/validator.py +678 -0
  66. truthound_dashboard/core/notifications/service.py +2 -0
  67. truthound_dashboard/core/notifications/stats_aggregator.py +850 -0
  68. truthound_dashboard/core/notifications/throttling/__init__.py +83 -0
  69. truthound_dashboard/core/notifications/throttling/builder.py +311 -0
  70. truthound_dashboard/core/notifications/throttling/stores.py +1859 -0
  71. truthound_dashboard/core/notifications/throttling/throttlers.py +633 -0
  72. truthound_dashboard/core/openlineage.py +1028 -0
  73. truthound_dashboard/core/plugins/__init__.py +39 -0
  74. truthound_dashboard/core/plugins/docs/__init__.py +39 -0
  75. truthound_dashboard/core/plugins/docs/extractor.py +703 -0
  76. truthound_dashboard/core/plugins/docs/renderers.py +804 -0
  77. truthound_dashboard/core/plugins/hooks/__init__.py +63 -0
  78. truthound_dashboard/core/plugins/hooks/decorators.py +367 -0
  79. truthound_dashboard/core/plugins/hooks/manager.py +403 -0
  80. truthound_dashboard/core/plugins/hooks/protocols.py +265 -0
  81. truthound_dashboard/core/plugins/lifecycle/__init__.py +41 -0
  82. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +584 -0
  83. truthound_dashboard/core/plugins/lifecycle/machine.py +419 -0
  84. truthound_dashboard/core/plugins/lifecycle/states.py +266 -0
  85. truthound_dashboard/core/plugins/loader.py +504 -0
  86. truthound_dashboard/core/plugins/registry.py +810 -0
  87. truthound_dashboard/core/plugins/reporter_executor.py +588 -0
  88. truthound_dashboard/core/plugins/sandbox/__init__.py +59 -0
  89. truthound_dashboard/core/plugins/sandbox/code_validator.py +243 -0
  90. truthound_dashboard/core/plugins/sandbox/engines.py +770 -0
  91. truthound_dashboard/core/plugins/sandbox/protocols.py +194 -0
  92. truthound_dashboard/core/plugins/sandbox.py +617 -0
  93. truthound_dashboard/core/plugins/security/__init__.py +68 -0
  94. truthound_dashboard/core/plugins/security/analyzer.py +535 -0
  95. truthound_dashboard/core/plugins/security/policies.py +311 -0
  96. truthound_dashboard/core/plugins/security/protocols.py +296 -0
  97. truthound_dashboard/core/plugins/security/signing.py +842 -0
  98. truthound_dashboard/core/plugins/security.py +446 -0
  99. truthound_dashboard/core/plugins/validator_executor.py +401 -0
  100. truthound_dashboard/core/plugins/versioning/__init__.py +51 -0
  101. truthound_dashboard/core/plugins/versioning/constraints.py +377 -0
  102. truthound_dashboard/core/plugins/versioning/dependencies.py +541 -0
  103. truthound_dashboard/core/plugins/versioning/semver.py +266 -0
  104. truthound_dashboard/core/profile_comparison.py +601 -0
  105. truthound_dashboard/core/report_history.py +570 -0
  106. truthound_dashboard/core/reporters/__init__.py +57 -0
  107. truthound_dashboard/core/reporters/base.py +296 -0
  108. truthound_dashboard/core/reporters/csv_reporter.py +155 -0
  109. truthound_dashboard/core/reporters/html_reporter.py +598 -0
  110. truthound_dashboard/core/reporters/i18n/__init__.py +65 -0
  111. truthound_dashboard/core/reporters/i18n/base.py +494 -0
  112. truthound_dashboard/core/reporters/i18n/catalogs.py +930 -0
  113. truthound_dashboard/core/reporters/json_reporter.py +160 -0
  114. truthound_dashboard/core/reporters/junit_reporter.py +233 -0
  115. truthound_dashboard/core/reporters/markdown_reporter.py +207 -0
  116. truthound_dashboard/core/reporters/pdf_reporter.py +209 -0
  117. truthound_dashboard/core/reporters/registry.py +272 -0
  118. truthound_dashboard/core/rule_generator.py +2088 -0
  119. truthound_dashboard/core/scheduler.py +822 -12
  120. truthound_dashboard/core/schema_evolution.py +858 -0
  121. truthound_dashboard/core/services.py +152 -9
  122. truthound_dashboard/core/statistics.py +718 -0
  123. truthound_dashboard/core/streaming_anomaly.py +883 -0
  124. truthound_dashboard/core/triggers/__init__.py +45 -0
  125. truthound_dashboard/core/triggers/base.py +226 -0
  126. truthound_dashboard/core/triggers/evaluators.py +609 -0
  127. truthound_dashboard/core/triggers/factory.py +363 -0
  128. truthound_dashboard/core/unified_alerts.py +870 -0
  129. truthound_dashboard/core/validation_limits.py +509 -0
  130. truthound_dashboard/core/versioning.py +709 -0
  131. truthound_dashboard/core/websocket/__init__.py +59 -0
  132. truthound_dashboard/core/websocket/manager.py +512 -0
  133. truthound_dashboard/core/websocket/messages.py +130 -0
  134. truthound_dashboard/db/__init__.py +30 -0
  135. truthound_dashboard/db/models.py +3375 -3
  136. truthound_dashboard/main.py +22 -0
  137. truthound_dashboard/schemas/__init__.py +396 -1
  138. truthound_dashboard/schemas/anomaly.py +1258 -0
  139. truthound_dashboard/schemas/base.py +4 -0
  140. truthound_dashboard/schemas/cross_alerts.py +334 -0
  141. truthound_dashboard/schemas/drift_monitor.py +890 -0
  142. truthound_dashboard/schemas/lineage.py +428 -0
  143. truthound_dashboard/schemas/maintenance.py +154 -0
  144. truthound_dashboard/schemas/model_monitoring.py +374 -0
  145. truthound_dashboard/schemas/notifications_advanced.py +1363 -0
  146. truthound_dashboard/schemas/openlineage.py +704 -0
  147. truthound_dashboard/schemas/plugins.py +1293 -0
  148. truthound_dashboard/schemas/profile.py +420 -34
  149. truthound_dashboard/schemas/profile_comparison.py +242 -0
  150. truthound_dashboard/schemas/reports.py +285 -0
  151. truthound_dashboard/schemas/rule_suggestion.py +434 -0
  152. truthound_dashboard/schemas/schema_evolution.py +164 -0
  153. truthound_dashboard/schemas/source.py +117 -2
  154. truthound_dashboard/schemas/triggers.py +511 -0
  155. truthound_dashboard/schemas/unified_alerts.py +223 -0
  156. truthound_dashboard/schemas/validation.py +25 -1
  157. truthound_dashboard/schemas/validators/__init__.py +11 -0
  158. truthound_dashboard/schemas/validators/base.py +151 -0
  159. truthound_dashboard/schemas/versioning.py +152 -0
  160. truthound_dashboard/static/index.html +2 -2
  161. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/METADATA +142 -22
  162. truthound_dashboard-1.4.0.dist-info/RECORD +239 -0
  163. truthound_dashboard/static/assets/index-BZG20KuF.js +0 -586
  164. truthound_dashboard/static/assets/index-D_HyZ3pb.css +0 -1
  165. truthound_dashboard/static/assets/unmerged_dictionaries-CtpqQBm0.js +0 -1
  166. truthound_dashboard-1.3.1.dist-info/RECORD +0 -110
  167. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/WHEEL +0 -0
  168. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/entry_points.txt +0 -0
  169. {truthound_dashboard-1.3.1.dist-info → truthound_dashboard-1.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,704 @@
1
+ """OpenLineage schema definitions.
2
+
3
+ This module implements the OpenLineage specification for data lineage interoperability.
4
+ See: https://openlineage.io/spec/
5
+
6
+ The OpenLineage spec defines a standard for lineage metadata, enabling interoperability
7
+ between data tools like Airflow, Spark, dbt, and Truthound.
8
+ """
9
+
10
from __future__ import annotations

from datetime import datetime, timezone
from enum import Enum
from typing import Any, Literal
from uuid import UUID, uuid4

from pydantic import Field, field_validator

from .base import BaseSchema
20
+
21
+
22
+ # =============================================================================
23
+ # OpenLineage Enums
24
+ # =============================================================================
25
+
26
+
27
+ class RunState(str, Enum):
28
+ """OpenLineage run state.
29
+
30
+ Defines the lifecycle of a job run.
31
+ """
32
+
33
+ START = "START"
34
+ RUNNING = "RUNNING"
35
+ COMPLETE = "COMPLETE"
36
+ FAIL = "FAIL"
37
+ ABORT = "ABORT"
38
+
39
+
40
+ class DatasetType(str, Enum):
41
+ """Dataset types for categorization."""
42
+
43
+ TABLE = "table"
44
+ FILE = "file"
45
+ STREAM = "stream"
46
+ VIEW = "view"
47
+ QUERY = "query"
48
+
49
+
50
+ # =============================================================================
51
+ # OpenLineage Facets
52
+ # =============================================================================
53
+
54
+
55
+ class BaseFacet(BaseSchema):
56
+ """Base facet with producer information.
57
+
58
+ All facets include metadata about what produced them.
59
+ """
60
+
61
+ model_config = {"populate_by_name": True}
62
+
63
+ producer: str = Field(
64
+ default="truthound-dashboard",
65
+ serialization_alias="_producer",
66
+ validation_alias="_producer",
67
+ description="URI identifying the producer of this metadata",
68
+ )
69
+ schemaURL: str = Field(
70
+ default="https://openlineage.io/spec/facets/1-0-0/",
71
+ serialization_alias="_schemaURL",
72
+ validation_alias="_schemaURL",
73
+ description="URL to the JSON schema for this facet",
74
+ )
75
+
76
+
77
+ class SchemaField(BaseSchema):
78
+ """Schema field definition for dataset schema facet."""
79
+
80
+ name: str = Field(..., description="Field name")
81
+ type: str = Field(default="string", description="Field data type")
82
+ description: str | None = Field(default=None, description="Field description")
83
+
84
+
85
+ class SchemaDatasetFacet(BaseFacet):
86
+ """Dataset schema facet.
87
+
88
+ Describes the schema of a dataset including column definitions.
89
+ """
90
+
91
+ fields: list[SchemaField] = Field(
92
+ default_factory=list,
93
+ description="List of schema fields",
94
+ )
95
+
96
+
97
+ class DataQualityMetricsInputDatasetFacet(BaseFacet):
98
+ """Data quality metrics facet for input datasets."""
99
+
100
+ row_count: int | None = Field(default=None, description="Total row count")
101
+ bytes: int | None = Field(default=None, description="Total bytes")
102
+ column_metrics: dict[str, dict[str, Any]] | None = Field(
103
+ default=None,
104
+ description="Per-column metrics (null_count, distinct_count, etc.)",
105
+ )
106
+
107
+
108
+ class DataQualityAssertionsDatasetFacet(BaseFacet):
109
+ """Data quality assertions facet.
110
+
111
+ Records validation results from data quality checks.
112
+ """
113
+
114
+ assertions: list[dict[str, Any]] = Field(
115
+ default_factory=list,
116
+ description="List of quality assertions",
117
+ )
118
+
119
+
120
+ class ColumnLineageDatasetFacet(BaseFacet):
121
+ """Column-level lineage facet.
122
+
123
+ Tracks how individual columns are derived from source columns.
124
+ """
125
+
126
+ fields: dict[str, dict[str, Any]] = Field(
127
+ default_factory=dict,
128
+ description="Column-level lineage mapping",
129
+ )
130
+
131
+
132
+ class DocumentationDatasetFacet(BaseFacet):
133
+ """Documentation facet for datasets."""
134
+
135
+ description: str = Field(..., description="Dataset description")
136
+
137
+
138
+ class OwnershipDatasetFacet(BaseFacet):
139
+ """Ownership information facet."""
140
+
141
+ owners: list[dict[str, str]] = Field(
142
+ default_factory=list,
143
+ description="List of owners with name and type",
144
+ )
145
+
146
+
147
+ class LifecycleStateChangeDatasetFacet(BaseFacet):
148
+ """Lifecycle state change facet."""
149
+
150
+ lifecycle_state_change: str = Field(
151
+ ...,
152
+ description="State change type (CREATE, DROP, TRUNCATE, ALTER, etc.)",
153
+ )
154
+ previous_identifier: dict[str, str] | None = Field(
155
+ default=None,
156
+ description="Previous dataset identifier if renamed",
157
+ )
158
+
159
+
160
+ class SourceCodeJobFacet(BaseFacet):
161
+ """Source code information for jobs."""
162
+
163
+ language: str = Field(default="python", description="Programming language")
164
+ source_code: str | None = Field(default=None, description="Source code snippet")
165
+ source_code_url: str | None = Field(default=None, description="URL to source code")
166
+
167
+
168
+ class SQLJobFacet(BaseFacet):
169
+ """SQL query facet for jobs."""
170
+
171
+ query: str = Field(..., description="SQL query text")
172
+
173
+
174
+ class ErrorMessageRunFacet(BaseFacet):
175
+ """Error message facet for failed runs."""
176
+
177
+ message: str = Field(..., description="Error message")
178
+ programming_language: str = Field(default="python", description="Language")
179
+ stack_trace: str | None = Field(default=None, description="Stack trace")
180
+
181
+
182
+ class ParentRunFacet(BaseFacet):
183
+ """Parent run reference for nested runs."""
184
+
185
+ run: dict[str, str] = Field(..., description="Parent run ID reference")
186
+ job: dict[str, str] = Field(..., description="Parent job reference")
187
+
188
+
189
+ class NominalTimeRunFacet(BaseFacet):
190
+ """Nominal time facet for scheduling information."""
191
+
192
+ nominal_start_time: str = Field(..., description="Scheduled start time (ISO 8601)")
193
+ nominal_end_time: str | None = Field(default=None, description="Scheduled end time")
194
+
195
+
196
+ class ProcessingEngineRunFacet(BaseFacet):
197
+ """Processing engine information."""
198
+
199
+ version: str = Field(..., description="Engine version")
200
+ name: str = Field(default="truthound", description="Engine name")
201
+ openlineage_adapter_version: str = Field(
202
+ default="1.0.0",
203
+ description="OpenLineage adapter version",
204
+ )
205
+
206
+
207
+ # =============================================================================
208
+ # OpenLineage Core Objects
209
+ # =============================================================================
210
+
211
+
212
class OpenLineageDataset(BaseSchema):
    """A dataset as represented in OpenLineage.

    A dataset is identified by its ``namespace`` and ``name`` and carries a
    free-form ``facets`` mapping. Jobs reference datasets either as inputs
    (consumed) or as outputs (produced).
    """

    namespace: str = Field(
        ...,
        examples=["file://local", "postgresql://localhost:5432/mydb"],
        description="Namespace (e.g., 'file://', 'postgresql://host:5432')",
    )
    name: str = Field(
        ...,
        examples=["customers", "/data/sales.csv"],
        description="Dataset name (table name, file path, etc.)",
    )
    facets: dict[str, Any] = Field(
        description="Dataset facets (schema, quality, etc.)",
        default_factory=dict,
    )

    @classmethod
    def from_source(
        cls,
        source_id: str,
        source_name: str,
        source_type: str,
        namespace: str,
        schema_fields: list[dict[str, Any]] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> "OpenLineageDataset":
        """Build a dataset from a dashboard source.

        Args:
            source_id: Unique identifier of the source; stored in the
                ``truthound`` facet.
            source_name: Human-readable source name; becomes the dataset name.
            source_type: Source type (file, postgresql, etc.); stored in the
                ``truthound`` facet.
            namespace: Namespace URI for the dataset.
            schema_fields: Optional field definitions; each dict may provide
                ``name``, ``type`` and ``description``.
            metadata: Optional extra metadata; only ``description`` is read.

        Returns:
            A new OpenLineageDataset whose facets contain ``schema`` (when
            fields are given), ``documentation`` (when a non-empty
            description is given) and always ``truthound``.
        """
        collected: dict[str, Any] = {}

        # Schema facet: only emitted when at least one field was supplied.
        if schema_fields:
            columns = []
            for spec in schema_fields:
                columns.append(
                    SchemaField(
                        name=spec.get("name", ""),
                        type=spec.get("type", "string"),
                        description=spec.get("description"),
                    )
                )
            schema_facet = SchemaDatasetFacet(fields=columns)
            collected["schema"] = schema_facet.model_dump(by_alias=True)

        # Documentation facet: skipped for missing or empty descriptions.
        description = metadata.get("description") if metadata else None
        if description:
            doc_facet = DocumentationDatasetFacet(description=description)
            collected["documentation"] = doc_facet.model_dump(by_alias=True)

        # Custom facet that ties the dataset back to the dashboard source.
        collected["truthound"] = {
            "_producer": "truthound-dashboard",
            "_schemaURL": "https://truthound.io/spec/facets/1-0-0/TruthoundDatasetFacet.json",
            "source_id": source_id,
            "source_type": source_type,
        }

        return cls(namespace=namespace, name=source_name, facets=collected)
291
+
292
+
293
+ class OpenLineageJob(BaseSchema):
294
+ """OpenLineage job representation.
295
+
296
+ Jobs represent data processing tasks that consume and produce datasets.
297
+ """
298
+
299
+ namespace: str = Field(
300
+ ...,
301
+ description="Job namespace (typically the orchestrator or system)",
302
+ examples=["truthound-dashboard", "airflow://prod"],
303
+ )
304
+ name: str = Field(
305
+ ...,
306
+ description="Job name",
307
+ examples=["data_validation", "etl_pipeline"],
308
+ )
309
+ facets: dict[str, Any] = Field(
310
+ default_factory=dict,
311
+ description="Job facets (source code, documentation, etc.)",
312
+ )
313
+
314
+
315
+ class OpenLineageRun(BaseSchema):
316
+ """OpenLineage run representation.
317
+
318
+ Runs are instances of job executions with a unique ID and lifecycle.
319
+ """
320
+
321
+ run_id: str = Field(
322
+ default_factory=lambda: str(uuid4()),
323
+ description="Unique run identifier (UUID)",
324
+ )
325
+ facets: dict[str, Any] = Field(
326
+ default_factory=dict,
327
+ description="Run facets (parent, error, timing, etc.)",
328
+ )
329
+
330
+
331
class OpenLineageEvent(BaseSchema):
    """OpenLineage run event.

    An event captures one state change in a run's lifecycle and is the
    primary output format for OpenLineage export.

    ``event_type`` and ``schema_url`` carry the camelCase aliases
    ``eventType`` and ``schemaURL``; ``populate_by_name`` lets callers use
    either the field name or the alias when constructing an event.
    """

    # Pydantic v2 configuration, in the same form BaseFacet uses in this
    # module (the nested ``class Config`` form is deprecated in v2).
    model_config = {"populate_by_name": True}

    # NOTE(review): unlike event_type/schema_url, this field declares no
    # camelCase alias here - confirm that "eventTime" is produced elsewhere
    # (BaseSchema or the exporter) before relying on spec-conformant output.
    event_time: str = Field(
        # Timezone-aware "now" in UTC, rendered with the same trailing "Z"
        # the previous naive utcnow()-based default produced.
        default_factory=lambda: datetime.now(timezone.utc)
        .isoformat()
        .replace("+00:00", "Z"),
        description="Event timestamp (ISO 8601 with timezone)",
    )
    event_type: RunState = Field(
        ...,
        alias="eventType",
        description="Event type (START, RUNNING, COMPLETE, FAIL, ABORT)",
    )
    producer: str = Field(
        default="https://github.com/truthound/truthound-dashboard",
        description="URI identifying the producer",
    )
    schema_url: str = Field(
        default="https://openlineage.io/spec/1-0-5/OpenLineage.json#/definitions/RunEvent",
        alias="schemaURL",
        description="URL to the OpenLineage schema",
    )
    run: OpenLineageRun = Field(..., description="Run information")
    job: OpenLineageJob = Field(..., description="Job information")
    inputs: list[OpenLineageDataset] = Field(
        default_factory=list,
        description="Input datasets consumed by the job",
    )
    outputs: list[OpenLineageDataset] = Field(
        default_factory=list,
        description="Output datasets produced by the job",
    )
369
+
370
+
371
+ # =============================================================================
372
+ # Export Request/Response Schemas
373
+ # =============================================================================
374
+
375
+
376
+ class OpenLineageExportFormat(str, Enum):
377
+ """Supported export formats."""
378
+
379
+ JSON = "json"
380
+ NDJSON = "ndjson" # Newline-delimited JSON (for streaming)
381
+
382
+
383
+ class OpenLineageExportRequest(BaseSchema):
384
+ """Request to export lineage as OpenLineage events."""
385
+
386
+ job_namespace: str = Field(
387
+ default="truthound-dashboard",
388
+ description="Namespace for the job",
389
+ )
390
+ job_name: str = Field(
391
+ default="lineage_export",
392
+ description="Name for the job",
393
+ )
394
+ source_id: str | None = Field(
395
+ default=None,
396
+ description="Optional source ID to filter lineage",
397
+ )
398
+ include_schema: bool = Field(
399
+ default=True,
400
+ description="Include schema information in dataset facets",
401
+ )
402
+ include_quality_metrics: bool = Field(
403
+ default=False,
404
+ description="Include data quality metrics if available",
405
+ )
406
+ format: OpenLineageExportFormat = Field(
407
+ default=OpenLineageExportFormat.JSON,
408
+ description="Export format",
409
+ )
410
+
411
+
412
class OpenLineageExportResponse(BaseSchema):
    """Response containing exported OpenLineage events.

    Carries the events themselves plus summary counters and the time the
    export was produced.
    """

    events: list[OpenLineageEvent] = Field(
        ...,
        description="List of OpenLineage events",
    )
    total_events: int = Field(..., description="Total number of events")
    total_datasets: int = Field(..., description="Total unique datasets")
    total_jobs: int = Field(..., description="Total jobs represented")
    export_time: str = Field(
        # Timezone-aware "now" in UTC, rendered with the same trailing "Z"
        # the previous naive utcnow()-based default produced.
        default_factory=lambda: datetime.now(timezone.utc)
        .isoformat()
        .replace("+00:00", "Z"),
        description="Export timestamp",
    )
426
+
427
+
428
+ class OpenLineageWebhookConfig(BaseSchema):
429
+ """Configuration for OpenLineage webhook emission."""
430
+
431
+ url: str = Field(
432
+ ...,
433
+ description="Webhook URL to send events to",
434
+ examples=["https://api.openlineage.io/v1/lineage"],
435
+ )
436
+ api_key: str | None = Field(
437
+ default=None,
438
+ description="Optional API key for authentication",
439
+ )
440
+ headers: dict[str, str] = Field(
441
+ default_factory=dict,
442
+ description="Additional headers to include",
443
+ )
444
+ batch_size: int = Field(
445
+ default=100,
446
+ ge=1,
447
+ le=1000,
448
+ description="Number of events to send per batch",
449
+ )
450
+ timeout_seconds: int = Field(
451
+ default=30,
452
+ ge=1,
453
+ le=300,
454
+ description="Request timeout in seconds",
455
+ )
456
+
457
+
458
+ class OpenLineageEmitRequest(BaseSchema):
459
+ """Request to emit OpenLineage events to an external system."""
460
+
461
+ webhook: OpenLineageWebhookConfig = Field(
462
+ ...,
463
+ description="Webhook configuration",
464
+ )
465
+ source_id: str | None = Field(
466
+ default=None,
467
+ description="Optional source ID to filter lineage",
468
+ )
469
+ job_namespace: str = Field(
470
+ default="truthound-dashboard",
471
+ description="Namespace for the job",
472
+ )
473
+ job_name: str = Field(
474
+ default="lineage_export",
475
+ description="Name for the job",
476
+ )
477
+
478
+
479
+ class OpenLineageEmitResponse(BaseSchema):
480
+ """Response from emitting OpenLineage events."""
481
+
482
+ success: bool = Field(..., description="Whether emission was successful")
483
+ events_sent: int = Field(..., description="Number of events sent")
484
+ failed_events: int = Field(default=0, description="Number of failed events")
485
+ error_message: str | None = Field(
486
+ default=None,
487
+ description="Error message if emission failed",
488
+ )
489
+
490
+
491
+ # =============================================================================
492
+ # Webhook Configuration Schemas
493
+ # =============================================================================
494
+
495
+
496
+ class WebhookEventType(str, Enum):
497
+ """Types of OpenLineage events for webhook configuration."""
498
+
499
+ JOB = "job"
500
+ DATASET = "dataset"
501
+ ALL = "all"
502
+
503
+
504
+ class WebhookCreate(BaseSchema):
505
+ """Schema for creating a new OpenLineage webhook."""
506
+
507
+ name: str = Field(
508
+ ...,
509
+ min_length=1,
510
+ max_length=255,
511
+ description="Human-readable name for the webhook",
512
+ examples=["Marquez Production", "DataHub Dev"],
513
+ )
514
+ url: str = Field(
515
+ ...,
516
+ description="Target URL for the webhook",
517
+ examples=["https://api.marquez.io/v1/lineage", "http://localhost:5000/api/v1/lineage"],
518
+ )
519
+ is_active: bool = Field(
520
+ default=True,
521
+ description="Whether the webhook is enabled",
522
+ )
523
+ headers: dict[str, str] = Field(
524
+ default_factory=dict,
525
+ description="Custom headers to include (excluding Authorization)",
526
+ )
527
+ api_key: str | None = Field(
528
+ default=None,
529
+ description="API key for authentication (sent as Bearer token)",
530
+ )
531
+ event_types: WebhookEventType = Field(
532
+ default=WebhookEventType.ALL,
533
+ description="Types of events to emit",
534
+ )
535
+ batch_size: int = Field(
536
+ default=100,
537
+ ge=1,
538
+ le=1000,
539
+ description="Number of events per batch",
540
+ )
541
+ timeout_seconds: int = Field(
542
+ default=30,
543
+ ge=1,
544
+ le=300,
545
+ description="Request timeout in seconds",
546
+ )
547
+
548
+
549
+ class WebhookUpdate(BaseSchema):
550
+ """Schema for updating an existing webhook."""
551
+
552
+ name: str | None = Field(
553
+ default=None,
554
+ min_length=1,
555
+ max_length=255,
556
+ description="Human-readable name for the webhook",
557
+ )
558
+ url: str | None = Field(
559
+ default=None,
560
+ description="Target URL for the webhook",
561
+ )
562
+ is_active: bool | None = Field(
563
+ default=None,
564
+ description="Whether the webhook is enabled",
565
+ )
566
+ headers: dict[str, str] | None = Field(
567
+ default=None,
568
+ description="Custom headers to include",
569
+ )
570
+ api_key: str | None = Field(
571
+ default=None,
572
+ description="API key for authentication",
573
+ )
574
+ event_types: WebhookEventType | None = Field(
575
+ default=None,
576
+ description="Types of events to emit",
577
+ )
578
+ batch_size: int | None = Field(
579
+ default=None,
580
+ ge=1,
581
+ le=1000,
582
+ description="Number of events per batch",
583
+ )
584
+ timeout_seconds: int | None = Field(
585
+ default=None,
586
+ ge=1,
587
+ le=300,
588
+ description="Request timeout in seconds",
589
+ )
590
+
591
+
592
+ class WebhookResponse(BaseSchema):
593
+ """Schema for webhook response."""
594
+
595
+ id: str = Field(..., description="Unique webhook ID")
596
+ name: str = Field(..., description="Webhook name")
597
+ url: str = Field(..., description="Target URL")
598
+ is_active: bool = Field(..., description="Whether webhook is enabled")
599
+ headers: dict[str, str] = Field(default_factory=dict, description="Custom headers")
600
+ event_types: str = Field(..., description="Event types to emit")
601
+ batch_size: int = Field(..., description="Batch size")
602
+ timeout_seconds: int = Field(..., description="Timeout in seconds")
603
+ last_sent_at: str | None = Field(default=None, description="Last successful emission time")
604
+ success_count: int = Field(default=0, description="Total successful emissions")
605
+ failure_count: int = Field(default=0, description="Total failed emissions")
606
+ last_error: str | None = Field(default=None, description="Last error message")
607
+ created_at: str = Field(..., description="Creation timestamp")
608
+ updated_at: str | None = Field(default=None, description="Last update timestamp")
609
+
610
+
611
+ class WebhookListResponse(BaseSchema):
612
+ """Response for listing webhooks."""
613
+
614
+ data: list[WebhookResponse] = Field(..., description="List of webhooks")
615
+ total: int = Field(..., description="Total number of webhooks")
616
+
617
+
618
+ class WebhookTestRequest(BaseSchema):
619
+ """Request to test a webhook connection."""
620
+
621
+ url: str = Field(
622
+ ...,
623
+ description="URL to test",
624
+ examples=["https://api.marquez.io/v1/lineage"],
625
+ )
626
+ headers: dict[str, str] = Field(
627
+ default_factory=dict,
628
+ description="Headers to include in test request",
629
+ )
630
+ api_key: str | None = Field(
631
+ default=None,
632
+ description="API key for authentication",
633
+ )
634
+ timeout_seconds: int = Field(
635
+ default=10,
636
+ ge=1,
637
+ le=60,
638
+ description="Test request timeout",
639
+ )
640
+
641
+
642
+ class WebhookTestResult(BaseSchema):
643
+ """Result of a webhook test."""
644
+
645
+ success: bool = Field(..., description="Whether the test was successful")
646
+ status_code: int | None = Field(default=None, description="HTTP status code")
647
+ response_time_ms: int | None = Field(default=None, description="Response time in ms")
648
+ error_message: str | None = Field(default=None, description="Error message if failed")
649
+ response_body: str | None = Field(default=None, description="Response body (truncated)")
650
+
651
+
652
+ # =============================================================================
653
+ # Dataset Namespace Helpers
654
+ # =============================================================================
655
+
656
+
657
+ def build_dataset_namespace(source_type: str, config: dict[str, Any] | None = None) -> str:
658
+ """Build a namespace URI from source configuration.
659
+
660
+ Args:
661
+ source_type: Type of data source.
662
+ config: Source configuration dictionary.
663
+
664
+ Returns:
665
+ Namespace URI string.
666
+ """
667
+ config = config or {}
668
+
669
+ if source_type == "file":
670
+ return f"file://{config.get('base_path', 'local')}"
671
+
672
+ if source_type == "postgresql":
673
+ host = config.get("host", "localhost")
674
+ port = config.get("port", 5432)
675
+ database = config.get("database", "")
676
+ return f"postgresql://{host}:{port}/{database}"
677
+
678
+ if source_type == "mysql":
679
+ host = config.get("host", "localhost")
680
+ port = config.get("port", 3306)
681
+ database = config.get("database", "")
682
+ return f"mysql://{host}:{port}/{database}"
683
+
684
+ if source_type == "snowflake":
685
+ account = config.get("account", "")
686
+ database = config.get("database", "")
687
+ return f"snowflake://{account}/{database}"
688
+
689
+ if source_type == "bigquery":
690
+ project = config.get("project", "")
691
+ dataset = config.get("dataset", "")
692
+ return f"bigquery://{project}.{dataset}"
693
+
694
+ if source_type == "redshift":
695
+ host = config.get("host", "")
696
+ database = config.get("database", "")
697
+ return f"redshift://{host}/{database}"
698
+
699
+ if source_type == "databricks":
700
+ workspace = config.get("workspace_url", "")
701
+ return f"databricks://{workspace}"
702
+
703
+ # Default namespace
704
+ return f"{source_type}://truthound"