truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +75 -86
- truthound_dashboard/api/anomaly.py +7 -13
- truthound_dashboard/api/cross_alerts.py +38 -52
- truthound_dashboard/api/drift.py +49 -59
- truthound_dashboard/api/drift_monitor.py +234 -79
- truthound_dashboard/api/enterprise_sampling.py +498 -0
- truthound_dashboard/api/history.py +57 -5
- truthound_dashboard/api/lineage.py +3 -48
- truthound_dashboard/api/maintenance.py +104 -49
- truthound_dashboard/api/mask.py +1 -2
- truthound_dashboard/api/middleware.py +2 -1
- truthound_dashboard/api/model_monitoring.py +435 -311
- truthound_dashboard/api/notifications.py +227 -191
- truthound_dashboard/api/notifications_advanced.py +21 -20
- truthound_dashboard/api/observability.py +586 -0
- truthound_dashboard/api/plugins.py +2 -433
- truthound_dashboard/api/profile.py +199 -37
- truthound_dashboard/api/quality_reporter.py +701 -0
- truthound_dashboard/api/reports.py +7 -16
- truthound_dashboard/api/router.py +66 -0
- truthound_dashboard/api/rule_suggestions.py +5 -5
- truthound_dashboard/api/scan.py +17 -19
- truthound_dashboard/api/schedules.py +85 -50
- truthound_dashboard/api/schema_evolution.py +6 -6
- truthound_dashboard/api/schema_watcher.py +667 -0
- truthound_dashboard/api/sources.py +98 -27
- truthound_dashboard/api/tiering.py +1323 -0
- truthound_dashboard/api/triggers.py +14 -11
- truthound_dashboard/api/validations.py +12 -11
- truthound_dashboard/api/versioning.py +1 -6
- truthound_dashboard/core/__init__.py +129 -3
- truthound_dashboard/core/actions/__init__.py +62 -0
- truthound_dashboard/core/actions/custom.py +426 -0
- truthound_dashboard/core/actions/notifications.py +910 -0
- truthound_dashboard/core/actions/storage.py +472 -0
- truthound_dashboard/core/actions/webhook.py +281 -0
- truthound_dashboard/core/anomaly.py +262 -67
- truthound_dashboard/core/anomaly_explainer.py +4 -3
- truthound_dashboard/core/backends/__init__.py +67 -0
- truthound_dashboard/core/backends/base.py +299 -0
- truthound_dashboard/core/backends/errors.py +191 -0
- truthound_dashboard/core/backends/factory.py +423 -0
- truthound_dashboard/core/backends/mock_backend.py +451 -0
- truthound_dashboard/core/backends/truthound_backend.py +718 -0
- truthound_dashboard/core/checkpoint/__init__.py +87 -0
- truthound_dashboard/core/checkpoint/adapters.py +814 -0
- truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
- truthound_dashboard/core/checkpoint/runner.py +270 -0
- truthound_dashboard/core/connections.py +645 -23
- truthound_dashboard/core/converters/__init__.py +14 -0
- truthound_dashboard/core/converters/truthound.py +620 -0
- truthound_dashboard/core/cross_alerts.py +540 -320
- truthound_dashboard/core/datasource_factory.py +1672 -0
- truthound_dashboard/core/drift_monitor.py +216 -20
- truthound_dashboard/core/enterprise_sampling.py +1291 -0
- truthound_dashboard/core/interfaces/__init__.py +225 -0
- truthound_dashboard/core/interfaces/actions.py +652 -0
- truthound_dashboard/core/interfaces/base.py +247 -0
- truthound_dashboard/core/interfaces/checkpoint.py +676 -0
- truthound_dashboard/core/interfaces/protocols.py +664 -0
- truthound_dashboard/core/interfaces/reporters.py +650 -0
- truthound_dashboard/core/interfaces/routing.py +646 -0
- truthound_dashboard/core/interfaces/triggers.py +619 -0
- truthound_dashboard/core/lineage.py +407 -71
- truthound_dashboard/core/model_monitoring.py +431 -3
- truthound_dashboard/core/notifications/base.py +4 -0
- truthound_dashboard/core/notifications/channels.py +501 -1203
- truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
- truthound_dashboard/core/notifications/deduplication/service.py +131 -348
- truthound_dashboard/core/notifications/dispatcher.py +202 -11
- truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
- truthound_dashboard/core/notifications/escalation/engine.py +168 -358
- truthound_dashboard/core/notifications/routing/__init__.py +88 -128
- truthound_dashboard/core/notifications/routing/engine.py +90 -317
- truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
- truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
- truthound_dashboard/core/notifications/throttling/builder.py +117 -255
- truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
- truthound_dashboard/core/phase5/collaboration.py +1 -1
- truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
- truthound_dashboard/core/quality_reporter.py +1359 -0
- truthound_dashboard/core/report_history.py +0 -6
- truthound_dashboard/core/reporters/__init__.py +175 -14
- truthound_dashboard/core/reporters/adapters.py +943 -0
- truthound_dashboard/core/reporters/base.py +0 -3
- truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
- truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
- truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
- truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
- truthound_dashboard/core/reporters/compat.py +266 -0
- truthound_dashboard/core/reporters/csv_reporter.py +2 -35
- truthound_dashboard/core/reporters/factory.py +526 -0
- truthound_dashboard/core/reporters/interfaces.py +745 -0
- truthound_dashboard/core/reporters/registry.py +1 -10
- truthound_dashboard/core/scheduler.py +165 -0
- truthound_dashboard/core/schema_evolution.py +3 -3
- truthound_dashboard/core/schema_watcher.py +1528 -0
- truthound_dashboard/core/services.py +595 -76
- truthound_dashboard/core/store_manager.py +810 -0
- truthound_dashboard/core/streaming_anomaly.py +169 -4
- truthound_dashboard/core/tiering.py +1309 -0
- truthound_dashboard/core/triggers/evaluators.py +178 -8
- truthound_dashboard/core/truthound_adapter.py +2620 -197
- truthound_dashboard/core/unified_alerts.py +23 -20
- truthound_dashboard/db/__init__.py +8 -0
- truthound_dashboard/db/database.py +8 -2
- truthound_dashboard/db/models.py +944 -25
- truthound_dashboard/db/repository.py +2 -0
- truthound_dashboard/main.py +15 -0
- truthound_dashboard/schemas/__init__.py +177 -16
- truthound_dashboard/schemas/base.py +44 -23
- truthound_dashboard/schemas/collaboration.py +19 -6
- truthound_dashboard/schemas/cross_alerts.py +19 -3
- truthound_dashboard/schemas/drift.py +61 -55
- truthound_dashboard/schemas/drift_monitor.py +67 -23
- truthound_dashboard/schemas/enterprise_sampling.py +653 -0
- truthound_dashboard/schemas/lineage.py +0 -33
- truthound_dashboard/schemas/mask.py +10 -8
- truthound_dashboard/schemas/model_monitoring.py +89 -10
- truthound_dashboard/schemas/notifications_advanced.py +13 -0
- truthound_dashboard/schemas/observability.py +453 -0
- truthound_dashboard/schemas/plugins.py +0 -280
- truthound_dashboard/schemas/profile.py +154 -247
- truthound_dashboard/schemas/quality_reporter.py +403 -0
- truthound_dashboard/schemas/reports.py +2 -2
- truthound_dashboard/schemas/rule_suggestion.py +8 -1
- truthound_dashboard/schemas/scan.py +4 -24
- truthound_dashboard/schemas/schedule.py +11 -3
- truthound_dashboard/schemas/schema_watcher.py +727 -0
- truthound_dashboard/schemas/source.py +17 -2
- truthound_dashboard/schemas/tiering.py +822 -0
- truthound_dashboard/schemas/triggers.py +16 -0
- truthound_dashboard/schemas/unified_alerts.py +7 -0
- truthound_dashboard/schemas/validation.py +0 -13
- truthound_dashboard/schemas/validators/base.py +41 -21
- truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
- truthound_dashboard/schemas/validators/localization_validators.py +273 -0
- truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
- truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
- truthound_dashboard/schemas/validators/referential_validators.py +312 -0
- truthound_dashboard/schemas/validators/registry.py +93 -8
- truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
- truthound_dashboard/schemas/versioning.py +1 -6
- truthound_dashboard/static/index.html +2 -2
- truthound_dashboard-1.5.1.dist-info/METADATA +312 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/RECORD +149 -148
- truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
- truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
- truthound_dashboard/core/plugins/hooks/manager.py +0 -403
- truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
- truthound_dashboard/core/reporters/junit_reporter.py +0 -233
- truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
- truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
- truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
- truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
- truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
- truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
- truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
- truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
- truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
- truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
- truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
- truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
- truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
- truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
- truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
- truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
- truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
- truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
- truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
- truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
- truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
- truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
- truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
- truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
- truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
- truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
- truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
- truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
- truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
- truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
- truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
- truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
- truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
- truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
- truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
- truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
- truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
- truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
- truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
- truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
- truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
- truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
- truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
- truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
- truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
- truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
- truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
- truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -30,18 +30,39 @@ from urllib.parse import quote_plus
|
|
|
30
30
|
class SourceType(str, Enum):
|
|
31
31
|
"""Supported data source types."""
|
|
32
32
|
|
|
33
|
+
# File-based
|
|
33
34
|
FILE = "file"
|
|
35
|
+
CSV = "csv"
|
|
36
|
+
PARQUET = "parquet"
|
|
37
|
+
JSON = "json"
|
|
38
|
+
NDJSON = "ndjson"
|
|
39
|
+
JSONL = "jsonl"
|
|
40
|
+
|
|
41
|
+
# Core SQL
|
|
34
42
|
POSTGRESQL = "postgresql"
|
|
35
43
|
MYSQL = "mysql"
|
|
36
44
|
SQLITE = "sqlite"
|
|
45
|
+
|
|
46
|
+
# Cloud Data Warehouses
|
|
37
47
|
SNOWFLAKE = "snowflake"
|
|
38
48
|
BIGQUERY = "bigquery"
|
|
39
49
|
REDSHIFT = "redshift"
|
|
40
50
|
DATABRICKS = "databricks"
|
|
51
|
+
|
|
52
|
+
# Enterprise
|
|
41
53
|
ORACLE = "oracle"
|
|
42
54
|
SQLSERVER = "sqlserver"
|
|
55
|
+
|
|
56
|
+
# Big Data
|
|
43
57
|
SPARK = "spark"
|
|
44
58
|
|
|
59
|
+
# NoSQL
|
|
60
|
+
MONGODB = "mongodb"
|
|
61
|
+
ELASTICSEARCH = "elasticsearch"
|
|
62
|
+
|
|
63
|
+
# Streaming
|
|
64
|
+
KAFKA = "kafka"
|
|
65
|
+
|
|
45
66
|
|
|
46
67
|
class FieldType(str, Enum):
|
|
47
68
|
"""Field types for configuration forms."""
|
|
@@ -104,7 +125,7 @@ class SourceTypeDefinition:
|
|
|
104
125
|
name: str
|
|
105
126
|
description: str
|
|
106
127
|
icon: str
|
|
107
|
-
category: Literal["file", "database", "warehouse", "bigdata"]
|
|
128
|
+
category: Literal["file", "database", "warehouse", "bigdata", "nosql", "streaming"]
|
|
108
129
|
fields: list[FieldDefinition]
|
|
109
130
|
docs_url: str = ""
|
|
110
131
|
|
|
@@ -166,7 +187,7 @@ class FileConnectionBuilder(ConnectionBuilder):
|
|
|
166
187
|
"""Connection builder for file-based sources."""
|
|
167
188
|
|
|
168
189
|
source_type = SourceType.FILE
|
|
169
|
-
SUPPORTED_EXTENSIONS = {".csv", ".parquet", ".json", ".
|
|
190
|
+
SUPPORTED_EXTENSIONS = {".csv", ".parquet", ".json", ".ndjson", ".jsonl"}
|
|
170
191
|
|
|
171
192
|
def build(self, config: dict[str, Any]) -> str:
|
|
172
193
|
"""Build file path from config."""
|
|
@@ -195,7 +216,7 @@ class FileConnectionBuilder(ConnectionBuilder):
|
|
|
195
216
|
return SourceTypeDefinition(
|
|
196
217
|
type=SourceType.FILE.value,
|
|
197
218
|
name="File",
|
|
198
|
-
description="Local file (CSV, Parquet, JSON,
|
|
219
|
+
description="Local file (CSV, Parquet, JSON, NDJSON, JSONL)",
|
|
199
220
|
icon="file",
|
|
200
221
|
category="file",
|
|
201
222
|
fields=[
|
|
@@ -216,7 +237,8 @@ class FileConnectionBuilder(ConnectionBuilder):
|
|
|
216
237
|
{"value": "csv", "label": "CSV"},
|
|
217
238
|
{"value": "parquet", "label": "Parquet"},
|
|
218
239
|
{"value": "json", "label": "JSON"},
|
|
219
|
-
{"value": "
|
|
240
|
+
{"value": "ndjson", "label": "NDJSON"},
|
|
241
|
+
{"value": "jsonl", "label": "JSONL"},
|
|
220
242
|
],
|
|
221
243
|
default="auto",
|
|
222
244
|
description="File format (auto-detected from extension if not specified)",
|
|
@@ -250,13 +272,195 @@ class FileConnectionBuilder(ConnectionBuilder):
|
|
|
250
272
|
default=True,
|
|
251
273
|
description="First row contains column names",
|
|
252
274
|
),
|
|
275
|
+
],
|
|
276
|
+
)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
class CSVConnectionBuilder(FileConnectionBuilder):
|
|
280
|
+
"""Connection builder for CSV files."""
|
|
281
|
+
|
|
282
|
+
source_type = SourceType.CSV
|
|
283
|
+
|
|
284
|
+
@classmethod
|
|
285
|
+
def get_definition(cls) -> SourceTypeDefinition:
|
|
286
|
+
return SourceTypeDefinition(
|
|
287
|
+
type=SourceType.CSV.value,
|
|
288
|
+
name="CSV",
|
|
289
|
+
description="Comma-separated values file",
|
|
290
|
+
icon="file",
|
|
291
|
+
category="file",
|
|
292
|
+
fields=[
|
|
253
293
|
FieldDefinition(
|
|
254
|
-
name="
|
|
255
|
-
label="
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
294
|
+
name="path",
|
|
295
|
+
label="File Path",
|
|
296
|
+
type=FieldType.FILE_PATH,
|
|
297
|
+
required=True,
|
|
298
|
+
placeholder="/path/to/data.csv",
|
|
299
|
+
description="Path to the CSV file",
|
|
300
|
+
),
|
|
301
|
+
FieldDefinition(
|
|
302
|
+
name="delimiter",
|
|
303
|
+
label="Delimiter",
|
|
304
|
+
placeholder=",",
|
|
305
|
+
default=",",
|
|
306
|
+
description="CSV delimiter character",
|
|
307
|
+
),
|
|
308
|
+
FieldDefinition(
|
|
309
|
+
name="encoding",
|
|
310
|
+
label="Encoding",
|
|
311
|
+
type=FieldType.SELECT,
|
|
312
|
+
options=[
|
|
313
|
+
{"value": "utf-8", "label": "UTF-8"},
|
|
314
|
+
{"value": "utf-16", "label": "UTF-16"},
|
|
315
|
+
{"value": "iso-8859-1", "label": "ISO-8859-1 (Latin-1)"},
|
|
316
|
+
{"value": "cp1252", "label": "Windows-1252"},
|
|
317
|
+
],
|
|
318
|
+
default="utf-8",
|
|
319
|
+
description="File encoding",
|
|
320
|
+
),
|
|
321
|
+
FieldDefinition(
|
|
322
|
+
name="has_header",
|
|
323
|
+
label="Has Header Row",
|
|
324
|
+
type=FieldType.BOOLEAN,
|
|
325
|
+
default=True,
|
|
326
|
+
description="First row contains column names",
|
|
327
|
+
),
|
|
328
|
+
],
|
|
329
|
+
)
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
class ParquetConnectionBuilder(FileConnectionBuilder):
|
|
333
|
+
"""Connection builder for Parquet files."""
|
|
334
|
+
|
|
335
|
+
source_type = SourceType.PARQUET
|
|
336
|
+
|
|
337
|
+
@classmethod
|
|
338
|
+
def get_definition(cls) -> SourceTypeDefinition:
|
|
339
|
+
return SourceTypeDefinition(
|
|
340
|
+
type=SourceType.PARQUET.value,
|
|
341
|
+
name="Parquet",
|
|
342
|
+
description="Apache Parquet columnar storage file",
|
|
343
|
+
icon="file",
|
|
344
|
+
category="file",
|
|
345
|
+
fields=[
|
|
346
|
+
FieldDefinition(
|
|
347
|
+
name="path",
|
|
348
|
+
label="File Path",
|
|
349
|
+
type=FieldType.FILE_PATH,
|
|
350
|
+
required=True,
|
|
351
|
+
placeholder="/path/to/data.parquet",
|
|
352
|
+
description="Path to the Parquet file",
|
|
353
|
+
),
|
|
354
|
+
],
|
|
355
|
+
)
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
class JSONConnectionBuilder(FileConnectionBuilder):
|
|
359
|
+
"""Connection builder for JSON files."""
|
|
360
|
+
|
|
361
|
+
source_type = SourceType.JSON
|
|
362
|
+
|
|
363
|
+
@classmethod
|
|
364
|
+
def get_definition(cls) -> SourceTypeDefinition:
|
|
365
|
+
return SourceTypeDefinition(
|
|
366
|
+
type=SourceType.JSON.value,
|
|
367
|
+
name="JSON",
|
|
368
|
+
description="JSON file (array of objects)",
|
|
369
|
+
icon="file_json",
|
|
370
|
+
category="file",
|
|
371
|
+
fields=[
|
|
372
|
+
FieldDefinition(
|
|
373
|
+
name="path",
|
|
374
|
+
label="File Path",
|
|
375
|
+
type=FieldType.FILE_PATH,
|
|
376
|
+
required=True,
|
|
377
|
+
placeholder="/path/to/data.json",
|
|
378
|
+
description="Path to the JSON file",
|
|
379
|
+
),
|
|
380
|
+
FieldDefinition(
|
|
381
|
+
name="encoding",
|
|
382
|
+
label="Encoding",
|
|
383
|
+
type=FieldType.SELECT,
|
|
384
|
+
options=[
|
|
385
|
+
{"value": "utf-8", "label": "UTF-8"},
|
|
386
|
+
{"value": "utf-16", "label": "UTF-16"},
|
|
387
|
+
],
|
|
388
|
+
default="utf-8",
|
|
389
|
+
description="File encoding",
|
|
390
|
+
),
|
|
391
|
+
],
|
|
392
|
+
)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
class NDJSONConnectionBuilder(FileConnectionBuilder):
|
|
396
|
+
"""Connection builder for NDJSON files."""
|
|
397
|
+
|
|
398
|
+
source_type = SourceType.NDJSON
|
|
399
|
+
|
|
400
|
+
@classmethod
|
|
401
|
+
def get_definition(cls) -> SourceTypeDefinition:
|
|
402
|
+
return SourceTypeDefinition(
|
|
403
|
+
type=SourceType.NDJSON.value,
|
|
404
|
+
name="NDJSON",
|
|
405
|
+
description="Newline-delimited JSON file",
|
|
406
|
+
icon="file_json",
|
|
407
|
+
category="file",
|
|
408
|
+
fields=[
|
|
409
|
+
FieldDefinition(
|
|
410
|
+
name="path",
|
|
411
|
+
label="File Path",
|
|
412
|
+
type=FieldType.FILE_PATH,
|
|
413
|
+
required=True,
|
|
414
|
+
placeholder="/path/to/data.ndjson",
|
|
415
|
+
description="Path to the NDJSON file",
|
|
416
|
+
),
|
|
417
|
+
FieldDefinition(
|
|
418
|
+
name="encoding",
|
|
419
|
+
label="Encoding",
|
|
420
|
+
type=FieldType.SELECT,
|
|
421
|
+
options=[
|
|
422
|
+
{"value": "utf-8", "label": "UTF-8"},
|
|
423
|
+
{"value": "utf-16", "label": "UTF-16"},
|
|
424
|
+
],
|
|
425
|
+
default="utf-8",
|
|
426
|
+
description="File encoding",
|
|
427
|
+
),
|
|
428
|
+
],
|
|
429
|
+
)
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
class JSONLConnectionBuilder(FileConnectionBuilder):
|
|
433
|
+
"""Connection builder for JSONL files."""
|
|
434
|
+
|
|
435
|
+
source_type = SourceType.JSONL
|
|
436
|
+
|
|
437
|
+
@classmethod
|
|
438
|
+
def get_definition(cls) -> SourceTypeDefinition:
|
|
439
|
+
return SourceTypeDefinition(
|
|
440
|
+
type=SourceType.JSONL.value,
|
|
441
|
+
name="JSONL",
|
|
442
|
+
description="JSON Lines file (one JSON object per line)",
|
|
443
|
+
icon="file_json",
|
|
444
|
+
category="file",
|
|
445
|
+
fields=[
|
|
446
|
+
FieldDefinition(
|
|
447
|
+
name="path",
|
|
448
|
+
label="File Path",
|
|
449
|
+
type=FieldType.FILE_PATH,
|
|
450
|
+
required=True,
|
|
451
|
+
placeholder="/path/to/data.jsonl",
|
|
452
|
+
description="Path to the JSONL file",
|
|
453
|
+
),
|
|
454
|
+
FieldDefinition(
|
|
455
|
+
name="encoding",
|
|
456
|
+
label="Encoding",
|
|
457
|
+
type=FieldType.SELECT,
|
|
458
|
+
options=[
|
|
459
|
+
{"value": "utf-8", "label": "UTF-8"},
|
|
460
|
+
{"value": "utf-16", "label": "UTF-16"},
|
|
461
|
+
],
|
|
462
|
+
default="utf-8",
|
|
463
|
+
description="File encoding",
|
|
260
464
|
),
|
|
261
465
|
],
|
|
262
466
|
)
|
|
@@ -1195,19 +1399,395 @@ class SparkConnectionBuilder(ConnectionBuilder):
|
|
|
1195
1399
|
)
|
|
1196
1400
|
|
|
1197
1401
|
|
|
1402
|
+
class MongoDBConnectionBuilder(ConnectionBuilder):
|
|
1403
|
+
"""Connection builder for MongoDB."""
|
|
1404
|
+
|
|
1405
|
+
source_type = SourceType.MONGODB
|
|
1406
|
+
|
|
1407
|
+
def build(self, config: dict[str, Any]) -> str:
|
|
1408
|
+
"""Build MongoDB connection string."""
|
|
1409
|
+
# Support direct connection string
|
|
1410
|
+
if config.get("connection_string"):
|
|
1411
|
+
return config["connection_string"]
|
|
1412
|
+
|
|
1413
|
+
host = config.get("host", "localhost")
|
|
1414
|
+
port = config.get("port", 27017)
|
|
1415
|
+
database = config.get("database", "")
|
|
1416
|
+
username = config.get("username", "")
|
|
1417
|
+
password = quote_plus(config.get("password", ""))
|
|
1418
|
+
|
|
1419
|
+
# Build connection string
|
|
1420
|
+
if username and password:
|
|
1421
|
+
conn = f"mongodb://{username}:{password}@{host}:{port}/{database}"
|
|
1422
|
+
else:
|
|
1423
|
+
conn = f"mongodb://{host}:{port}/{database}"
|
|
1424
|
+
|
|
1425
|
+
# Add options
|
|
1426
|
+
options = []
|
|
1427
|
+
if config.get("auth_source"):
|
|
1428
|
+
options.append(f"authSource={config['auth_source']}")
|
|
1429
|
+
if config.get("replica_set"):
|
|
1430
|
+
options.append(f"replicaSet={config['replica_set']}")
|
|
1431
|
+
if config.get("ssl"):
|
|
1432
|
+
options.append("ssl=true")
|
|
1433
|
+
|
|
1434
|
+
if options:
|
|
1435
|
+
conn += "?" + "&".join(options)
|
|
1436
|
+
|
|
1437
|
+
return conn
|
|
1438
|
+
|
|
1439
|
+
def validate_config(self, config: dict[str, Any]) -> list[str]:
|
|
1440
|
+
"""Validate MongoDB configuration."""
|
|
1441
|
+
errors = []
|
|
1442
|
+
|
|
1443
|
+
# Either connection_string or host is required
|
|
1444
|
+
if not config.get("connection_string") and not config.get("host"):
|
|
1445
|
+
errors.append("Either connection_string or host is required")
|
|
1446
|
+
|
|
1447
|
+
if not config.get("database"):
|
|
1448
|
+
errors.append("database is required")
|
|
1449
|
+
|
|
1450
|
+
if not config.get("collection"):
|
|
1451
|
+
errors.append("collection is required")
|
|
1452
|
+
|
|
1453
|
+
return errors
|
|
1454
|
+
|
|
1455
|
+
@classmethod
|
|
1456
|
+
def get_definition(cls) -> SourceTypeDefinition:
|
|
1457
|
+
"""Get the source type definition for UI rendering."""
|
|
1458
|
+
return SourceTypeDefinition(
|
|
1459
|
+
type=SourceType.MONGODB.value,
|
|
1460
|
+
name="MongoDB",
|
|
1461
|
+
description="MongoDB document database",
|
|
1462
|
+
icon="database",
|
|
1463
|
+
category="nosql",
|
|
1464
|
+
docs_url="https://www.mongodb.com/docs/",
|
|
1465
|
+
fields=[
|
|
1466
|
+
FieldDefinition(
|
|
1467
|
+
name="connection_string",
|
|
1468
|
+
label="Connection String",
|
|
1469
|
+
placeholder="mongodb://localhost:27017/mydb",
|
|
1470
|
+
description="Full MongoDB connection URI (alternative to individual fields)",
|
|
1471
|
+
),
|
|
1472
|
+
FieldDefinition(
|
|
1473
|
+
name="host",
|
|
1474
|
+
label="Host",
|
|
1475
|
+
placeholder="localhost",
|
|
1476
|
+
description="MongoDB server hostname or IP",
|
|
1477
|
+
),
|
|
1478
|
+
FieldDefinition(
|
|
1479
|
+
name="port",
|
|
1480
|
+
label="Port",
|
|
1481
|
+
type=FieldType.NUMBER,
|
|
1482
|
+
default=27017,
|
|
1483
|
+
min_value=1,
|
|
1484
|
+
max_value=65535,
|
|
1485
|
+
description="MongoDB server port",
|
|
1486
|
+
),
|
|
1487
|
+
FieldDefinition(
|
|
1488
|
+
name="database",
|
|
1489
|
+
label="Database",
|
|
1490
|
+
required=True,
|
|
1491
|
+
placeholder="mydb",
|
|
1492
|
+
description="Database name",
|
|
1493
|
+
),
|
|
1494
|
+
FieldDefinition(
|
|
1495
|
+
name="collection",
|
|
1496
|
+
label="Collection",
|
|
1497
|
+
required=True,
|
|
1498
|
+
placeholder="users",
|
|
1499
|
+
description="Collection name to validate",
|
|
1500
|
+
),
|
|
1501
|
+
FieldDefinition(
|
|
1502
|
+
name="username",
|
|
1503
|
+
label="Username",
|
|
1504
|
+
description="Database username",
|
|
1505
|
+
),
|
|
1506
|
+
FieldDefinition(
|
|
1507
|
+
name="password",
|
|
1508
|
+
label="Password",
|
|
1509
|
+
type=FieldType.PASSWORD,
|
|
1510
|
+
description="Database password",
|
|
1511
|
+
),
|
|
1512
|
+
FieldDefinition(
|
|
1513
|
+
name="auth_source",
|
|
1514
|
+
label="Auth Source",
|
|
1515
|
+
placeholder="admin",
|
|
1516
|
+
default="admin",
|
|
1517
|
+
description="Authentication database",
|
|
1518
|
+
),
|
|
1519
|
+
FieldDefinition(
|
|
1520
|
+
name="replica_set",
|
|
1521
|
+
label="Replica Set",
|
|
1522
|
+
placeholder="rs0",
|
|
1523
|
+
description="Replica set name (for replica set connections)",
|
|
1524
|
+
),
|
|
1525
|
+
FieldDefinition(
|
|
1526
|
+
name="ssl",
|
|
1527
|
+
label="Use SSL/TLS",
|
|
1528
|
+
type=FieldType.BOOLEAN,
|
|
1529
|
+
default=False,
|
|
1530
|
+
description="Enable SSL/TLS connection",
|
|
1531
|
+
),
|
|
1532
|
+
],
|
|
1533
|
+
)
|
|
1534
|
+
|
|
1535
|
+
|
|
1536
|
+
class ElasticsearchConnectionBuilder(ConnectionBuilder):
|
|
1537
|
+
"""Connection builder for Elasticsearch."""
|
|
1538
|
+
|
|
1539
|
+
source_type = SourceType.ELASTICSEARCH
|
|
1540
|
+
|
|
1541
|
+
def build(self, config: dict[str, Any]) -> str:
|
|
1542
|
+
"""Build Elasticsearch connection URL."""
|
|
1543
|
+
hosts = config.get("hosts", config.get("host", "localhost"))
|
|
1544
|
+
port = config.get("port", 9200)
|
|
1545
|
+
scheme = "https" if config.get("use_ssl") else "http"
|
|
1546
|
+
|
|
1547
|
+
# Handle multiple hosts
|
|
1548
|
+
if isinstance(hosts, list):
|
|
1549
|
+
return ",".join([f"{scheme}://{h}:{port}" for h in hosts])
|
|
1550
|
+
|
|
1551
|
+
return f"{scheme}://{hosts}:{port}"
|
|
1552
|
+
|
|
1553
|
+
def validate_config(self, config: dict[str, Any]) -> list[str]:
|
|
1554
|
+
"""Validate Elasticsearch configuration."""
|
|
1555
|
+
errors = []
|
|
1556
|
+
|
|
1557
|
+
if not config.get("hosts") and not config.get("host"):
|
|
1558
|
+
errors.append("Either hosts or host is required")
|
|
1559
|
+
|
|
1560
|
+
if not config.get("index"):
|
|
1561
|
+
errors.append("index is required")
|
|
1562
|
+
|
|
1563
|
+
return errors
|
|
1564
|
+
|
|
1565
|
+
@classmethod
|
|
1566
|
+
def get_definition(cls) -> SourceTypeDefinition:
|
|
1567
|
+
"""Get the source type definition for UI rendering."""
|
|
1568
|
+
return SourceTypeDefinition(
|
|
1569
|
+
type=SourceType.ELASTICSEARCH.value,
|
|
1570
|
+
name="Elasticsearch",
|
|
1571
|
+
description="Elasticsearch search engine",
|
|
1572
|
+
icon="search",
|
|
1573
|
+
category="nosql",
|
|
1574
|
+
docs_url="https://www.elastic.co/guide/en/elasticsearch/reference/current/",
|
|
1575
|
+
fields=[
|
|
1576
|
+
FieldDefinition(
|
|
1577
|
+
name="host",
|
|
1578
|
+
label="Host",
|
|
1579
|
+
required=True,
|
|
1580
|
+
placeholder="localhost",
|
|
1581
|
+
description="Elasticsearch server hostname or IP",
|
|
1582
|
+
),
|
|
1583
|
+
FieldDefinition(
|
|
1584
|
+
name="port",
|
|
1585
|
+
label="Port",
|
|
1586
|
+
type=FieldType.NUMBER,
|
|
1587
|
+
default=9200,
|
|
1588
|
+
min_value=1,
|
|
1589
|
+
max_value=65535,
|
|
1590
|
+
description="Elasticsearch server port",
|
|
1591
|
+
),
|
|
1592
|
+
FieldDefinition(
|
|
1593
|
+
name="index",
|
|
1594
|
+
label="Index",
|
|
1595
|
+
required=True,
|
|
1596
|
+
placeholder="my_index",
|
|
1597
|
+
description="Index name to validate",
|
|
1598
|
+
),
|
|
1599
|
+
FieldDefinition(
|
|
1600
|
+
name="username",
|
|
1601
|
+
label="Username",
|
|
1602
|
+
description="Elasticsearch username",
|
|
1603
|
+
),
|
|
1604
|
+
FieldDefinition(
|
|
1605
|
+
name="password",
|
|
1606
|
+
label="Password",
|
|
1607
|
+
type=FieldType.PASSWORD,
|
|
1608
|
+
description="Elasticsearch password",
|
|
1609
|
+
),
|
|
1610
|
+
FieldDefinition(
|
|
1611
|
+
name="api_key",
|
|
1612
|
+
label="API Key",
|
|
1613
|
+
type=FieldType.PASSWORD,
|
|
1614
|
+
description="API key for authentication (alternative to username/password)",
|
|
1615
|
+
),
|
|
1616
|
+
FieldDefinition(
|
|
1617
|
+
name="use_ssl",
|
|
1618
|
+
label="Use SSL/TLS",
|
|
1619
|
+
type=FieldType.BOOLEAN,
|
|
1620
|
+
default=True,
|
|
1621
|
+
description="Enable SSL/TLS connection",
|
|
1622
|
+
),
|
|
1623
|
+
FieldDefinition(
|
|
1624
|
+
name="verify_certs",
|
|
1625
|
+
label="Verify Certificates",
|
|
1626
|
+
type=FieldType.BOOLEAN,
|
|
1627
|
+
default=True,
|
|
1628
|
+
description="Verify SSL certificates",
|
|
1629
|
+
),
|
|
1630
|
+
FieldDefinition(
|
|
1631
|
+
name="cloud_id",
|
|
1632
|
+
label="Cloud ID",
|
|
1633
|
+
placeholder="deployment:dXMtd2VzdC0...",
|
|
1634
|
+
description="Elastic Cloud deployment ID (for Elastic Cloud)",
|
|
1635
|
+
),
|
|
1636
|
+
],
|
|
1637
|
+
)
|
|
1638
|
+
|
|
1639
|
+
|
|
1640
|
+
class KafkaConnectionBuilder(ConnectionBuilder):
    """Connection builder for Apache Kafka.

    Produces the comma-separated bootstrap-servers string, validates the
    minimal configuration (brokers and topic), and describes the form
    fields the UI renders for a Kafka source.
    """

    source_type = SourceType.KAFKA

    def build(self, config: dict[str, Any]) -> str:
        """Build the Kafka bootstrap servers string.

        Args:
            config: Source configuration. ``bootstrap_servers`` may be a
                ready-made comma-separated string, or a list/tuple of
                broker addresses.

        Returns:
            Comma-separated broker addresses. Falls back to
            ``"localhost:9092"`` when the key is absent.
        """
        bootstrap_servers = config.get("bootstrap_servers", "localhost:9092")
        # Accept tuples as well as lists, and coerce each entry to ``str`` so
        # that a sequence always yields a string (``str.join`` raises
        # TypeError on non-string items).
        if isinstance(bootstrap_servers, (list, tuple)):
            return ",".join(str(server) for server in bootstrap_servers)
        return bootstrap_servers

    def validate_config(self, config: dict[str, Any]) -> list[str]:
        """Validate Kafka configuration.

        Args:
            config: Source configuration to check.

        Returns:
            Human-readable error messages; empty when both
            ``bootstrap_servers`` and ``topic`` are present and non-empty.
        """
        errors: list[str] = []

        if not config.get("bootstrap_servers"):
            errors.append("bootstrap_servers is required")

        if not config.get("topic"):
            errors.append("topic is required")

        return errors

    @classmethod
    def get_definition(cls) -> SourceTypeDefinition:
        """Get the source type definition for UI rendering."""
        return SourceTypeDefinition(
            type=SourceType.KAFKA.value,
            name="Apache Kafka",
            description="Apache Kafka streaming platform",
            icon="radio",
            category="streaming",
            docs_url="https://kafka.apache.org/documentation/",
            fields=[
                FieldDefinition(
                    name="bootstrap_servers",
                    label="Bootstrap Servers",
                    required=True,
                    placeholder="localhost:9092",
                    description="Comma-separated list of Kafka broker addresses",
                ),
                FieldDefinition(
                    name="topic",
                    label="Topic",
                    required=True,
                    placeholder="my_topic",
                    description="Kafka topic to consume from",
                ),
                FieldDefinition(
                    name="group_id",
                    label="Consumer Group ID",
                    placeholder="truthound-validator",
                    default="truthound-validator",
                    description="Consumer group identifier",
                ),
                FieldDefinition(
                    name="auto_offset_reset",
                    label="Auto Offset Reset",
                    type=FieldType.SELECT,
                    options=[
                        {"value": "earliest", "label": "Earliest (from beginning)"},
                        {"value": "latest", "label": "Latest (only new messages)"},
                    ],
                    default="earliest",
                    description="Where to start consuming if no offset is stored",
                ),
                FieldDefinition(
                    name="max_messages",
                    label="Max Messages",
                    type=FieldType.NUMBER,
                    default=10000,
                    min_value=1,
                    max_value=1000000,
                    description="Maximum number of messages to consume per batch",
                ),
                FieldDefinition(
                    name="security_protocol",
                    label="Security Protocol",
                    type=FieldType.SELECT,
                    options=[
                        {"value": "PLAINTEXT", "label": "Plaintext"},
                        {"value": "SSL", "label": "SSL"},
                        {"value": "SASL_PLAINTEXT", "label": "SASL Plaintext"},
                        {"value": "SASL_SSL", "label": "SASL SSL"},
                    ],
                    default="PLAINTEXT",
                    description="Security protocol for broker communication",
                ),
                # NOTE(review): the three SASL fields below are gated on
                # security_protocol == "SASL_SSL" only, although
                # "SASL_PLAINTEXT" is also offered above. Confirm whether
                # depends_value can express both protocols.
                FieldDefinition(
                    name="sasl_mechanism",
                    label="SASL Mechanism",
                    type=FieldType.SELECT,
                    options=[
                        {"value": "PLAIN", "label": "PLAIN"},
                        {"value": "SCRAM-SHA-256", "label": "SCRAM-SHA-256"},
                        {"value": "SCRAM-SHA-512", "label": "SCRAM-SHA-512"},
                        {"value": "OAUTHBEARER", "label": "OAuth Bearer"},
                    ],
                    default="PLAIN",
                    description="SASL authentication mechanism",
                    depends_on="security_protocol",
                    depends_value="SASL_SSL",
                ),
                FieldDefinition(
                    name="sasl_username",
                    label="SASL Username",
                    description="SASL authentication username",
                    depends_on="security_protocol",
                    depends_value="SASL_SSL",
                ),
                FieldDefinition(
                    name="sasl_password",
                    label="SASL Password",
                    type=FieldType.PASSWORD,
                    description="SASL authentication password",
                    depends_on="security_protocol",
                    depends_value="SASL_SSL",
                ),
            ],
        )
|
|
1761
|
+
|
|
1762
|
+
|
|
1198
1763
|
# Lookup table from a source type's string value to the builder class that
# handles it. Entries are listed by family; insertion order is kept as-is.
CONNECTION_BUILDERS: dict[str, type[ConnectionBuilder]] = {
    member.value: builder
    for member, builder in (
        # File-based sources
        (SourceType.FILE, FileConnectionBuilder),
        (SourceType.CSV, CSVConnectionBuilder),
        (SourceType.PARQUET, ParquetConnectionBuilder),
        (SourceType.JSON, JSONConnectionBuilder),
        (SourceType.NDJSON, NDJSONConnectionBuilder),
        (SourceType.JSONL, JSONLConnectionBuilder),
        # Core SQL databases
        (SourceType.POSTGRESQL, PostgreSQLConnectionBuilder),
        (SourceType.MYSQL, MySQLConnectionBuilder),
        (SourceType.SQLITE, SQLiteConnectionBuilder),
        # Cloud data warehouses
        (SourceType.SNOWFLAKE, SnowflakeConnectionBuilder),
        (SourceType.BIGQUERY, BigQueryConnectionBuilder),
        (SourceType.REDSHIFT, RedshiftConnectionBuilder),
        (SourceType.DATABRICKS, DatabricksConnectionBuilder),
        # Enterprise databases
        (SourceType.ORACLE, OracleConnectionBuilder),
        (SourceType.SQLSERVER, SQLServerConnectionBuilder),
        # Big data engines
        (SourceType.SPARK, SparkConnectionBuilder),
        # NoSQL stores
        (SourceType.MONGODB, MongoDBConnectionBuilder),
        (SourceType.ELASTICSEARCH, ElasticsearchConnectionBuilder),
        # Streaming platforms
        (SourceType.KAFKA, KafkaConnectionBuilder),
    )
}
|
|
1212
1792
|
|
|
1213
1793
|
|
|
@@ -1256,6 +1836,10 @@ def build_connection_string(source_type: str, config: dict[str, Any]) -> str:
|
|
|
1256
1836
|
async def test_connection(source_type: str, config: dict[str, Any]) -> dict[str, Any]:
|
|
1257
1837
|
"""Test database connection.
|
|
1258
1838
|
|
|
1839
|
+
Updated for truthound 2.x API:
|
|
1840
|
+
- Uses DataSourceFactory with new datasource API
|
|
1841
|
+
- Delegates to datasource_factory.test_connection for better reuse
|
|
1842
|
+
|
|
1259
1843
|
Args:
|
|
1260
1844
|
source_type: Type of data source.
|
|
1261
1845
|
config: Source-specific configuration.
|
|
@@ -1274,8 +1858,6 @@ async def test_connection(source_type: str, config: dict[str, Any]) -> dict[str,
|
|
|
1274
1858
|
"error": f"Configuration errors: {'; '.join(errors)}",
|
|
1275
1859
|
}
|
|
1276
1860
|
|
|
1277
|
-
connection_string = builder.build(config)
|
|
1278
|
-
|
|
1279
1861
|
if source_type == "file":
|
|
1280
1862
|
# For files, just check if path exists
|
|
1281
1863
|
path = Path(config["path"])
|
|
@@ -1286,15 +1868,39 @@ async def test_connection(source_type: str, config: dict[str, Any]) -> dict[str,
|
|
|
1286
1868
|
"message": f"File exists: {path.name} ({path.stat().st_size:,} bytes)",
|
|
1287
1869
|
}
|
|
1288
1870
|
|
|
1289
|
-
#
|
|
1290
|
-
import
|
|
1871
|
+
# Use new DataSourceFactory test_connection for database sources
|
|
1872
|
+
from .datasource_factory import test_connection as factory_test_connection
|
|
1291
1873
|
|
|
1292
|
-
#
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1874
|
+
# Build SourceConfig-compatible dict
|
|
1875
|
+
full_config = {"type": source_type, **config}
|
|
1876
|
+
|
|
1877
|
+
# Map field names from connections to datasource_factory
|
|
1878
|
+
field_mapping = {
|
|
1879
|
+
"username": "user", # connections uses username, factory uses user
|
|
1297
1880
|
}
|
|
1881
|
+
for old_key, new_key in field_mapping.items():
|
|
1882
|
+
if old_key in full_config and new_key not in full_config:
|
|
1883
|
+
full_config[new_key] = full_config[old_key]
|
|
1884
|
+
|
|
1885
|
+
result = await factory_test_connection(full_config)
|
|
1886
|
+
|
|
1887
|
+
# Convert result format
|
|
1888
|
+
if result["success"]:
|
|
1889
|
+
metadata = result.get("metadata", {})
|
|
1890
|
+
row_count = metadata.get("row_count", "unknown")
|
|
1891
|
+
columns = metadata.get("columns", [])
|
|
1892
|
+
col_count = len(columns) if columns else "unknown"
|
|
1893
|
+
|
|
1894
|
+
return {
|
|
1895
|
+
"success": True,
|
|
1896
|
+
"message": f"Connected! Found {col_count} columns, {row_count} rows",
|
|
1897
|
+
"metadata": metadata,
|
|
1898
|
+
}
|
|
1899
|
+
else:
|
|
1900
|
+
return {
|
|
1901
|
+
"success": False,
|
|
1902
|
+
"error": result.get("message", "Connection failed"),
|
|
1903
|
+
}
|
|
1298
1904
|
|
|
1299
1905
|
except ImportError:
|
|
1300
1906
|
return {"success": False, "error": "truthound package not available"}
|
|
@@ -1312,11 +1918,18 @@ def get_supported_source_types() -> list[dict[str, Any]]:
|
|
|
1312
1918
|
List of source type definitions.
|
|
1313
1919
|
"""
|
|
1314
1920
|
result = []
|
|
1921
|
+
seen_types: set[str] = set()
|
|
1315
1922
|
for source_type in SourceType:
|
|
1923
|
+
# Skip generic FILE type - specific format types (CSV, Parquet, etc.) cover it
|
|
1924
|
+
if source_type == SourceType.FILE:
|
|
1925
|
+
continue
|
|
1316
1926
|
builder_class = CONNECTION_BUILDERS.get(source_type.value)
|
|
1317
1927
|
if builder_class:
|
|
1318
1928
|
definition = builder_class.get_definition()
|
|
1319
|
-
|
|
1929
|
+
# Deduplicate by type value
|
|
1930
|
+
if definition.type not in seen_types:
|
|
1931
|
+
seen_types.add(definition.type)
|
|
1932
|
+
result.append(definition.to_dict())
|
|
1320
1933
|
return result
|
|
1321
1934
|
|
|
1322
1935
|
|
|
@@ -1331,6 +1944,8 @@ def get_source_type_categories() -> list[dict[str, str]]:
|
|
|
1331
1944
|
{"value": "database", "label": "Databases", "description": "Relational databases"},
|
|
1332
1945
|
{"value": "warehouse", "label": "Data Warehouses", "description": "Cloud data warehouses"},
|
|
1333
1946
|
{"value": "bigdata", "label": "Big Data", "description": "Big data platforms"},
|
|
1947
|
+
{"value": "nosql", "label": "NoSQL", "description": "Document and search databases"},
|
|
1948
|
+
{"value": "streaming", "label": "Streaming", "description": "Streaming data platforms"},
|
|
1334
1949
|
]
|
|
1335
1950
|
|
|
1336
1951
|
|
|
@@ -1345,14 +1960,21 @@ def get_source_types_by_category() -> dict[str, list[dict[str, Any]]]:
|
|
|
1345
1960
|
"database": [],
|
|
1346
1961
|
"warehouse": [],
|
|
1347
1962
|
"bigdata": [],
|
|
1963
|
+
"nosql": [],
|
|
1964
|
+
"streaming": [],
|
|
1348
1965
|
}
|
|
1349
1966
|
|
|
1967
|
+
seen_types: set[str] = set()
|
|
1350
1968
|
for source_type in SourceType:
|
|
1969
|
+
if source_type == SourceType.FILE:
|
|
1970
|
+
continue
|
|
1351
1971
|
builder_class = CONNECTION_BUILDERS.get(source_type.value)
|
|
1352
1972
|
if builder_class:
|
|
1353
1973
|
definition = builder_class.get_definition()
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1974
|
+
if definition.type not in seen_types:
|
|
1975
|
+
seen_types.add(definition.type)
|
|
1976
|
+
category = definition.category
|
|
1977
|
+
if category in categories:
|
|
1978
|
+
categories[category].append(definition.to_dict())
|
|
1357
1979
|
|
|
1358
1980
|
return categories
|