truthound-dashboard 1.4.4-py3-none-any.whl → 1.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205)
  1. truthound_dashboard/api/alerts.py +75 -86
  2. truthound_dashboard/api/anomaly.py +7 -13
  3. truthound_dashboard/api/cross_alerts.py +38 -52
  4. truthound_dashboard/api/drift.py +49 -59
  5. truthound_dashboard/api/drift_monitor.py +234 -79
  6. truthound_dashboard/api/enterprise_sampling.py +498 -0
  7. truthound_dashboard/api/history.py +57 -5
  8. truthound_dashboard/api/lineage.py +3 -48
  9. truthound_dashboard/api/maintenance.py +104 -49
  10. truthound_dashboard/api/mask.py +1 -2
  11. truthound_dashboard/api/middleware.py +2 -1
  12. truthound_dashboard/api/model_monitoring.py +435 -311
  13. truthound_dashboard/api/notifications.py +227 -191
  14. truthound_dashboard/api/notifications_advanced.py +21 -20
  15. truthound_dashboard/api/observability.py +586 -0
  16. truthound_dashboard/api/plugins.py +2 -433
  17. truthound_dashboard/api/profile.py +199 -37
  18. truthound_dashboard/api/quality_reporter.py +701 -0
  19. truthound_dashboard/api/reports.py +7 -16
  20. truthound_dashboard/api/router.py +66 -0
  21. truthound_dashboard/api/rule_suggestions.py +5 -5
  22. truthound_dashboard/api/scan.py +17 -19
  23. truthound_dashboard/api/schedules.py +85 -50
  24. truthound_dashboard/api/schema_evolution.py +6 -6
  25. truthound_dashboard/api/schema_watcher.py +667 -0
  26. truthound_dashboard/api/sources.py +98 -27
  27. truthound_dashboard/api/tiering.py +1323 -0
  28. truthound_dashboard/api/triggers.py +14 -11
  29. truthound_dashboard/api/validations.py +12 -11
  30. truthound_dashboard/api/versioning.py +1 -6
  31. truthound_dashboard/core/__init__.py +129 -3
  32. truthound_dashboard/core/actions/__init__.py +62 -0
  33. truthound_dashboard/core/actions/custom.py +426 -0
  34. truthound_dashboard/core/actions/notifications.py +910 -0
  35. truthound_dashboard/core/actions/storage.py +472 -0
  36. truthound_dashboard/core/actions/webhook.py +281 -0
  37. truthound_dashboard/core/anomaly.py +262 -67
  38. truthound_dashboard/core/anomaly_explainer.py +4 -3
  39. truthound_dashboard/core/backends/__init__.py +67 -0
  40. truthound_dashboard/core/backends/base.py +299 -0
  41. truthound_dashboard/core/backends/errors.py +191 -0
  42. truthound_dashboard/core/backends/factory.py +423 -0
  43. truthound_dashboard/core/backends/mock_backend.py +451 -0
  44. truthound_dashboard/core/backends/truthound_backend.py +718 -0
  45. truthound_dashboard/core/checkpoint/__init__.py +87 -0
  46. truthound_dashboard/core/checkpoint/adapters.py +814 -0
  47. truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
  48. truthound_dashboard/core/checkpoint/runner.py +270 -0
  49. truthound_dashboard/core/connections.py +645 -23
  50. truthound_dashboard/core/converters/__init__.py +14 -0
  51. truthound_dashboard/core/converters/truthound.py +620 -0
  52. truthound_dashboard/core/cross_alerts.py +540 -320
  53. truthound_dashboard/core/datasource_factory.py +1672 -0
  54. truthound_dashboard/core/drift_monitor.py +216 -20
  55. truthound_dashboard/core/enterprise_sampling.py +1291 -0
  56. truthound_dashboard/core/interfaces/__init__.py +225 -0
  57. truthound_dashboard/core/interfaces/actions.py +652 -0
  58. truthound_dashboard/core/interfaces/base.py +247 -0
  59. truthound_dashboard/core/interfaces/checkpoint.py +676 -0
  60. truthound_dashboard/core/interfaces/protocols.py +664 -0
  61. truthound_dashboard/core/interfaces/reporters.py +650 -0
  62. truthound_dashboard/core/interfaces/routing.py +646 -0
  63. truthound_dashboard/core/interfaces/triggers.py +619 -0
  64. truthound_dashboard/core/lineage.py +407 -71
  65. truthound_dashboard/core/model_monitoring.py +431 -3
  66. truthound_dashboard/core/notifications/base.py +4 -0
  67. truthound_dashboard/core/notifications/channels.py +501 -1203
  68. truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
  69. truthound_dashboard/core/notifications/deduplication/service.py +131 -348
  70. truthound_dashboard/core/notifications/dispatcher.py +202 -11
  71. truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
  72. truthound_dashboard/core/notifications/escalation/engine.py +168 -358
  73. truthound_dashboard/core/notifications/routing/__init__.py +88 -128
  74. truthound_dashboard/core/notifications/routing/engine.py +90 -317
  75. truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
  76. truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
  77. truthound_dashboard/core/notifications/throttling/builder.py +117 -255
  78. truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
  79. truthound_dashboard/core/phase5/collaboration.py +1 -1
  80. truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
  81. truthound_dashboard/core/quality_reporter.py +1359 -0
  82. truthound_dashboard/core/report_history.py +0 -6
  83. truthound_dashboard/core/reporters/__init__.py +175 -14
  84. truthound_dashboard/core/reporters/adapters.py +943 -0
  85. truthound_dashboard/core/reporters/base.py +0 -3
  86. truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
  87. truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
  88. truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
  89. truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
  90. truthound_dashboard/core/reporters/compat.py +266 -0
  91. truthound_dashboard/core/reporters/csv_reporter.py +2 -35
  92. truthound_dashboard/core/reporters/factory.py +526 -0
  93. truthound_dashboard/core/reporters/interfaces.py +745 -0
  94. truthound_dashboard/core/reporters/registry.py +1 -10
  95. truthound_dashboard/core/scheduler.py +165 -0
  96. truthound_dashboard/core/schema_evolution.py +3 -3
  97. truthound_dashboard/core/schema_watcher.py +1528 -0
  98. truthound_dashboard/core/services.py +595 -76
  99. truthound_dashboard/core/store_manager.py +810 -0
  100. truthound_dashboard/core/streaming_anomaly.py +169 -4
  101. truthound_dashboard/core/tiering.py +1309 -0
  102. truthound_dashboard/core/triggers/evaluators.py +178 -8
  103. truthound_dashboard/core/truthound_adapter.py +2620 -197
  104. truthound_dashboard/core/unified_alerts.py +23 -20
  105. truthound_dashboard/db/__init__.py +8 -0
  106. truthound_dashboard/db/database.py +8 -2
  107. truthound_dashboard/db/models.py +944 -25
  108. truthound_dashboard/db/repository.py +2 -0
  109. truthound_dashboard/main.py +15 -0
  110. truthound_dashboard/schemas/__init__.py +177 -16
  111. truthound_dashboard/schemas/base.py +44 -23
  112. truthound_dashboard/schemas/collaboration.py +19 -6
  113. truthound_dashboard/schemas/cross_alerts.py +19 -3
  114. truthound_dashboard/schemas/drift.py +61 -55
  115. truthound_dashboard/schemas/drift_monitor.py +67 -23
  116. truthound_dashboard/schemas/enterprise_sampling.py +653 -0
  117. truthound_dashboard/schemas/lineage.py +0 -33
  118. truthound_dashboard/schemas/mask.py +10 -8
  119. truthound_dashboard/schemas/model_monitoring.py +89 -10
  120. truthound_dashboard/schemas/notifications_advanced.py +13 -0
  121. truthound_dashboard/schemas/observability.py +453 -0
  122. truthound_dashboard/schemas/plugins.py +0 -280
  123. truthound_dashboard/schemas/profile.py +154 -247
  124. truthound_dashboard/schemas/quality_reporter.py +403 -0
  125. truthound_dashboard/schemas/reports.py +2 -2
  126. truthound_dashboard/schemas/rule_suggestion.py +8 -1
  127. truthound_dashboard/schemas/scan.py +4 -24
  128. truthound_dashboard/schemas/schedule.py +11 -3
  129. truthound_dashboard/schemas/schema_watcher.py +727 -0
  130. truthound_dashboard/schemas/source.py +17 -2
  131. truthound_dashboard/schemas/tiering.py +822 -0
  132. truthound_dashboard/schemas/triggers.py +16 -0
  133. truthound_dashboard/schemas/unified_alerts.py +7 -0
  134. truthound_dashboard/schemas/validation.py +0 -13
  135. truthound_dashboard/schemas/validators/base.py +41 -21
  136. truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
  137. truthound_dashboard/schemas/validators/localization_validators.py +273 -0
  138. truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
  139. truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
  140. truthound_dashboard/schemas/validators/referential_validators.py +312 -0
  141. truthound_dashboard/schemas/validators/registry.py +93 -8
  142. truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
  143. truthound_dashboard/schemas/versioning.py +1 -6
  144. truthound_dashboard/static/index.html +2 -2
  145. truthound_dashboard-1.5.1.dist-info/METADATA +312 -0
  146. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/RECORD +149 -148
  147. truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
  148. truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
  149. truthound_dashboard/core/plugins/hooks/manager.py +0 -403
  150. truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
  151. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
  152. truthound_dashboard/core/reporters/junit_reporter.py +0 -233
  153. truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
  154. truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
  155. truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
  156. truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
  157. truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
  158. truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
  159. truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
  160. truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
  161. truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
  162. truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
  163. truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
  164. truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
  165. truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
  166. truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
  167. truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
  168. truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
  169. truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
  170. truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
  171. truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
  172. truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
  173. truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
  174. truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
  175. truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
  176. truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
  177. truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
  178. truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
  179. truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
  180. truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
  181. truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
  182. truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
  183. truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
  184. truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
  185. truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
  186. truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
  187. truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
  188. truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
  189. truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
  190. truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
  191. truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
  192. truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
  193. truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
  194. truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
  195. truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
  196. truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
  197. truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
  198. truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
  199. truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
  200. truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
  201. truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
  202. truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
  203. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/WHEEL +0 -0
  204. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/entry_points.txt +0 -0
  205. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -30,18 +30,39 @@ from urllib.parse import quote_plus
 class SourceType(str, Enum):
     """Supported data source types."""
 
+    # File-based
     FILE = "file"
+    CSV = "csv"
+    PARQUET = "parquet"
+    JSON = "json"
+    NDJSON = "ndjson"
+    JSONL = "jsonl"
+
+    # Core SQL
     POSTGRESQL = "postgresql"
     MYSQL = "mysql"
     SQLITE = "sqlite"
+
+    # Cloud Data Warehouses
     SNOWFLAKE = "snowflake"
     BIGQUERY = "bigquery"
     REDSHIFT = "redshift"
     DATABRICKS = "databricks"
+
+    # Enterprise
     ORACLE = "oracle"
     SQLSERVER = "sqlserver"
+
+    # Big Data
     SPARK = "spark"
 
+    # NoSQL
+    MONGODB = "mongodb"
+    ELASTICSEARCH = "elasticsearch"
+
+    # Streaming
+    KAFKA = "kafka"
+
 
 class FieldType(str, Enum):
     """Field types for configuration forms."""
@@ -104,7 +125,7 @@ class SourceTypeDefinition:
     name: str
     description: str
     icon: str
-    category: Literal["file", "database", "warehouse", "bigdata"]
+    category: Literal["file", "database", "warehouse", "bigdata", "nosql", "streaming"]
     fields: list[FieldDefinition]
     docs_url: str = ""
 
@@ -166,7 +187,7 @@ class FileConnectionBuilder(ConnectionBuilder):
     """Connection builder for file-based sources."""
 
     source_type = SourceType.FILE
-    SUPPORTED_EXTENSIONS = {".csv", ".parquet", ".json", ".xlsx", ".xls"}
+    SUPPORTED_EXTENSIONS = {".csv", ".parquet", ".json", ".ndjson", ".jsonl"}
 
     def build(self, config: dict[str, Any]) -> str:
         """Build file path from config."""
@@ -195,7 +216,7 @@ class FileConnectionBuilder(ConnectionBuilder):
         return SourceTypeDefinition(
             type=SourceType.FILE.value,
             name="File",
-            description="Local file (CSV, Parquet, JSON, Excel)",
+            description="Local file (CSV, Parquet, JSON, NDJSON, JSONL)",
             icon="file",
             category="file",
             fields=[
@@ -216,7 +237,8 @@ class FileConnectionBuilder(ConnectionBuilder):
                         {"value": "csv", "label": "CSV"},
                         {"value": "parquet", "label": "Parquet"},
                         {"value": "json", "label": "JSON"},
-                        {"value": "excel", "label": "Excel"},
+                        {"value": "ndjson", "label": "NDJSON"},
+                        {"value": "jsonl", "label": "JSONL"},
                     ],
                     default="auto",
                     description="File format (auto-detected from extension if not specified)",
@@ -250,13 +272,195 @@ class FileConnectionBuilder(ConnectionBuilder):
                     default=True,
                     description="First row contains column names",
                 ),
+            ],
+        )
+
+
+class CSVConnectionBuilder(FileConnectionBuilder):
+    """Connection builder for CSV files."""
+
+    source_type = SourceType.CSV
+
+    @classmethod
+    def get_definition(cls) -> SourceTypeDefinition:
+        return SourceTypeDefinition(
+            type=SourceType.CSV.value,
+            name="CSV",
+            description="Comma-separated values file",
+            icon="file",
+            category="file",
+            fields=[
                 FieldDefinition(
-                    name="sheet",
-                    label="Sheet Name",
-                    placeholder="Sheet1",
-                    description="Excel sheet name (for Excel files)",
-                    depends_on="format",
-                    depends_value="excel",
+                    name="path",
+                    label="File Path",
+                    type=FieldType.FILE_PATH,
+                    required=True,
+                    placeholder="/path/to/data.csv",
+                    description="Path to the CSV file",
+                ),
+                FieldDefinition(
+                    name="delimiter",
+                    label="Delimiter",
+                    placeholder=",",
+                    default=",",
+                    description="CSV delimiter character",
+                ),
+                FieldDefinition(
+                    name="encoding",
+                    label="Encoding",
+                    type=FieldType.SELECT,
+                    options=[
+                        {"value": "utf-8", "label": "UTF-8"},
+                        {"value": "utf-16", "label": "UTF-16"},
+                        {"value": "iso-8859-1", "label": "ISO-8859-1 (Latin-1)"},
+                        {"value": "cp1252", "label": "Windows-1252"},
+                    ],
+                    default="utf-8",
+                    description="File encoding",
+                ),
+                FieldDefinition(
+                    name="has_header",
+                    label="Has Header Row",
+                    type=FieldType.BOOLEAN,
+                    default=True,
+                    description="First row contains column names",
+                ),
+            ],
+        )
+
+
+class ParquetConnectionBuilder(FileConnectionBuilder):
+    """Connection builder for Parquet files."""
+
+    source_type = SourceType.PARQUET
+
+    @classmethod
+    def get_definition(cls) -> SourceTypeDefinition:
+        return SourceTypeDefinition(
+            type=SourceType.PARQUET.value,
+            name="Parquet",
+            description="Apache Parquet columnar storage file",
+            icon="file",
+            category="file",
+            fields=[
+                FieldDefinition(
+                    name="path",
+                    label="File Path",
+                    type=FieldType.FILE_PATH,
+                    required=True,
+                    placeholder="/path/to/data.parquet",
+                    description="Path to the Parquet file",
+                ),
+            ],
+        )
+
+
+class JSONConnectionBuilder(FileConnectionBuilder):
+    """Connection builder for JSON files."""
+
+    source_type = SourceType.JSON
+
+    @classmethod
+    def get_definition(cls) -> SourceTypeDefinition:
+        return SourceTypeDefinition(
+            type=SourceType.JSON.value,
+            name="JSON",
+            description="JSON file (array of objects)",
+            icon="file_json",
+            category="file",
+            fields=[
+                FieldDefinition(
+                    name="path",
+                    label="File Path",
+                    type=FieldType.FILE_PATH,
+                    required=True,
+                    placeholder="/path/to/data.json",
+                    description="Path to the JSON file",
+                ),
+                FieldDefinition(
+                    name="encoding",
+                    label="Encoding",
+                    type=FieldType.SELECT,
+                    options=[
+                        {"value": "utf-8", "label": "UTF-8"},
+                        {"value": "utf-16", "label": "UTF-16"},
+                    ],
+                    default="utf-8",
+                    description="File encoding",
+                ),
+            ],
+        )
+
+
+class NDJSONConnectionBuilder(FileConnectionBuilder):
+    """Connection builder for NDJSON files."""
+
+    source_type = SourceType.NDJSON
+
+    @classmethod
+    def get_definition(cls) -> SourceTypeDefinition:
+        return SourceTypeDefinition(
+            type=SourceType.NDJSON.value,
+            name="NDJSON",
+            description="Newline-delimited JSON file",
+            icon="file_json",
+            category="file",
+            fields=[
+                FieldDefinition(
+                    name="path",
+                    label="File Path",
+                    type=FieldType.FILE_PATH,
+                    required=True,
+                    placeholder="/path/to/data.ndjson",
+                    description="Path to the NDJSON file",
+                ),
+                FieldDefinition(
+                    name="encoding",
+                    label="Encoding",
+                    type=FieldType.SELECT,
+                    options=[
+                        {"value": "utf-8", "label": "UTF-8"},
+                        {"value": "utf-16", "label": "UTF-16"},
+                    ],
+                    default="utf-8",
+                    description="File encoding",
+                ),
+            ],
+        )
+
+
+class JSONLConnectionBuilder(FileConnectionBuilder):
+    """Connection builder for JSONL files."""
+
+    source_type = SourceType.JSONL
+
+    @classmethod
+    def get_definition(cls) -> SourceTypeDefinition:
+        return SourceTypeDefinition(
+            type=SourceType.JSONL.value,
+            name="JSONL",
+            description="JSON Lines file (one JSON object per line)",
+            icon="file_json",
+            category="file",
+            fields=[
+                FieldDefinition(
+                    name="path",
+                    label="File Path",
+                    type=FieldType.FILE_PATH,
+                    required=True,
+                    placeholder="/path/to/data.jsonl",
+                    description="Path to the JSONL file",
+                ),
+                FieldDefinition(
+                    name="encoding",
+                    label="Encoding",
+                    type=FieldType.SELECT,
+                    options=[
+                        {"value": "utf-8", "label": "UTF-8"},
+                        {"value": "utf-16", "label": "UTF-16"},
+                    ],
+                    default="utf-8",
+                    description="File encoding",
                 ),
             ],
         )
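
The format-specific builders added above only override get_definition(); they inherit build() from FileConnectionBuilder, whose docstring says it builds a file path from the config. A hedged sketch of how that is typically exercised (module path, no-argument construction, and the exact return value of build() are assumptions, not shown in the diff):

from truthound_dashboard.core.connections import CSVConnectionBuilder  # assumed module path

builder = CSVConnectionBuilder()  # assumes builders construct with no arguments
config = {"path": "/data/orders.csv", "delimiter": ";", "encoding": "utf-8", "has_header": True}

print(builder.validate_config(config))    # expected: [] (assumption: path is the only required field)
print(builder.build(config))              # per the inherited docstring, a file path such as /data/orders.csv
print(builder.get_definition().category)  # "file", so it lands in the File group of the source picker
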
@@ -1195,19 +1399,395 @@ class SparkConnectionBuilder(ConnectionBuilder):
         )
 
 
+class MongoDBConnectionBuilder(ConnectionBuilder):
+    """Connection builder for MongoDB."""
+
+    source_type = SourceType.MONGODB
+
+    def build(self, config: dict[str, Any]) -> str:
+        """Build MongoDB connection string."""
+        # Support direct connection string
+        if config.get("connection_string"):
+            return config["connection_string"]
+
+        host = config.get("host", "localhost")
+        port = config.get("port", 27017)
+        database = config.get("database", "")
+        username = config.get("username", "")
+        password = quote_plus(config.get("password", ""))
+
+        # Build connection string
+        if username and password:
+            conn = f"mongodb://{username}:{password}@{host}:{port}/{database}"
+        else:
+            conn = f"mongodb://{host}:{port}/{database}"
+
+        # Add options
+        options = []
+        if config.get("auth_source"):
+            options.append(f"authSource={config['auth_source']}")
+        if config.get("replica_set"):
+            options.append(f"replicaSet={config['replica_set']}")
+        if config.get("ssl"):
+            options.append("ssl=true")
+
+        if options:
+            conn += "?" + "&".join(options)
+
+        return conn
+
+    def validate_config(self, config: dict[str, Any]) -> list[str]:
+        """Validate MongoDB configuration."""
+        errors = []
+
+        # Either connection_string or host is required
+        if not config.get("connection_string") and not config.get("host"):
+            errors.append("Either connection_string or host is required")
+
+        if not config.get("database"):
+            errors.append("database is required")
+
+        if not config.get("collection"):
+            errors.append("collection is required")
+
+        return errors
+
+    @classmethod
+    def get_definition(cls) -> SourceTypeDefinition:
+        """Get the source type definition for UI rendering."""
+        return SourceTypeDefinition(
+            type=SourceType.MONGODB.value,
+            name="MongoDB",
+            description="MongoDB document database",
+            icon="database",
+            category="nosql",
+            docs_url="https://www.mongodb.com/docs/",
+            fields=[
+                FieldDefinition(
+                    name="connection_string",
+                    label="Connection String",
+                    placeholder="mongodb://localhost:27017/mydb",
+                    description="Full MongoDB connection URI (alternative to individual fields)",
+                ),
+                FieldDefinition(
+                    name="host",
+                    label="Host",
+                    placeholder="localhost",
+                    description="MongoDB server hostname or IP",
+                ),
+                FieldDefinition(
+                    name="port",
+                    label="Port",
+                    type=FieldType.NUMBER,
+                    default=27017,
+                    min_value=1,
+                    max_value=65535,
+                    description="MongoDB server port",
+                ),
+                FieldDefinition(
+                    name="database",
+                    label="Database",
+                    required=True,
+                    placeholder="mydb",
+                    description="Database name",
+                ),
+                FieldDefinition(
+                    name="collection",
+                    label="Collection",
+                    required=True,
+                    placeholder="users",
+                    description="Collection name to validate",
+                ),
+                FieldDefinition(
+                    name="username",
+                    label="Username",
+                    description="Database username",
+                ),
+                FieldDefinition(
+                    name="password",
+                    label="Password",
+                    type=FieldType.PASSWORD,
+                    description="Database password",
+                ),
+                FieldDefinition(
+                    name="auth_source",
+                    label="Auth Source",
+                    placeholder="admin",
+                    default="admin",
+                    description="Authentication database",
+                ),
+                FieldDefinition(
+                    name="replica_set",
+                    label="Replica Set",
+                    placeholder="rs0",
+                    description="Replica set name (for replica set connections)",
+                ),
+                FieldDefinition(
+                    name="ssl",
+                    label="Use SSL/TLS",
+                    type=FieldType.BOOLEAN,
+                    default=False,
+                    description="Enable SSL/TLS connection",
+                ),
+            ],
+        )
+
+
+class ElasticsearchConnectionBuilder(ConnectionBuilder):
+    """Connection builder for Elasticsearch."""
+
+    source_type = SourceType.ELASTICSEARCH
+
+    def build(self, config: dict[str, Any]) -> str:
+        """Build Elasticsearch connection URL."""
+        hosts = config.get("hosts", config.get("host", "localhost"))
+        port = config.get("port", 9200)
+        scheme = "https" if config.get("use_ssl") else "http"
+
+        # Handle multiple hosts
+        if isinstance(hosts, list):
+            return ",".join([f"{scheme}://{h}:{port}" for h in hosts])
+
+        return f"{scheme}://{hosts}:{port}"
+
+    def validate_config(self, config: dict[str, Any]) -> list[str]:
+        """Validate Elasticsearch configuration."""
+        errors = []
+
+        if not config.get("hosts") and not config.get("host"):
+            errors.append("Either hosts or host is required")
+
+        if not config.get("index"):
+            errors.append("index is required")
+
+        return errors
+
+    @classmethod
+    def get_definition(cls) -> SourceTypeDefinition:
+        """Get the source type definition for UI rendering."""
+        return SourceTypeDefinition(
+            type=SourceType.ELASTICSEARCH.value,
+            name="Elasticsearch",
+            description="Elasticsearch search engine",
+            icon="search",
+            category="nosql",
+            docs_url="https://www.elastic.co/guide/en/elasticsearch/reference/current/",
+            fields=[
+                FieldDefinition(
+                    name="host",
+                    label="Host",
+                    required=True,
+                    placeholder="localhost",
+                    description="Elasticsearch server hostname or IP",
+                ),
+                FieldDefinition(
+                    name="port",
+                    label="Port",
+                    type=FieldType.NUMBER,
+                    default=9200,
+                    min_value=1,
+                    max_value=65535,
+                    description="Elasticsearch server port",
+                ),
+                FieldDefinition(
+                    name="index",
+                    label="Index",
+                    required=True,
+                    placeholder="my_index",
+                    description="Index name to validate",
+                ),
+                FieldDefinition(
+                    name="username",
+                    label="Username",
+                    description="Elasticsearch username",
+                ),
+                FieldDefinition(
+                    name="password",
+                    label="Password",
+                    type=FieldType.PASSWORD,
+                    description="Elasticsearch password",
+                ),
+                FieldDefinition(
+                    name="api_key",
+                    label="API Key",
+                    type=FieldType.PASSWORD,
+                    description="API key for authentication (alternative to username/password)",
+                ),
+                FieldDefinition(
+                    name="use_ssl",
+                    label="Use SSL/TLS",
+                    type=FieldType.BOOLEAN,
+                    default=True,
+                    description="Enable SSL/TLS connection",
+                ),
+                FieldDefinition(
+                    name="verify_certs",
+                    label="Verify Certificates",
+                    type=FieldType.BOOLEAN,
+                    default=True,
+                    description="Verify SSL certificates",
+                ),
+                FieldDefinition(
+                    name="cloud_id",
+                    label="Cloud ID",
+                    placeholder="deployment:dXMtd2VzdC0...",
+                    description="Elastic Cloud deployment ID (for Elastic Cloud)",
+                ),
+            ],
+        )
+
+
+class KafkaConnectionBuilder(ConnectionBuilder):
+    """Connection builder for Apache Kafka."""
+
+    source_type = SourceType.KAFKA
+
+    def build(self, config: dict[str, Any]) -> str:
+        """Build Kafka bootstrap servers string."""
+        bootstrap_servers = config.get("bootstrap_servers", "localhost:9092")
+        if isinstance(bootstrap_servers, list):
+            return ",".join(bootstrap_servers)
+        return bootstrap_servers
+
+    def validate_config(self, config: dict[str, Any]) -> list[str]:
+        """Validate Kafka configuration."""
+        errors = []
+
+        if not config.get("bootstrap_servers"):
+            errors.append("bootstrap_servers is required")
+
+        if not config.get("topic"):
+            errors.append("topic is required")
+
+        return errors
+
+    @classmethod
+    def get_definition(cls) -> SourceTypeDefinition:
+        """Get the source type definition for UI rendering."""
+        return SourceTypeDefinition(
+            type=SourceType.KAFKA.value,
+            name="Apache Kafka",
+            description="Apache Kafka streaming platform",
+            icon="radio",
+            category="streaming",
+            docs_url="https://kafka.apache.org/documentation/",
+            fields=[
+                FieldDefinition(
+                    name="bootstrap_servers",
+                    label="Bootstrap Servers",
+                    required=True,
+                    placeholder="localhost:9092",
+                    description="Comma-separated list of Kafka broker addresses",
+                ),
+                FieldDefinition(
+                    name="topic",
+                    label="Topic",
+                    required=True,
+                    placeholder="my_topic",
+                    description="Kafka topic to consume from",
+                ),
+                FieldDefinition(
+                    name="group_id",
+                    label="Consumer Group ID",
+                    placeholder="truthound-validator",
+                    default="truthound-validator",
+                    description="Consumer group identifier",
+                ),
+                FieldDefinition(
+                    name="auto_offset_reset",
+                    label="Auto Offset Reset",
+                    type=FieldType.SELECT,
+                    options=[
+                        {"value": "earliest", "label": "Earliest (from beginning)"},
+                        {"value": "latest", "label": "Latest (only new messages)"},
+                    ],
+                    default="earliest",
+                    description="Where to start consuming if no offset is stored",
+                ),
+                FieldDefinition(
+                    name="max_messages",
+                    label="Max Messages",
+                    type=FieldType.NUMBER,
+                    default=10000,
+                    min_value=1,
+                    max_value=1000000,
+                    description="Maximum number of messages to consume per batch",
+                ),
+                FieldDefinition(
+                    name="security_protocol",
+                    label="Security Protocol",
+                    type=FieldType.SELECT,
+                    options=[
+                        {"value": "PLAINTEXT", "label": "Plaintext"},
+                        {"value": "SSL", "label": "SSL"},
+                        {"value": "SASL_PLAINTEXT", "label": "SASL Plaintext"},
+                        {"value": "SASL_SSL", "label": "SASL SSL"},
+                    ],
+                    default="PLAINTEXT",
+                    description="Security protocol for broker communication",
+                ),
+                FieldDefinition(
+                    name="sasl_mechanism",
+                    label="SASL Mechanism",
+                    type=FieldType.SELECT,
+                    options=[
+                        {"value": "PLAIN", "label": "PLAIN"},
+                        {"value": "SCRAM-SHA-256", "label": "SCRAM-SHA-256"},
+                        {"value": "SCRAM-SHA-512", "label": "SCRAM-SHA-512"},
+                        {"value": "OAUTHBEARER", "label": "OAuth Bearer"},
+                    ],
+                    default="PLAIN",
+                    description="SASL authentication mechanism",
+                    depends_on="security_protocol",
+                    depends_value="SASL_SSL",
+                ),
+                FieldDefinition(
+                    name="sasl_username",
+                    label="SASL Username",
+                    description="SASL authentication username",
+                    depends_on="security_protocol",
+                    depends_value="SASL_SSL",
+                ),
+                FieldDefinition(
+                    name="sasl_password",
+                    label="SASL Password",
+                    type=FieldType.PASSWORD,
+                    description="SASL authentication password",
+                    depends_on="security_protocol",
+                    depends_value="SASL_SSL",
+                ),
+            ],
+        )
+
+
 # Registry of connection builders
 CONNECTION_BUILDERS: dict[str, type[ConnectionBuilder]] = {
+    # File-based
     SourceType.FILE.value: FileConnectionBuilder,
+    SourceType.CSV.value: CSVConnectionBuilder,
+    SourceType.PARQUET.value: ParquetConnectionBuilder,
+    SourceType.JSON.value: JSONConnectionBuilder,
+    SourceType.NDJSON.value: NDJSONConnectionBuilder,
+    SourceType.JSONL.value: JSONLConnectionBuilder,
+    # Core SQL
     SourceType.POSTGRESQL.value: PostgreSQLConnectionBuilder,
     SourceType.MYSQL.value: MySQLConnectionBuilder,
     SourceType.SQLITE.value: SQLiteConnectionBuilder,
+    # Cloud Data Warehouses
     SourceType.SNOWFLAKE.value: SnowflakeConnectionBuilder,
     SourceType.BIGQUERY.value: BigQueryConnectionBuilder,
     SourceType.REDSHIFT.value: RedshiftConnectionBuilder,
     SourceType.DATABRICKS.value: DatabricksConnectionBuilder,
+    # Enterprise
    SourceType.ORACLE.value: OracleConnectionBuilder,
     SourceType.SQLSERVER.value: SQLServerConnectionBuilder,
+    # Big Data
     SourceType.SPARK.value: SparkConnectionBuilder,
+    # NoSQL
+    SourceType.MONGODB.value: MongoDBConnectionBuilder,
+    SourceType.ELASTICSEARCH.value: ElasticsearchConnectionBuilder,
+    # Streaming
+    SourceType.KAFKA.value: KafkaConnectionBuilder,
 }
 
 
@@ -1256,6 +1836,10 @@ def build_connection_string(source_type: str, config: dict[str, Any]) -> str:
 async def test_connection(source_type: str, config: dict[str, Any]) -> dict[str, Any]:
     """Test database connection.
 
+    Updated for truthound 2.x API:
+    - Uses DataSourceFactory with new datasource API
+    - Delegates to datasource_factory.test_connection for better reuse
+
     Args:
         source_type: Type of data source.
         config: Source-specific configuration.
@@ -1274,8 +1858,6 @@ async def test_connection(source_type: str, config: dict[str, Any]) -> dict[str,
                 "error": f"Configuration errors: {'; '.join(errors)}",
             }
 
-        connection_string = builder.build(config)
-
         if source_type == "file":
             # For files, just check if path exists
             path = Path(config["path"])
@@ -1286,15 +1868,39 @@ async def test_connection(source_type: str, config: dict[str, Any]) -> dict[str,
                 "message": f"File exists: {path.name} ({path.stat().st_size:,} bytes)",
             }
 
-        # For databases, use truthound to test connection
-        import truthound as th
+        # Use new DataSourceFactory test_connection for database sources
+        from .datasource_factory import test_connection as factory_test_connection
 
-        # Quick profile to test connection
-        result = th.profile(connection_string)
-        return {
-            "success": True,
-            "message": f"Connected! Found {result.column_count} columns, {result.row_count:,} rows",
+        # Build SourceConfig-compatible dict
+        full_config = {"type": source_type, **config}
+
+        # Map field names from connections to datasource_factory
+        field_mapping = {
+            "username": "user",  # connections uses username, factory uses user
         }
+        for old_key, new_key in field_mapping.items():
+            if old_key in full_config and new_key not in full_config:
+                full_config[new_key] = full_config[old_key]
+
+        result = await factory_test_connection(full_config)
+
+        # Convert result format
+        if result["success"]:
+            metadata = result.get("metadata", {})
+            row_count = metadata.get("row_count", "unknown")
+            columns = metadata.get("columns", [])
+            col_count = len(columns) if columns else "unknown"
+
+            return {
+                "success": True,
+                "message": f"Connected! Found {col_count} columns, {row_count} rows",
+                "metadata": metadata,
+            }
+        else:
+            return {
+                "success": False,
+                "error": result.get("message", "Connection failed"),
+            }
 
     except ImportError:
         return {"success": False, "error": "truthound package not available"}
@@ -1312,11 +1918,18 @@ def get_supported_source_types() -> list[dict[str, Any]]:
         List of source type definitions.
     """
     result = []
+    seen_types: set[str] = set()
     for source_type in SourceType:
+        # Skip generic FILE type - specific format types (CSV, Parquet, etc.) cover it
+        if source_type == SourceType.FILE:
+            continue
         builder_class = CONNECTION_BUILDERS.get(source_type.value)
         if builder_class:
             definition = builder_class.get_definition()
-            result.append(definition.to_dict())
+            # Deduplicate by type value
+            if definition.type not in seen_types:
+                seen_types.add(definition.type)
+                result.append(definition.to_dict())
     return result
 
 
@@ -1331,6 +1944,8 @@ def get_source_type_categories() -> list[dict[str, str]]:
         {"value": "database", "label": "Databases", "description": "Relational databases"},
         {"value": "warehouse", "label": "Data Warehouses", "description": "Cloud data warehouses"},
         {"value": "bigdata", "label": "Big Data", "description": "Big data platforms"},
+        {"value": "nosql", "label": "NoSQL", "description": "Document and search databases"},
+        {"value": "streaming", "label": "Streaming", "description": "Streaming data platforms"},
     ]
 
 
@@ -1345,14 +1960,21 @@ def get_source_types_by_category() -> dict[str, list[dict[str, Any]]]:
         "database": [],
         "warehouse": [],
         "bigdata": [],
+        "nosql": [],
+        "streaming": [],
     }
 
+    seen_types: set[str] = set()
     for source_type in SourceType:
+        if source_type == SourceType.FILE:
+            continue
         builder_class = CONNECTION_BUILDERS.get(source_type.value)
         if builder_class:
             definition = builder_class.get_definition()
-            category = definition.category
-            if category in categories:
-                categories[category].append(definition.to_dict())
+            if definition.type not in seen_types:
+                seen_types.add(definition.type)
+                category = definition.category
+                if category in categories:
+                    categories[category].append(definition.to_dict())
 
     return categories
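
For the dashboard UI, the grouping helpers now skip the generic FILE entry, deduplicate by type, and expose the two new categories. A sketch of the expected shape (import path and the keys produced by to_dict() are assumptions):

from truthound_dashboard.core.connections import (  # assumed module path
    get_source_type_categories,
    get_source_types_by_category,
)

print([c["value"] for c in get_source_type_categories()])
# expected: ['file', 'database', 'warehouse', 'bigdata', 'nosql', 'streaming']

grouped = get_source_types_by_category()
print([d["type"] for d in grouped["streaming"]])  # expected: ['kafka'] (assumes to_dict() keeps a "type" key)
print([d["type"] for d in grouped["nosql"]])      # expected: ['mongodb', 'elasticsearch']
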