truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. truthound_dashboard/api/alerts.py +75 -86
  2. truthound_dashboard/api/anomaly.py +7 -13
  3. truthound_dashboard/api/cross_alerts.py +38 -52
  4. truthound_dashboard/api/drift.py +49 -59
  5. truthound_dashboard/api/drift_monitor.py +234 -79
  6. truthound_dashboard/api/enterprise_sampling.py +498 -0
  7. truthound_dashboard/api/history.py +57 -5
  8. truthound_dashboard/api/lineage.py +3 -48
  9. truthound_dashboard/api/maintenance.py +104 -49
  10. truthound_dashboard/api/mask.py +1 -2
  11. truthound_dashboard/api/middleware.py +2 -1
  12. truthound_dashboard/api/model_monitoring.py +435 -311
  13. truthound_dashboard/api/notifications.py +227 -191
  14. truthound_dashboard/api/notifications_advanced.py +21 -20
  15. truthound_dashboard/api/observability.py +586 -0
  16. truthound_dashboard/api/plugins.py +2 -433
  17. truthound_dashboard/api/profile.py +199 -37
  18. truthound_dashboard/api/quality_reporter.py +701 -0
  19. truthound_dashboard/api/reports.py +7 -16
  20. truthound_dashboard/api/router.py +66 -0
  21. truthound_dashboard/api/rule_suggestions.py +5 -5
  22. truthound_dashboard/api/scan.py +17 -19
  23. truthound_dashboard/api/schedules.py +85 -50
  24. truthound_dashboard/api/schema_evolution.py +6 -6
  25. truthound_dashboard/api/schema_watcher.py +667 -0
  26. truthound_dashboard/api/sources.py +98 -27
  27. truthound_dashboard/api/tiering.py +1323 -0
  28. truthound_dashboard/api/triggers.py +14 -11
  29. truthound_dashboard/api/validations.py +12 -11
  30. truthound_dashboard/api/versioning.py +1 -6
  31. truthound_dashboard/core/__init__.py +129 -3
  32. truthound_dashboard/core/actions/__init__.py +62 -0
  33. truthound_dashboard/core/actions/custom.py +426 -0
  34. truthound_dashboard/core/actions/notifications.py +910 -0
  35. truthound_dashboard/core/actions/storage.py +472 -0
  36. truthound_dashboard/core/actions/webhook.py +281 -0
  37. truthound_dashboard/core/anomaly.py +262 -67
  38. truthound_dashboard/core/anomaly_explainer.py +4 -3
  39. truthound_dashboard/core/backends/__init__.py +67 -0
  40. truthound_dashboard/core/backends/base.py +299 -0
  41. truthound_dashboard/core/backends/errors.py +191 -0
  42. truthound_dashboard/core/backends/factory.py +423 -0
  43. truthound_dashboard/core/backends/mock_backend.py +451 -0
  44. truthound_dashboard/core/backends/truthound_backend.py +718 -0
  45. truthound_dashboard/core/checkpoint/__init__.py +87 -0
  46. truthound_dashboard/core/checkpoint/adapters.py +814 -0
  47. truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
  48. truthound_dashboard/core/checkpoint/runner.py +270 -0
  49. truthound_dashboard/core/connections.py +645 -23
  50. truthound_dashboard/core/converters/__init__.py +14 -0
  51. truthound_dashboard/core/converters/truthound.py +620 -0
  52. truthound_dashboard/core/cross_alerts.py +540 -320
  53. truthound_dashboard/core/datasource_factory.py +1672 -0
  54. truthound_dashboard/core/drift_monitor.py +216 -20
  55. truthound_dashboard/core/enterprise_sampling.py +1291 -0
  56. truthound_dashboard/core/interfaces/__init__.py +225 -0
  57. truthound_dashboard/core/interfaces/actions.py +652 -0
  58. truthound_dashboard/core/interfaces/base.py +247 -0
  59. truthound_dashboard/core/interfaces/checkpoint.py +676 -0
  60. truthound_dashboard/core/interfaces/protocols.py +664 -0
  61. truthound_dashboard/core/interfaces/reporters.py +650 -0
  62. truthound_dashboard/core/interfaces/routing.py +646 -0
  63. truthound_dashboard/core/interfaces/triggers.py +619 -0
  64. truthound_dashboard/core/lineage.py +407 -71
  65. truthound_dashboard/core/model_monitoring.py +431 -3
  66. truthound_dashboard/core/notifications/base.py +4 -0
  67. truthound_dashboard/core/notifications/channels.py +501 -1203
  68. truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
  69. truthound_dashboard/core/notifications/deduplication/service.py +131 -348
  70. truthound_dashboard/core/notifications/dispatcher.py +202 -11
  71. truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
  72. truthound_dashboard/core/notifications/escalation/engine.py +168 -358
  73. truthound_dashboard/core/notifications/routing/__init__.py +88 -128
  74. truthound_dashboard/core/notifications/routing/engine.py +90 -317
  75. truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
  76. truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
  77. truthound_dashboard/core/notifications/throttling/builder.py +117 -255
  78. truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
  79. truthound_dashboard/core/phase5/collaboration.py +1 -1
  80. truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
  81. truthound_dashboard/core/quality_reporter.py +1359 -0
  82. truthound_dashboard/core/report_history.py +0 -6
  83. truthound_dashboard/core/reporters/__init__.py +175 -14
  84. truthound_dashboard/core/reporters/adapters.py +943 -0
  85. truthound_dashboard/core/reporters/base.py +0 -3
  86. truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
  87. truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
  88. truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
  89. truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
  90. truthound_dashboard/core/reporters/compat.py +266 -0
  91. truthound_dashboard/core/reporters/csv_reporter.py +2 -35
  92. truthound_dashboard/core/reporters/factory.py +526 -0
  93. truthound_dashboard/core/reporters/interfaces.py +745 -0
  94. truthound_dashboard/core/reporters/registry.py +1 -10
  95. truthound_dashboard/core/scheduler.py +165 -0
  96. truthound_dashboard/core/schema_evolution.py +3 -3
  97. truthound_dashboard/core/schema_watcher.py +1528 -0
  98. truthound_dashboard/core/services.py +595 -76
  99. truthound_dashboard/core/store_manager.py +810 -0
  100. truthound_dashboard/core/streaming_anomaly.py +169 -4
  101. truthound_dashboard/core/tiering.py +1309 -0
  102. truthound_dashboard/core/triggers/evaluators.py +178 -8
  103. truthound_dashboard/core/truthound_adapter.py +2620 -197
  104. truthound_dashboard/core/unified_alerts.py +23 -20
  105. truthound_dashboard/db/__init__.py +8 -0
  106. truthound_dashboard/db/database.py +8 -2
  107. truthound_dashboard/db/models.py +944 -25
  108. truthound_dashboard/db/repository.py +2 -0
  109. truthound_dashboard/main.py +15 -0
  110. truthound_dashboard/schemas/__init__.py +177 -16
  111. truthound_dashboard/schemas/base.py +44 -23
  112. truthound_dashboard/schemas/collaboration.py +19 -6
  113. truthound_dashboard/schemas/cross_alerts.py +19 -3
  114. truthound_dashboard/schemas/drift.py +61 -55
  115. truthound_dashboard/schemas/drift_monitor.py +67 -23
  116. truthound_dashboard/schemas/enterprise_sampling.py +653 -0
  117. truthound_dashboard/schemas/lineage.py +0 -33
  118. truthound_dashboard/schemas/mask.py +10 -8
  119. truthound_dashboard/schemas/model_monitoring.py +89 -10
  120. truthound_dashboard/schemas/notifications_advanced.py +13 -0
  121. truthound_dashboard/schemas/observability.py +453 -0
  122. truthound_dashboard/schemas/plugins.py +0 -280
  123. truthound_dashboard/schemas/profile.py +154 -247
  124. truthound_dashboard/schemas/quality_reporter.py +403 -0
  125. truthound_dashboard/schemas/reports.py +2 -2
  126. truthound_dashboard/schemas/rule_suggestion.py +8 -1
  127. truthound_dashboard/schemas/scan.py +4 -24
  128. truthound_dashboard/schemas/schedule.py +11 -3
  129. truthound_dashboard/schemas/schema_watcher.py +727 -0
  130. truthound_dashboard/schemas/source.py +17 -2
  131. truthound_dashboard/schemas/tiering.py +822 -0
  132. truthound_dashboard/schemas/triggers.py +16 -0
  133. truthound_dashboard/schemas/unified_alerts.py +7 -0
  134. truthound_dashboard/schemas/validation.py +0 -13
  135. truthound_dashboard/schemas/validators/base.py +41 -21
  136. truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
  137. truthound_dashboard/schemas/validators/localization_validators.py +273 -0
  138. truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
  139. truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
  140. truthound_dashboard/schemas/validators/referential_validators.py +312 -0
  141. truthound_dashboard/schemas/validators/registry.py +93 -8
  142. truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
  143. truthound_dashboard/schemas/versioning.py +1 -6
  144. truthound_dashboard/static/index.html +2 -2
  145. truthound_dashboard-1.5.1.dist-info/METADATA +312 -0
  146. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/RECORD +149 -148
  147. truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
  148. truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
  149. truthound_dashboard/core/plugins/hooks/manager.py +0 -403
  150. truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
  151. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
  152. truthound_dashboard/core/reporters/junit_reporter.py +0 -233
  153. truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
  154. truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
  155. truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
  156. truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
  157. truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
  158. truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
  159. truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
  160. truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
  161. truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
  162. truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
  163. truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
  164. truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
  165. truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
  166. truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
  167. truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
  168. truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
  169. truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
  170. truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
  171. truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
  172. truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
  173. truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
  174. truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
  175. truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
  176. truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
  177. truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
  178. truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
  179. truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
  180. truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
  181. truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
  182. truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
  183. truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
  184. truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
  185. truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
  186. truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
  187. truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
  188. truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
  189. truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
  190. truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
  191. truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
  192. truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
  193. truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
  194. truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
  195. truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
  196. truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
  197. truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
  198. truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
  199. truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
  200. truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
  201. truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
  202. truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
  203. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/WHEEL +0 -0
  204. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/entry_points.txt +0 -0
  205. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1672 @@
1
+ """DataSource factory for truthound datasources.
2
+
3
+ This module provides a unified interface to create truthound DataSource objects
4
+ from various backend types (files, SQL databases, cloud warehouses, etc.).
5
+
6
+ The factory pattern allows the dashboard to support multiple data backends
7
+ through the truthound datasources API while maintaining a consistent interface
8
+ for services. The design prioritizes loose coupling with truthound for
9
+ maintainability and testability.
10
+
11
+ Architecture:
12
+ SourceConfig -> DataSourceFactory -> truthound.datasources.*
13
+
14
+ Updated for truthound 2.x API:
15
+ - Uses truthound.datasources.get_datasource() for auto-detection
16
+ - Uses SQLDataSourceConfig for SQL sources
17
+ - Uses DataSourceCapability for feature detection
18
+
19
+ Supported Data Sources:
20
+ - File: CSV, Parquet, JSON, NDJSON, JSONL
21
+ - DataFrame: Polars, Pandas
22
+ - SQL: SQLite, PostgreSQL, MySQL, DuckDB
23
+ - Cloud DW: BigQuery, Snowflake, Redshift, Databricks
24
+ - Enterprise: Oracle, SQL Server
25
+ - NoSQL: MongoDB, Elasticsearch (async)
26
+ - Streaming: Kafka (async)
27
+
28
+ Example:
29
+ factory = DataSourceFactory()
30
+ source = factory.create_from_config(source_config)
31
+ report = th.check(source=source)
32
+
33
+ # Or use the get_datasource convenience function
34
+ from truthound.datasources import get_datasource
35
+ ds = get_datasource("data.csv") # Auto-detect file type
36
+ ds = get_datasource("postgresql://user:pass@localhost/db", table="users")
37
+ """
38
+
39
+ from __future__ import annotations
40
+
41
+ import logging
42
+ from abc import ABC, abstractmethod
43
+ from dataclasses import dataclass, field
44
+ from enum import Enum
45
+ from pathlib import Path
46
+ from typing import Any, Protocol, runtime_checkable
47
+
48
+ logger = logging.getLogger(__name__)
49
+
50
+
51
+ # =============================================================================
52
+ # Source Type Enumeration
53
+ # =============================================================================
54
+
55
+
56
class SourceType(str, Enum):
    """Supported data source types.

    Members subclass ``str``, so each one compares equal to its plain string
    value (for example ``SourceType.CSV == "csv"``).

    The ``is_*_type`` predicates match case-insensitively so that they agree
    with the creators in this module, which compare
    ``config.source_type.lower()`` against enum members.
    """

    # File-based
    FILE = "file"
    CSV = "csv"
    PARQUET = "parquet"
    JSON = "json"
    NDJSON = "ndjson"
    JSONL = "jsonl"

    # DataFrame
    POLARS = "polars"
    PANDAS = "pandas"

    # Core SQL
    SQLITE = "sqlite"
    POSTGRESQL = "postgresql"
    MYSQL = "mysql"
    DUCKDB = "duckdb"

    # Cloud Data Warehouses
    BIGQUERY = "bigquery"
    SNOWFLAKE = "snowflake"
    REDSHIFT = "redshift"
    DATABRICKS = "databricks"

    # Enterprise
    ORACLE = "oracle"
    SQLSERVER = "sqlserver"

    # NoSQL (async)
    MONGODB = "mongodb"
    ELASTICSEARCH = "elasticsearch"

    # Streaming (async)
    KAFKA = "kafka"

    @classmethod
    def _is_in_group(cls, source_type: str, group: "set[SourceType]") -> bool:
        """Return True when ``source_type`` names a member contained in ``group``.

        Args:
            source_type: Type name (any letter case) or a ``SourceType`` member.
            group: Members that count as a match.

        Returns:
            False for unknown names and for non-string input; never raises.
        """
        # Non-strings (e.g. None) have no .lower(); they are never a valid type.
        if not isinstance(source_type, str):
            return False
        try:
            return cls(source_type.lower()) in group
        except ValueError:
            # Not a known source type at all.
            return False

    @classmethod
    def is_file_type(cls, source_type: str) -> bool:
        """Check if source type is file-based (case-insensitive)."""
        return cls._is_in_group(
            source_type,
            {
                cls.FILE,
                cls.CSV,
                cls.PARQUET,
                cls.JSON,
                cls.NDJSON,
                cls.JSONL,
            },
        )

    @classmethod
    def is_sql_type(cls, source_type: str) -> bool:
        """Check if source type is SQL-based (case-insensitive)."""
        return cls._is_in_group(
            source_type,
            {
                cls.SQLITE,
                cls.POSTGRESQL,
                cls.MYSQL,
                cls.DUCKDB,
                cls.BIGQUERY,
                cls.SNOWFLAKE,
                cls.REDSHIFT,
                cls.DATABRICKS,
                cls.ORACLE,
                cls.SQLSERVER,
            },
        )

    @classmethod
    def is_async_type(cls, source_type: str) -> bool:
        """Check if source type requires async operations (case-insensitive)."""
        return cls._is_in_group(
            source_type,
            {
                cls.MONGODB,
                cls.ELASTICSEARCH,
                cls.KAFKA,
            },
        )
142
+
143
+
144
+ # =============================================================================
145
+ # Source Configuration
146
+ # =============================================================================
147
+
148
+
149
@dataclass
class SourceConfig:
    """Flat bag of settings from which a data source can be built.

    Every option for every supported backend lives on this one dataclass;
    which of them are actually consulted depends on ``source_type``.

    Attributes:
        source_type: Type of data source (file, postgresql, etc.).
        name: Human-readable name for the source.

        path: File path for file-based sources.

        table: Table name for SQL sources.
        query: Custom SQL query (alternative to table).
        host: Database host.
        port: Database port.
        database: Database name.
        schema_name: Database schema (e.g., "public" for PostgreSQL).
        user: Database username.
        password: Database password.
        connection_string: Full connection string (alternative to the
            individual parameters).

        project: GCP project ID (BigQuery).
        dataset: BigQuery dataset name.
        account: Snowflake account identifier.
        warehouse: Snowflake warehouse name.
        credentials_path: Path to credentials file (BigQuery).
        access_token: Access token (Databricks).
        http_path: HTTP path for SQL warehouse (Databricks).
        catalog: Unity Catalog name (Databricks).
        cluster_identifier: Redshift cluster ID.
        iam_auth: Use IAM authentication (Redshift).

        service_name: Oracle service name.
        sid: Oracle SID.
        trusted_connection: Windows auth (SQL Server).

        collection: MongoDB collection name.
        index: Elasticsearch index name.

        topic: Kafka topic name.
        bootstrap_servers: Kafka bootstrap servers.
        group_id: Kafka consumer group ID.
        max_messages: Streaming message limit.

        pool_size: Connection pool size.
        query_timeout: Query timeout in seconds.
        max_rows: Maximum rows to fetch.
        sample_size: Sample size for large datasets.

        extra: Any options that are not one of the named fields above.
    """

    source_type: str
    name: str | None = None

    # File-based
    path: str | None = None

    # SQL common
    table: str | None = None
    query: str | None = None
    host: str | None = None
    port: int | None = None
    database: str | None = None
    schema_name: str | None = None
    user: str | None = None
    password: str | None = None
    connection_string: str | None = None

    # Cloud DW
    project: str | None = None
    dataset: str | None = None
    account: str | None = None
    warehouse: str | None = None
    credentials_path: str | None = None
    access_token: str | None = None
    http_path: str | None = None
    catalog: str | None = None
    cluster_identifier: str | None = None
    iam_auth: bool = False

    # Enterprise
    service_name: str | None = None
    sid: str | None = None
    trusted_connection: bool = False

    # NoSQL
    collection: str | None = None
    index: str | None = None

    # Streaming
    topic: str | None = None
    bootstrap_servers: str | None = None
    group_id: str | None = None
    max_messages: int | None = None

    # General
    pool_size: int | None = None
    query_timeout: float | None = None
    max_rows: int | None = None
    sample_size: int | None = None

    # Extra options (for extensibility)
    extra: dict[str, Any] = field(default_factory=dict)

    # Unannotated on purpose: these are plain class constants, not dataclass
    # fields. Tuple order is the key order emitted by to_dict().
    _VALUE_KEYS = (
        "name",
        "path",
        "table",
        "query",
        "host",
        "port",
        "database",
        "schema_name",
        "user",
        "password",
        "connection_string",
        "project",
        "dataset",
        "account",
        "warehouse",
        "credentials_path",
        "access_token",
        "http_path",
        "catalog",
        "cluster_identifier",
        "service_name",
        "sid",
        "collection",
        "index",
        "topic",
        "bootstrap_servers",
        "group_id",
        "max_messages",
        "pool_size",
        "query_timeout",
        "max_rows",
        "sample_size",
    )
    # Boolean switches; serialised only when truthy.
    _FLAG_KEYS = ("iam_auth", "trusted_connection")

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "SourceConfig":
        """Build a SourceConfig out of a plain mapping.

        Keys that match a named field are passed to the constructor; every
        other key (apart from the type keys) is collected into ``extra``.

        Args:
            data: Source configuration. Must carry a non-empty
                'source_type' or 'type' entry; 'source_type' wins when
                both are present and truthy.

        Returns:
            SourceConfig instance.

        Raises:
            ValueError: If neither type key holds a truthy value.
        """
        resolved_type = data.get("source_type") or data.get("type")
        if not resolved_type:
            raise ValueError("source_type or type is required")

        type_aliases = ("type", "source_type")
        recognised = frozenset(cls._VALUE_KEYS + cls._FLAG_KEYS)

        init_args: dict[str, Any] = {"source_type": resolved_type}
        init_args.update(
            (key, value) for key, value in data.items() if key in recognised
        )

        leftovers: dict[str, Any] = {
            key: value
            for key, value in data.items()
            if key not in recognised and key not in type_aliases
        }
        # Only override the default factory when there is something to store.
        if leftovers:
            init_args["extra"] = leftovers

        return cls(**init_args)

    def to_dict(self) -> dict[str, Any]:
        """Serialise to a flat dict, leaving out unset options.

        The type is written under the 'type' key. ``None``-valued options
        and falsy boolean flags are omitted. Entries of ``extra`` are merged
        in last, at the top level.
        """
        payload: dict[str, Any] = {"type": self.source_type}

        candidates = ((key, getattr(self, key)) for key in self._VALUE_KEYS)
        payload.update(
            (key, value) for key, value in candidates if value is not None
        )

        for flag in self._FLAG_KEYS:
            if getattr(self, flag):
                payload[flag] = True

        if self.extra:
            payload.update(self.extra)

        return payload
387
+
388
+
389
+ # =============================================================================
390
+ # DataSource Protocol (for loose coupling)
391
+ # =============================================================================
392
+
393
+
394
@runtime_checkable
class DataSourceProtocol(Protocol):
    """Structural interface expected of truthound DataSource objects.

    Because the protocol is runtime-checkable, ``isinstance(obj,
    DataSourceProtocol)`` can be used to test for the presence of these
    members without importing any concrete truthound class. That check only
    verifies that the members exist, not their types.
    """

    @property
    def name(self) -> str:
        """Name of the source."""
        ...

    @property
    def schema(self) -> dict[str, Any]:
        """Schema of the source as a dictionary."""
        ...

    @property
    def columns(self) -> list[str]:
        """Names of the columns."""
        ...

    @property
    def row_count(self) -> int | None:
        """Number of rows, or ``None`` when not available."""
        ...

    def to_polars_lazyframe(self) -> Any:
        """Return the data as a Polars LazyFrame."""
        ...
425
+
426
+
427
+ # =============================================================================
428
+ # Backend Strategy Pattern (for extensibility)
429
+ # =============================================================================
430
+
431
+
432
class DataSourceCreator(ABC):
    """Strategy interface for building one kind of data source.

    Concrete subclasses pair a predicate (``can_create``) with the
    construction logic (``create``) for the source type they handle.
    """

    @abstractmethod
    def can_create(self, config: SourceConfig) -> bool:
        """Return True if this creator is able to handle ``config``."""
        ...

    @abstractmethod
    def create(self, config: SourceConfig) -> Any:
        """Build and return the data source described by ``config``."""
        ...
448
+
449
+
450
class FileSourceCreator(DataSourceCreator):
    """Creator for file-based data sources.

    Import strategy, tried in order:
    1. ``truthound.datasources.polars_source.FileDataSource`` (with an
       optional ``FileDataSourceConfig``).
    2. ``truthound.datasources.FileDataSource``.
    3. The plain path string, when neither import is available.
    """

    def can_create(self, config: SourceConfig) -> bool:
        """Return True when ``config.source_type`` is a file-based type."""
        return SourceType.is_file_type(config.source_type)

    def create(self, config: SourceConfig) -> Any:
        """Create a file-based data source for ``config.path``.

        Args:
            config: Source configuration; ``path`` must be set. When
                ``extra`` is non-empty, its ``infer_schema_length``,
                ``ignore_errors``, ``encoding`` and ``separator`` entries
                (with defaults) are forwarded via ``FileDataSourceConfig``.

        Returns:
            A ``FileDataSource``, or the path as ``str`` if truthound's
            datasources package cannot be imported.

        Raises:
            ValueError: If ``config.path`` is empty.
            FileNotFoundError: If the path does not exist.
        """
        if not config.path:
            raise ValueError("path is required for file sources")

        file_path = Path(config.path)
        if not file_path.exists():
            raise FileNotFoundError(f"File not found: {config.path}")

        location = str(file_path)

        try:
            # Preferred: truthound 2.x module layout.
            from truthound.datasources.polars_source import (
                FileDataSource,
                FileDataSourceConfig,
            )

            options = config.extra
            if not options:
                return FileDataSource(location)

            reader_config = FileDataSourceConfig(
                infer_schema_length=options.get("infer_schema_length", 10000),
                ignore_errors=options.get("ignore_errors", False),
                encoding=options.get("encoding", "utf8"),
                separator=options.get("separator", ","),
            )
            return FileDataSource(location, config=reader_config)

        except ImportError:
            try:
                # Older layout: class exported from the package root.
                from truthound.datasources import FileDataSource

                return FileDataSource(location)
            except ImportError:
                # Last resort: hand back the path itself (backward compatible);
                # truthound core functions also accept path strings.
                logger.debug("truthound.datasources not available, using path string")
                return location
500
+
501
+
502
class SQLiteSourceCreator(DataSourceCreator):
    """Creator for SQLite data sources.

    Prefers ``truthound.datasources.sql.sqlite`` (``SQLiteDataSource`` plus
    ``SQLiteDataSourceConfig``) and falls back to importing
    ``SQLiteDataSource`` from ``truthound.datasources.sql``.
    """

    @staticmethod
    def _target_kwargs(config: SourceConfig, database: Any) -> dict[str, Any]:
        """Return the ``table``/``query`` + ``database`` keyword arguments.

        ``table`` takes precedence over ``query``.

        Raises:
            ValueError: If neither ``table`` nor ``query`` is set.
        """
        if config.table:
            return {"table": config.table, "database": database}
        if config.query:
            return {"query": config.query, "database": database}
        raise ValueError("table or query is required for SQLite")

    def can_create(self, config: SourceConfig) -> bool:
        """Return True when the lower-cased source type is ``sqlite``."""
        return config.source_type.lower() == SourceType.SQLITE

    def create(self, config: SourceConfig) -> Any:
        """Create a SQLite data source.

        The database file is ``config.database`` if set, otherwise
        ``config.path``.

        Raises:
            ValueError: If neither ``database`` nor ``path`` is set, or if
                neither ``table`` nor ``query`` is set.
        """
        if not (config.database or config.path):
            raise ValueError("database or path is required for SQLite")

        db_file = config.database or config.path

        try:
            # Preferred: explicit truthound 2.x import path.
            from truthound.datasources.sql.sqlite import (
                SQLiteDataSource,
                SQLiteDataSourceConfig,
            )

            # A config object is only built when there are extra options or
            # an explicit timeout; the timeout defaults to 5.0.
            advanced = None
            if config.extra or config.query_timeout:
                advanced = SQLiteDataSourceConfig(
                    database=db_file,
                    timeout=config.query_timeout or 5.0,
                )

            arguments = self._target_kwargs(config, db_file)
            if advanced:
                arguments["config"] = advanced
            return SQLiteDataSource(**arguments)

        except ImportError:
            # Fallback: older import path, no config object support used.
            from truthound.datasources.sql import SQLiteDataSource

            return SQLiteDataSource(**self._target_kwargs(config, db_file))
555
+
556
+
557
class DuckDBSourceCreator(DataSourceCreator):
    """Creator for DuckDB data sources.

    Uses ``truthound.datasources.sql.DuckDBDataSource`` when it can be
    imported. Otherwise the data is read with Polars and wrapped in a
    ``PolarsDataSource``.
    """

    def can_create(self, config: SourceConfig) -> bool:
        """Return True when the lower-cased source type is ``duckdb``."""
        return config.source_type.lower() == SourceType.DUCKDB

    def create(self, config: SourceConfig) -> Any:
        """Create a DuckDB data source.

        The database is ``config.database`` if set, otherwise ``config.path``.

        Raises:
            ValueError: If neither ``database`` nor ``path`` is set, or if
                neither ``table`` nor ``query`` is set.
            ImportError: If the Polars fallback cannot import what it needs,
                or if reading through Polars fails for any reason.
        """
        if not (config.database or config.path):
            raise ValueError("database or path is required for DuckDB")

        db_location = config.database or config.path

        try:
            # First choice: truthound's own DuckDB data source.
            from truthound.datasources.sql import DuckDBDataSource

            if config.table:
                return DuckDBDataSource(table=config.table, database=db_location)
            if config.query:
                return DuckDBDataSource(query=config.query, database=db_location)
            raise ValueError("table or query is required for DuckDB")

        except ImportError:
            logger.debug("truthound DuckDB not available, using Polars fallback")
            return self._create_via_polars(config, db_location)

    def _create_via_polars(self, config: SourceConfig, database: Any) -> Any:
        """Read from DuckDB with Polars and wrap the result.

        Any ImportError raised in here, including the one synthesised from a
        failed read, is re-raised with a "requires additional packages" prefix.
        """
        try:
            import polars as pl

            if not (config.table or config.query):
                raise ValueError("table or query is required for DuckDB")

            # NOTE(review): the table name is interpolated into SQL unquoted;
            # confirm that config.table never carries untrusted input.
            sql = config.query or f"SELECT * FROM {config.table}"

            try:
                frame = pl.read_database_uri(sql, f"duckdb:///{database}")
            except Exception as read_err:
                # Every read failure is reported as a missing-connector problem.
                raise ImportError(
                    f"Failed to read from DuckDB: {read_err}. "
                    "Install DuckDB connector with: pip install duckdb connectorx"
                ) from read_err

            # Wrap in PolarsDataSource so callers get a data source object.
            from truthound.datasources import PolarsDataSource

            return PolarsDataSource(frame, name=config.name or database)

        except ImportError as ie:
            raise ImportError(
                f"DuckDB support requires additional packages. {ie}"
            ) from ie
611
+
612
+
613
class PostgreSQLSourceCreator(DataSourceCreator):
    """Builds PostgreSQL data sources.

    The creator first imports ``PostgreSQLDataSource`` and
    ``PostgreSQLDataSourceConfig`` from
    ``truthound.datasources.sql.postgresql``. When that attempt raises
    ``ImportError`` it falls back to ``truthound.datasources.sql`` and passes
    plain keyword arguments instead of a config object.
    """

    def can_create(self, config: SourceConfig) -> bool:
        """Return True when the lower-cased source type is PostgreSQL."""
        requested_type = config.source_type.lower()
        return requested_type == SourceType.POSTGRESQL

    def create(self, config: SourceConfig) -> Any:
        """Create a PostgreSQL data source from ``config``.

        A connection string, when present, takes precedence over the
        individual host/database parameters.

        Raises:
            ValueError: If neither table nor query is given, or if host or
                database is missing when no connection string is supplied.
        """
        try:
            # Config-object based API.
            from truthound.datasources.sql.postgresql import (
                PostgreSQLDataSource,
                PostgreSQLDataSourceConfig,
            )

            if config.connection_string:
                if not (config.table or config.query):
                    raise ValueError("table or query is required")
                return PostgreSQLDataSource.from_connection_string(
                    connection_string=config.connection_string,
                    table=config.table,
                    query=config.query,
                    schema_name=config.schema_name,
                )

            if not (config.host and config.database):
                raise ValueError("host and database are required for PostgreSQL")

            # An absent/empty ``extra`` behaves like an empty mapping, so every
            # lookup below yields its default.
            extra = config.extra or {}
            pg_config = PostgreSQLDataSourceConfig(
                host=config.host,
                port=config.port or 5432,
                database=config.database,
                user=config.user or "postgres",
                password=config.password,
                sslmode=extra.get("sslmode", "prefer"),
                application_name=extra.get("application_name", "truthound-dashboard"),
                schema_name=config.schema_name or "public",
                pool_size=config.pool_size or 10,
                query_timeout=config.query_timeout or 300.0,
            )

            if config.table:
                return PostgreSQLDataSource(table=config.table, config=pg_config)
            if config.query:
                return PostgreSQLDataSource(query=config.query, config=pg_config)
            raise ValueError("table or query is required")

        except ImportError:
            # Keyword-argument based API from the package-level module.
            from truthound.datasources.sql import PostgreSQLDataSource

            if config.connection_string:
                if not (config.table or config.query):
                    raise ValueError("table or query is required")
                return PostgreSQLDataSource.from_connection_string(
                    connection_string=config.connection_string,
                    table=config.table,
                    query=config.query,
                    schema_name=config.schema_name,
                )

            if not (config.host and config.database):
                raise ValueError("host and database are required for PostgreSQL")

            kwargs: dict[str, Any] = {
                "host": config.host,
                "database": config.database,
            }

            if config.table:
                kwargs["table"] = config.table
            elif config.query:
                kwargs["query"] = config.query
            else:
                raise ValueError("table or query is required")

            # Only forward optional settings that are actually set.
            optional_settings = (
                ("port", config.port),
                ("user", config.user),
                ("password", config.password),
                ("schema_name", config.schema_name),
            )
            kwargs.update({key: value for key, value in optional_settings if value})

            return PostgreSQLDataSource(**kwargs)
708
+
709
+
710
class MySQLSourceCreator(DataSourceCreator):
    """Builds MySQL data sources.

    ``MySQLDataSource`` and ``MySQLDataSourceConfig`` are imported from
    ``truthound.datasources.sql.mysql`` first; on ``ImportError`` the creator
    falls back to ``truthound.datasources.sql`` with plain keyword arguments.
    """

    def can_create(self, config: SourceConfig) -> bool:
        """Return True when the lower-cased source type is MySQL."""
        requested_type = config.source_type.lower()
        return requested_type == SourceType.MYSQL

    def create(self, config: SourceConfig) -> Any:
        """Create a MySQL data source from ``config``.

        With a connection string, only ``table`` is forwarded to
        ``from_connection_string``. Otherwise host and database are required
        together with either a table or a query.

        Raises:
            ValueError: If host/database or table/query are missing on the
                parameter-based path.
        """
        try:
            # Config-object based API.
            from truthound.datasources.sql.mysql import (
                MySQLDataSource,
                MySQLDataSourceConfig,
            )

            if config.connection_string:
                return MySQLDataSource.from_connection_string(
                    connection_string=config.connection_string,
                    table=config.table,
                )

            if not (config.host and config.database):
                raise ValueError("host and database are required for MySQL")

            # Treat a missing ``extra`` as empty so defaults apply.
            extra = config.extra or {}
            mysql_config = MySQLDataSourceConfig(
                host=config.host,
                port=config.port or 3306,
                database=config.database,
                user=config.user or "root",
                password=config.password,
                charset=extra.get("charset", "utf8mb4"),
                autocommit=extra.get("autocommit", True),
            )

            if config.table:
                return MySQLDataSource(table=config.table, config=mysql_config)
            if config.query:
                return MySQLDataSource(query=config.query, config=mysql_config)
            raise ValueError("table or query is required")

        except ImportError:
            # Keyword-argument based API from the package-level module.
            from truthound.datasources.sql import MySQLDataSource

            if config.connection_string:
                return MySQLDataSource.from_connection_string(
                    connection_string=config.connection_string,
                    table=config.table,
                )

            if not (config.host and config.database):
                raise ValueError("host and database are required for MySQL")

            kwargs: dict[str, Any] = {
                "host": config.host,
                "database": config.database,
            }

            if config.table:
                kwargs["table"] = config.table
            elif config.query:
                kwargs["query"] = config.query
            else:
                raise ValueError("table or query is required")

            # Only forward optional settings that are actually set.
            optional_settings = (
                ("port", config.port),
                ("user", config.user),
                ("password", config.password),
            )
            kwargs.update({key: value for key, value in optional_settings if value})

            return MySQLDataSource(**kwargs)
789
+
790
+
791
class BigQuerySourceCreator(DataSourceCreator):
    """Creator for BigQuery data sources.

    ``BigQueryDataSource`` and ``BigQueryConfig`` are imported from
    ``truthound.datasources.sql.bigquery`` first; on ``ImportError`` the
    creator falls back to ``truthound.datasources.sql`` with plain keyword
    arguments. Both paths require a project and either a table or a query.
    """

    def can_create(self, config: SourceConfig) -> bool:
        """Return True when the lower-cased source type is BigQuery."""
        return config.source_type.lower() == SourceType.BIGQUERY

    def create(self, config: SourceConfig) -> Any:
        """Create a BigQuery data source from ``config``.

        Args:
            config: Source configuration. ``project`` is mandatory; optional
                ``location``, ``use_legacy_sql`` and ``maximum_bytes_billed``
                values are read from ``config.extra``.

        Returns:
            A ``BigQueryDataSource`` instance.

        Raises:
            ValueError: If ``project`` is missing, or if neither ``table``
                nor ``query`` is provided (on either import path).
        """
        if not config.project:
            raise ValueError("project is required for BigQuery")

        try:
            # Config-object based API.
            from truthound.datasources.sql.bigquery import (
                BigQueryDataSource,
                BigQueryConfig,
            )

            # Treat a missing ``extra`` as empty so defaults apply.
            extra = config.extra or {}
            bq_config = BigQueryConfig(
                dataset=config.dataset,
                location=extra.get("location"),
                use_legacy_sql=extra.get("use_legacy_sql", False),
                maximum_bytes_billed=extra.get("maximum_bytes_billed"),
                job_timeout=config.query_timeout or 300,
            )

            if config.table:
                return BigQueryDataSource(
                    table=config.table,
                    project=config.project,
                    credentials_path=config.credentials_path,
                    config=bq_config,
                )
            elif config.query:
                return BigQueryDataSource(
                    query=config.query,
                    project=config.project,
                    credentials_path=config.credentials_path,
                    config=bq_config,
                )
            else:
                raise ValueError("table or query is required for BigQuery")

        except ImportError:
            # Keyword-argument based API from the package-level module.
            from truthound.datasources.sql import BigQueryDataSource

            kwargs: dict[str, Any] = {"project": config.project}

            if config.dataset:
                kwargs["dataset"] = config.dataset
            if config.table:
                kwargs["table"] = config.table
            elif config.query:
                kwargs["query"] = config.query
            else:
                # Mirror the primary path: a source without a table or query
                # is a configuration error, not something to pass through.
                raise ValueError("table or query is required for BigQuery")
            if config.credentials_path:
                kwargs["credentials_path"] = config.credentials_path

            return BigQueryDataSource(**kwargs)
855
+
856
+
857
class SnowflakeSourceCreator(DataSourceCreator):
    """Builds Snowflake data sources.

    ``SnowflakeDataSource`` and ``SnowflakeConfig`` are imported from
    ``truthound.datasources.sql.snowflake`` first; on ``ImportError`` the
    creator falls back to ``truthound.datasources.sql`` with plain keyword
    arguments.
    """

    def can_create(self, config: SourceConfig) -> bool:
        """Return True when the lower-cased source type is Snowflake."""
        requested_type = config.source_type.lower()
        return requested_type == SourceType.SNOWFLAKE

    def create(self, config: SourceConfig) -> Any:
        """Create a Snowflake data source from ``config``.

        Authentication-related options (role, authenticator, private key
        settings, session keep-alive) are read from ``config.extra``.

        Raises:
            ValueError: If ``account`` is missing or neither table nor query
                is provided.
        """
        if not config.account:
            raise ValueError("account is required for Snowflake")

        try:
            # Config-object based API.
            from truthound.datasources.sql.snowflake import (
                SnowflakeDataSource,
                SnowflakeConfig,
            )

            # Treat a missing ``extra`` as empty so defaults apply.
            extra = config.extra or {}
            sf_config = SnowflakeConfig(
                account=config.account,
                user=config.user,
                password=config.password,
                database=config.database,
                schema_name=config.schema_name or "PUBLIC",
                warehouse=config.warehouse,
                role=extra.get("role"),
                authenticator=extra.get("authenticator", "snowflake"),
                private_key_path=extra.get("private_key_path"),
                private_key_passphrase=extra.get("private_key_passphrase"),
                client_session_keep_alive=extra.get("client_session_keep_alive", True),
            )

            if config.table:
                return SnowflakeDataSource(table=config.table, config=sf_config)
            if config.query:
                return SnowflakeDataSource(query=config.query, config=sf_config)
            raise ValueError("table or query is required")

        except ImportError:
            # Keyword-argument based API from the package-level module.
            from truthound.datasources.sql import SnowflakeDataSource

            kwargs: dict[str, Any] = {"account": config.account}

            if config.table:
                kwargs["table"] = config.table
            elif config.query:
                kwargs["query"] = config.query
            else:
                raise ValueError("table or query is required")

            # Only forward optional settings that are actually set. Note the
            # schema is passed under the ``schema`` keyword on this path.
            optional_settings = (
                ("database", config.database),
                ("schema", config.schema_name),
                ("warehouse", config.warehouse),
                ("user", config.user),
                ("password", config.password),
            )
            kwargs.update({key: value for key, value in optional_settings if value})

            return SnowflakeDataSource(**kwargs)
926
+
927
+
928
class RedshiftSourceCreator(DataSourceCreator):
    """Builds Redshift data sources via ``truthound.datasources.sql``."""

    def can_create(self, config: SourceConfig) -> bool:
        """Return True when the lower-cased source type is Redshift."""
        requested_type = config.source_type.lower()
        return requested_type == SourceType.REDSHIFT

    def create(self, config: SourceConfig) -> Any:
        """Create a ``RedshiftDataSource`` from ``config``.

        Raises:
            ValueError: If host or database is missing, or if neither table
                nor query is provided.
        """
        from truthound.datasources.sql import RedshiftDataSource

        if not (config.host and config.database):
            raise ValueError("host and database are required for Redshift")

        kwargs: dict[str, Any] = {
            "host": config.host,
            "database": config.database,
        }

        if config.table:
            kwargs["table"] = config.table
        elif config.query:
            kwargs["query"] = config.query
        else:
            raise ValueError("table or query is required")

        # Only forward optional settings that are actually set.
        optional_settings = (
            ("port", config.port),
            ("user", config.user),
            ("password", config.password),
            ("schema", config.schema_name),
            ("cluster_identifier", config.cluster_identifier),
        )
        kwargs.update({key: value for key, value in optional_settings if value})

        # The flag is normalised to a literal True when enabled.
        if config.iam_auth:
            kwargs["iam_auth"] = True

        return RedshiftDataSource(**kwargs)
966
+
967
+
968
class DatabricksSourceCreator(DataSourceCreator):
    """Builds Databricks data sources.

    ``DatabricksDataSource`` and ``DatabricksConfig`` are imported from
    ``truthound.datasources.sql.databricks`` first; on ``ImportError`` the
    creator falls back to ``truthound.datasources.sql`` with plain keyword
    arguments.
    """

    def can_create(self, config: SourceConfig) -> bool:
        """Return True when the lower-cased source type is Databricks."""
        requested_type = config.source_type.lower()
        return requested_type == SourceType.DATABRICKS

    def create(self, config: SourceConfig) -> Any:
        """Create a Databricks data source from ``config``.

        Cloud-fetch, download-thread and OAuth options are read from
        ``config.extra``.

        Raises:
            ValueError: If host or http_path is missing, or if neither table
                nor query is provided.
        """
        if not (config.host and config.http_path):
            raise ValueError("host and http_path are required for Databricks")

        try:
            # Config-object based API.
            from truthound.datasources.sql.databricks import (
                DatabricksDataSource,
                DatabricksConfig,
            )

            # Treat a missing ``extra`` as empty so defaults apply.
            extra = config.extra or {}
            db_config = DatabricksConfig(
                host=config.host,
                http_path=config.http_path,
                access_token=config.access_token,
                catalog=config.catalog,
                use_cloud_fetch=extra.get("use_cloud_fetch", True),
                max_download_threads=extra.get("max_download_threads", 10),
                client_id=extra.get("client_id"),
                client_secret=extra.get("client_secret"),
                use_oauth=extra.get("use_oauth", False),
            )

            if config.table:
                return DatabricksDataSource(table=config.table, schema=config.schema_name, config=db_config)
            if config.query:
                return DatabricksDataSource(query=config.query, config=db_config)
            raise ValueError("table or query is required")

        except ImportError:
            # Keyword-argument based API from the package-level module.
            from truthound.datasources.sql import DatabricksDataSource

            kwargs: dict[str, Any] = {
                "host": config.host,
                "http_path": config.http_path,
            }

            if config.table:
                kwargs["table"] = config.table
            elif config.query:
                kwargs["query"] = config.query
            else:
                raise ValueError("table or query is required")

            # Only forward optional settings that are actually set.
            optional_settings = (
                ("access_token", config.access_token),
                ("catalog", config.catalog),
                ("schema", config.schema_name),
            )
            kwargs.update({key: value for key, value in optional_settings if value})

            return DatabricksDataSource(**kwargs)
1034
+
1035
+
1036
class OracleSourceCreator(DataSourceCreator):
    """Builds Oracle data sources via ``truthound.datasources.sql``."""

    def can_create(self, config: SourceConfig) -> bool:
        """Return True when the lower-cased source type is Oracle."""
        requested_type = config.source_type.lower()
        return requested_type == SourceType.ORACLE

    def create(self, config: SourceConfig) -> Any:
        """Create an ``OracleDataSource`` from ``config``.

        Only settings that are set are forwarded. ``service_name`` takes
        precedence over ``sid`` when both are present.

        Raises:
            ValueError: If neither table nor query is provided.
        """
        from truthound.datasources.sql import OracleDataSource

        kwargs: dict[str, Any] = {}

        if config.table:
            kwargs["table"] = config.table
        elif config.query:
            kwargs["query"] = config.query
        else:
            raise ValueError("table or query is required")

        for key, value in (("host", config.host), ("port", config.port)):
            if value:
                kwargs[key] = value

        # The two identifiers are mutually exclusive; service_name wins.
        if config.service_name:
            kwargs["service_name"] = config.service_name
        elif config.sid:
            kwargs["sid"] = config.sid

        for key, value in (("user", config.user), ("password", config.password)):
            if value:
                kwargs[key] = value

        return OracleDataSource(**kwargs)
1068
+
1069
+
1070
class SQLServerSourceCreator(DataSourceCreator):
    """Builds SQL Server data sources via ``truthound.datasources.sql``."""

    def can_create(self, config: SourceConfig) -> bool:
        """Return True when the lower-cased source type is SQL Server."""
        requested_type = config.source_type.lower()
        return requested_type == SourceType.SQLSERVER

    def create(self, config: SourceConfig) -> Any:
        """Create a ``SQLServerDataSource`` from ``config``.

        Only settings that are set are forwarded to the constructor.

        Raises:
            ValueError: If neither table nor query is provided.
        """
        from truthound.datasources.sql import SQLServerDataSource

        kwargs: dict[str, Any] = {}

        if config.table:
            kwargs["table"] = config.table
        elif config.query:
            kwargs["query"] = config.query
        else:
            raise ValueError("table or query is required")

        optional_settings = (
            ("host", config.host),
            ("port", config.port),
            ("database", config.database),
            ("user", config.user),
            ("password", config.password),
            ("schema", config.schema_name),
        )
        kwargs.update({key: value for key, value in optional_settings if value})

        # The flag is normalised to a literal True when enabled.
        if config.trusted_connection:
            kwargs["trusted_connection"] = True

        return SQLServerDataSource(**kwargs)
1104
+
1105
+
1106
+ # =============================================================================
1107
+ # Async Source Creators
1108
+ # =============================================================================
1109
+
1110
+
1111
class MongoDBSourceCreator(DataSourceCreator):
    """Builds MongoDB data sources; only asynchronous creation is supported."""

    def can_create(self, config: SourceConfig) -> bool:
        """Return True when the lower-cased source type is MongoDB."""
        requested_type = config.source_type.lower()
        return requested_type == SourceType.MONGODB

    def create(self, config: SourceConfig) -> Any:
        """Always raise: MongoDB sources must be built with ``create_async``."""
        raise ValueError(
            "MongoDB requires async creation. Use create_async() instead."
        )

    async def create_async(self, config: SourceConfig) -> Any:
        """Create a MongoDB data source with truthound's ``from_mongodb``.

        When no connection string is configured, one is derived from the host
        and port (port defaults to 27017).

        Raises:
            ValueError: If both connection string and host are missing, or if
                database or collection is missing.
        """
        from truthound.datasources import from_mongodb

        if not (config.connection_string or config.host):
            raise ValueError("connection_string or host is required for MongoDB")
        if not config.database:
            raise ValueError("database is required for MongoDB")
        if not config.collection:
            raise ValueError("collection is required for MongoDB")

        if config.connection_string:
            mongo_uri = config.connection_string
        else:
            mongo_uri = f"mongodb://{config.host}:{config.port or 27017}"

        return await from_mongodb(
            connection_string=mongo_uri,
            database=config.database,
            collection=config.collection,
        )
1141
+
1142
+
1143
class ElasticsearchSourceCreator(DataSourceCreator):
    """Builds Elasticsearch data sources; only asynchronous creation is supported."""

    def can_create(self, config: SourceConfig) -> bool:
        """Return True when the lower-cased source type is Elasticsearch."""
        requested_type = config.source_type.lower()
        return requested_type == SourceType.ELASTICSEARCH

    def create(self, config: SourceConfig) -> Any:
        """Always raise: Elasticsearch sources must be built with ``create_async``."""
        raise ValueError(
            "Elasticsearch requires async creation. Use create_async() instead."
        )

    async def create_async(self, config: SourceConfig) -> Any:
        """Create an Elasticsearch data source with ``from_elasticsearch``.

        A host that already contains ``://`` is used verbatim; otherwise an
        ``http://host:port`` URL is built (port defaults to 9200).

        Raises:
            ValueError: If host or index is missing.
        """
        from truthound.datasources import from_elasticsearch

        if not config.host:
            raise ValueError("host is required for Elasticsearch")
        if not config.index:
            raise ValueError("index is required for Elasticsearch")

        if "://" in config.host:
            endpoint = config.host
        else:
            endpoint = f"http://{config.host}:{config.port or 9200}"

        return await from_elasticsearch(
            hosts=[endpoint],
            index=config.index,
        )
1170
+
1171
+
1172
class KafkaSourceCreator(DataSourceCreator):
    """Builds Kafka data sources; only asynchronous creation is supported."""

    def can_create(self, config: SourceConfig) -> bool:
        """Return True when the lower-cased source type is Kafka."""
        requested_type = config.source_type.lower()
        return requested_type == SourceType.KAFKA

    def create(self, config: SourceConfig) -> Any:
        """Always raise: Kafka sources must be built with ``create_async``."""
        raise ValueError(
            "Kafka requires async creation. Use create_async() instead."
        )

    async def create_async(self, config: SourceConfig) -> Any:
        """Create a Kafka data source with truthound's ``from_kafka``.

        ``group_id`` and ``max_messages`` are forwarded only when set.

        Raises:
            ValueError: If bootstrap_servers or topic is missing.
        """
        from truthound.datasources import from_kafka

        if not config.bootstrap_servers:
            raise ValueError("bootstrap_servers is required for Kafka")
        if not config.topic:
            raise ValueError("topic is required for Kafka")

        kwargs: dict[str, Any] = {
            "bootstrap_servers": config.bootstrap_servers,
            "topic": config.topic,
        }

        optional_settings = (
            ("group_id", config.group_id),
            ("max_messages", config.max_messages),
        )
        kwargs.update({key: value for key, value in optional_settings if value})

        return await from_kafka(**kwargs)
1202
+
1203
+
1204
+ # =============================================================================
1205
+ # Main Factory
1206
+ # =============================================================================
1207
+
1208
+
1209
class DataSourceFactory:
    """Entry point that turns a ``SourceConfig`` into a truthound DataSource.

    The factory keeps an ordered list of creator objects and hands the
    configuration to the first one whose ``can_create`` accepts it. New
    source types can be supported by registering another creator; the
    factory itself does not need to change.

    Example:
        factory = DataSourceFactory()

        # From file
        source = factory.create(SourceConfig(source_type="csv", path="data.csv"))

        # From PostgreSQL
        source = factory.create(SourceConfig(
            source_type="postgresql",
            table="users",
            host="localhost",
            database="mydb",
        ))

        # From existing DB model config
        source = factory.create_from_dict(db_source.config)
    """

    def __init__(self) -> None:
        """Populate the factory with one instance of every built-in creator."""
        builtin_creator_types = (
            FileSourceCreator,
            SQLiteSourceCreator,
            DuckDBSourceCreator,
            PostgreSQLSourceCreator,
            MySQLSourceCreator,
            BigQuerySourceCreator,
            SnowflakeSourceCreator,
            RedshiftSourceCreator,
            DatabricksSourceCreator,
            OracleSourceCreator,
            SQLServerSourceCreator,
            MongoDBSourceCreator,
            ElasticsearchSourceCreator,
            KafkaSourceCreator,
        )
        self._creators: list[DataSourceCreator] = [
            creator_type() for creator_type in builtin_creator_types
        ]

    def register_creator(self, creator: DataSourceCreator) -> None:
        """Add a custom creator ahead of all existing ones.

        Args:
            creator: DataSourceCreator instance. It is inserted at the front
                of the list, so it is consulted before the built-in creators.
        """
        self._creators.insert(0, creator)

    def create(self, config: SourceConfig) -> Any:
        """Synchronously build a DataSource for ``config``.

        Args:
            config: Source configuration.

        Returns:
            Whatever the first matching creator's ``create`` returns.

        Raises:
            ValueError: If the source type is an async type, is not handled
                by any registered creator, or the config is rejected by the
                creator.
            ImportError: If a creator cannot import what it needs.
        """
        source_type = config.source_type.lower()

        # Async-only types are rejected before any creator is consulted.
        if SourceType.is_async_type(source_type):
            raise ValueError(
                f"Async source type '{source_type}' requires async creation. "
                "Use create_async() instead."
            )

        matching = next(
            (candidate for candidate in self._creators if candidate.can_create(config)),
            None,
        )
        if matching is None:
            raise ValueError(f"Unsupported source type: {source_type}")
        return matching.create(config)

    def create_from_dict(self, data: dict[str, Any]) -> Any:
        """Build a DataSource from a plain dictionary.

        Args:
            data: Dictionary with source configuration; it is converted with
                ``SourceConfig.from_dict`` and passed to ``create``.

        Returns:
            Truthound DataSource instance.
        """
        return self.create(SourceConfig.from_dict(data))

    async def create_async(self, config: SourceConfig) -> Any:
        """Asynchronously build a DataSource for ``config``.

        Only the first creator that accepts the config is considered; it must
        expose a ``create_async`` attribute.

        Args:
            config: Source configuration.

        Returns:
            Whatever the matching creator's ``create_async`` returns.

        Raises:
            ValueError: If the matching creator has no ``create_async`` or no
                creator accepts the source type.
        """
        source_type = config.source_type.lower()

        matching = next(
            (candidate for candidate in self._creators if candidate.can_create(config)),
            None,
        )
        if matching is None:
            raise ValueError(f"Unsupported source type: {source_type}")

        if not hasattr(matching, "create_async"):
            raise ValueError(
                f"Source type '{source_type}' doesn't require async creation. "
                "Use create() instead."
            )
        return await matching.create_async(config)
1331
+
1332
+
1333
+ # =============================================================================
1334
+ # Singleton and Convenience Functions
1335
+ # =============================================================================
1336
+
1337
+
1338
# Module-wide factory instance, created on first request.
_factory: DataSourceFactory | None = None


def get_datasource_factory() -> DataSourceFactory:
    """Return the shared DataSourceFactory, creating it on first use.

    Returns:
        The module-level DataSourceFactory instance.
    """
    global _factory
    if _factory is not None:
        return _factory
    _factory = DataSourceFactory()
    return _factory
1351
+
1352
+
1353
def create_datasource(config: dict[str, Any] | SourceConfig) -> Any:
    """Create a data source through the shared factory.

    Args:
        config: Either a plain dict (routed through ``create_from_dict``) or
            a SourceConfig (routed through ``create``).

    Returns:
        Truthound DataSource instance.
    """
    factory = get_datasource_factory()
    builder = factory.create_from_dict if isinstance(config, dict) else factory.create
    return builder(config)
1367
+
1368
+
1369
async def create_datasource_async(config: dict[str, Any] | SourceConfig) -> Any:
    """Create a data source asynchronously through the shared factory.

    Args:
        config: Either a plain dict (converted with ``SourceConfig.from_dict``)
            or a SourceConfig.

    Returns:
        Truthound async DataSource instance.
    """
    factory = get_datasource_factory()
    source_config = SourceConfig.from_dict(config) if isinstance(config, dict) else config
    return await factory.create_async(source_config)
1383
+
1384
+
1385
def get_source_path_or_datasource(
    source_type: str,
    config: dict[str, Any],
) -> str | Any:
    """Resolve a source to a file path or a DataSource object.

    Kept as a backward-compatibility helper: file-based source types yield
    the configured path, every other type yields a DataSource.

    Args:
        source_type: Source type string.
        config: Source configuration dict.

    Returns:
        ``config["path"]`` (or ``""`` when absent) for file types, otherwise
        the object returned by ``create_datasource``.
    """
    if not SourceType.is_file_type(source_type):
        # Seed with the requested type; a "type" key already present in
        # ``config`` overrides it, as do all other config entries.
        merged_config: dict[str, Any] = {"type": source_type}
        merged_config.update(config)
        return create_datasource(merged_config)

    return config.get("path", "")
1408
+
1409
+
1410
+ # =============================================================================
1411
+ # Utility Functions
1412
+ # =============================================================================
1413
+
1414
+
1415
+ def detect_file_type(path: str | Path) -> str | None:
1416
+ """Detect file type from path extension.
1417
+
1418
+ Args:
1419
+ path: File path.
1420
+
1421
+ Returns:
1422
+ File type string or None if unknown.
1423
+ """
1424
+ ext_map = {
1425
+ ".csv": "csv",
1426
+ ".parquet": "parquet",
1427
+ ".pq": "parquet",
1428
+ ".json": "json",
1429
+ ".ndjson": "ndjson",
1430
+ ".jsonl": "jsonl",
1431
+ }
1432
+
1433
+ path = Path(path)
1434
+ ext = path.suffix.lower()
1435
+ return ext_map.get(ext)
1436
+
1437
+
1438
def is_truthound_available() -> bool:
    """Tell whether the truthound package can be imported.

    Returns:
        True if ``import truthound`` succeeds, False if it raises ImportError.
    """
    try:
        import truthound  # noqa: F401 - imported only to probe availability
    except ImportError:
        return False
    return True
1449
+
1450
+
1451
+ def get_truthound_version() -> str | None:
1452
+ """Get truthound library version if available.
1453
+
1454
+ Returns:
1455
+ Version string or None.
1456
+ """
1457
+ try:
1458
+ import truthound
1459
+ return getattr(truthound, "__version__", None)
1460
+ except ImportError:
1461
+ return None
1462
+
1463
+
1464
+ def get_datasource_auto(
1465
+ data: Any,
1466
+ *,
1467
+ table: str | None = None,
1468
+ query: str | None = None,
1469
+ **kwargs: Any,
1470
+ ) -> Any:
1471
+ """Auto-detect and create a DataSource using truthound's get_datasource.
1472
+
1473
+ This function wraps truthound.datasources.get_datasource() for auto-detection
1474
+ of data source types. It's the recommended way to create DataSources when
1475
+ the type can be inferred from the input.
1476
+
1477
+ Args:
1478
+ data: One of:
1479
+ - Polars DataFrame/LazyFrame
1480
+ - Pandas DataFrame
1481
+ - PySpark DataFrame
1482
+ - Dictionary (column -> values)
1483
+ - File path string (csv, parquet, json, etc.)
1484
+ - SQL connection string (postgresql://, mysql://, etc.)
1485
+ table: Table name for SQL sources.
1486
+ query: Custom SQL query for SQL sources.
1487
+ **kwargs: Additional arguments passed to the DataSource constructor.
1488
+
1489
+ Returns:
1490
+ Appropriate DataSource for the input data type.
1491
+
1492
+ Raises:
1493
+ ImportError: If truthound is not installed.
1494
+ ValueError: If data type cannot be detected.
1495
+
1496
+ Example:
1497
+ # Auto-detect from Polars DataFrame
1498
+ ds = get_datasource_auto(pl_df)
1499
+
1500
+ # Auto-detect from file path
1501
+ ds = get_datasource_auto("data.parquet")
1502
+
1503
+ # Auto-detect from connection string
1504
+ ds = get_datasource_auto(
1505
+ "postgresql://user:pass@localhost/db",
1506
+ table="users",
1507
+ )
1508
+ """
1509
+ try:
1510
+ from truthound.datasources import get_datasource
1511
+ return get_datasource(data, table=table, query=query, **kwargs)
1512
+ except ImportError:
1513
+ raise ImportError(
1514
+ "truthound is not installed. Install with: pip install truthound"
1515
+ )
1516
+
1517
+
1518
+ # =============================================================================
1519
+ # Connection Testing
1520
+ # =============================================================================
1521
+
1522
+
1523
async def test_connection(config: SourceConfig | dict[str, Any]) -> dict[str, Any]:
    """Test connection to a data source.

    The data source is created through the shared factory and a few optional
    attributes are read from it to confirm that it is usable. Failures are
    never raised; they are reported in the returned dictionary.

    Args:
        config: Source configuration (SourceConfig or dict).

    Returns:
        Dictionary with connection test results:
        - success: bool - Whether connection succeeded
        - message: str - Success or error message
        - metadata: dict | None - Source metadata if successful
            - name: str - Source name
            - row_count: int | None - Row count if available
            - columns: list[str] | None - Column names if available
            - capabilities: list[str] | None - Source capabilities

    Example:
        result = await test_connection({
            "type": "postgresql",
            "host": "localhost",
            "database": "mydb",
            "table": "users",
        })
        if result["success"]:
            print(f"Connected! Found {result['metadata']['row_count']} rows")
        else:
            print(f"Connection failed: {result['message']}")
    """
    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    if isinstance(config, dict):
        config = SourceConfig.from_dict(config)

    result: dict[str, Any] = {
        "success": False,
        "message": "",
        "metadata": None,
    }

    try:
        factory = get_datasource_factory()

        # Normalise case the same way the factory and creators do, so a
        # mixed-case async type is not misrouted to the synchronous path.
        source_type = config.source_type.lower()

        # Create datasource (may be async for MongoDB, ES, Kafka)
        if SourceType.is_async_type(source_type):
            datasource = await factory.create_async(config)
        else:
            # Run sync creation in a worker thread so the event loop is not
            # blocked while the source is being built.
            loop = asyncio.get_running_loop()
            with ThreadPoolExecutor(max_workers=1) as executor:
                datasource = await loop.run_in_executor(
                    executor, factory.create, config
                )

        # Extract metadata
        metadata: dict[str, Any] = {
            "name": getattr(datasource, "name", config.name or "unknown"),
        }

        # Row count and columns are optional; a failing read is recorded as
        # None instead of failing the whole test.
        for attr in ("row_count", "columns"):
            if hasattr(datasource, attr):
                try:
                    metadata[attr] = getattr(datasource, attr)
                except Exception:
                    metadata[attr] = None

        # Capabilities are reported by the ``name`` of each entry.
        if hasattr(datasource, "capabilities"):
            try:
                capabilities = datasource.capabilities
                metadata["capabilities"] = [c.name for c in capabilities]
            except Exception:
                metadata["capabilities"] = None

        result["success"] = True
        result["message"] = "Connection successful"
        result["metadata"] = metadata

    except FileNotFoundError as e:
        result["message"] = f"File not found: {e}"
    except ImportError as e:
        result["message"] = f"Missing dependency: {e}"
    except ValueError as e:
        result["message"] = f"Configuration error: {e}"
    except Exception as e:
        # Last-resort boundary: report any other failure to the caller.
        result["message"] = f"Connection failed: {type(e).__name__}: {e}"

    return result
1623
+
1624
+
1625
def get_source_capabilities(
    source_type: str,
) -> set[str]:
    """Return the capability names registered for a source type.

    The lookup is case-insensitive: ``source_type`` is lower-cased before
    being matched against the known source type names.

    Args:
        source_type: Source type string (e.g., "postgresql", "csv").

    Returns:
        Set of ``DataSourceCapability`` member names for the source type,
        or an empty set when the source type is not known.
    """
    from truthound_dashboard.core.interfaces import DataSourceCapability

    cap = DataSourceCapability

    # Shared building blocks for the per-family capability sets.
    file_base = frozenset({cap.SCHEMA_INFERENCE, cap.LAZY_EVALUATION})
    sql_base = frozenset({cap.SQL_PUSHDOWN, cap.ROW_COUNT, cap.SAMPLING})

    # Each entry pairs a group of source type names with the capabilities
    # they all share.
    capability_groups = (
        # File sources
        (("file", "csv", "json"), file_base),
        (("parquet",), file_base | {cap.ROW_COUNT}),
        (("ndjson", "jsonl"), file_base | {cap.STREAMING}),
        # SQL sources, cloud DW and enterprise databases
        (
            (
                "sqlite",
                "postgresql",
                "mysql",
                "bigquery",
                "snowflake",
                "redshift",
                "databricks",
                "oracle",
                "sqlserver",
            ),
            sql_base | {cap.CONNECTION_TEST},
        ),
        (("duckdb",), sql_base | {cap.LAZY_EVALUATION}),
        # NoSQL
        (("mongodb", "elasticsearch"), {cap.SAMPLING, cap.CONNECTION_TEST}),
        # Streaming
        (("kafka",), {cap.STREAMING, cap.CONNECTION_TEST}),
        # DataFrame
        (
            ("polars",),
            {cap.LAZY_EVALUATION, cap.ROW_COUNT, cap.SCHEMA_INFERENCE},
        ),
        (("pandas",), {cap.ROW_COUNT, cap.SCHEMA_INFERENCE}),
    )

    wanted = source_type.lower()
    for type_names, members in capability_groups:
        if wanted in type_names:
            return {member.name for member in members}
    return set()