truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/alerts.py +75 -86
- truthound_dashboard/api/anomaly.py +7 -13
- truthound_dashboard/api/cross_alerts.py +38 -52
- truthound_dashboard/api/drift.py +49 -59
- truthound_dashboard/api/drift_monitor.py +234 -79
- truthound_dashboard/api/enterprise_sampling.py +498 -0
- truthound_dashboard/api/history.py +57 -5
- truthound_dashboard/api/lineage.py +3 -48
- truthound_dashboard/api/maintenance.py +104 -49
- truthound_dashboard/api/mask.py +1 -2
- truthound_dashboard/api/middleware.py +2 -1
- truthound_dashboard/api/model_monitoring.py +435 -311
- truthound_dashboard/api/notifications.py +227 -191
- truthound_dashboard/api/notifications_advanced.py +21 -20
- truthound_dashboard/api/observability.py +586 -0
- truthound_dashboard/api/plugins.py +2 -433
- truthound_dashboard/api/profile.py +199 -37
- truthound_dashboard/api/quality_reporter.py +701 -0
- truthound_dashboard/api/reports.py +7 -16
- truthound_dashboard/api/router.py +66 -0
- truthound_dashboard/api/rule_suggestions.py +5 -5
- truthound_dashboard/api/scan.py +17 -19
- truthound_dashboard/api/schedules.py +85 -50
- truthound_dashboard/api/schema_evolution.py +6 -6
- truthound_dashboard/api/schema_watcher.py +667 -0
- truthound_dashboard/api/sources.py +98 -27
- truthound_dashboard/api/tiering.py +1323 -0
- truthound_dashboard/api/triggers.py +14 -11
- truthound_dashboard/api/validations.py +12 -11
- truthound_dashboard/api/versioning.py +1 -6
- truthound_dashboard/core/__init__.py +129 -3
- truthound_dashboard/core/actions/__init__.py +62 -0
- truthound_dashboard/core/actions/custom.py +426 -0
- truthound_dashboard/core/actions/notifications.py +910 -0
- truthound_dashboard/core/actions/storage.py +472 -0
- truthound_dashboard/core/actions/webhook.py +281 -0
- truthound_dashboard/core/anomaly.py +262 -67
- truthound_dashboard/core/anomaly_explainer.py +4 -3
- truthound_dashboard/core/backends/__init__.py +67 -0
- truthound_dashboard/core/backends/base.py +299 -0
- truthound_dashboard/core/backends/errors.py +191 -0
- truthound_dashboard/core/backends/factory.py +423 -0
- truthound_dashboard/core/backends/mock_backend.py +451 -0
- truthound_dashboard/core/backends/truthound_backend.py +718 -0
- truthound_dashboard/core/checkpoint/__init__.py +87 -0
- truthound_dashboard/core/checkpoint/adapters.py +814 -0
- truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
- truthound_dashboard/core/checkpoint/runner.py +270 -0
- truthound_dashboard/core/connections.py +645 -23
- truthound_dashboard/core/converters/__init__.py +14 -0
- truthound_dashboard/core/converters/truthound.py +620 -0
- truthound_dashboard/core/cross_alerts.py +540 -320
- truthound_dashboard/core/datasource_factory.py +1672 -0
- truthound_dashboard/core/drift_monitor.py +216 -20
- truthound_dashboard/core/enterprise_sampling.py +1291 -0
- truthound_dashboard/core/interfaces/__init__.py +225 -0
- truthound_dashboard/core/interfaces/actions.py +652 -0
- truthound_dashboard/core/interfaces/base.py +247 -0
- truthound_dashboard/core/interfaces/checkpoint.py +676 -0
- truthound_dashboard/core/interfaces/protocols.py +664 -0
- truthound_dashboard/core/interfaces/reporters.py +650 -0
- truthound_dashboard/core/interfaces/routing.py +646 -0
- truthound_dashboard/core/interfaces/triggers.py +619 -0
- truthound_dashboard/core/lineage.py +407 -71
- truthound_dashboard/core/model_monitoring.py +431 -3
- truthound_dashboard/core/notifications/base.py +4 -0
- truthound_dashboard/core/notifications/channels.py +501 -1203
- truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
- truthound_dashboard/core/notifications/deduplication/service.py +131 -348
- truthound_dashboard/core/notifications/dispatcher.py +202 -11
- truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
- truthound_dashboard/core/notifications/escalation/engine.py +168 -358
- truthound_dashboard/core/notifications/routing/__init__.py +88 -128
- truthound_dashboard/core/notifications/routing/engine.py +90 -317
- truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
- truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
- truthound_dashboard/core/notifications/throttling/builder.py +117 -255
- truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
- truthound_dashboard/core/phase5/collaboration.py +1 -1
- truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
- truthound_dashboard/core/quality_reporter.py +1359 -0
- truthound_dashboard/core/report_history.py +0 -6
- truthound_dashboard/core/reporters/__init__.py +175 -14
- truthound_dashboard/core/reporters/adapters.py +943 -0
- truthound_dashboard/core/reporters/base.py +0 -3
- truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
- truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
- truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
- truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
- truthound_dashboard/core/reporters/compat.py +266 -0
- truthound_dashboard/core/reporters/csv_reporter.py +2 -35
- truthound_dashboard/core/reporters/factory.py +526 -0
- truthound_dashboard/core/reporters/interfaces.py +745 -0
- truthound_dashboard/core/reporters/registry.py +1 -10
- truthound_dashboard/core/scheduler.py +165 -0
- truthound_dashboard/core/schema_evolution.py +3 -3
- truthound_dashboard/core/schema_watcher.py +1528 -0
- truthound_dashboard/core/services.py +595 -76
- truthound_dashboard/core/store_manager.py +810 -0
- truthound_dashboard/core/streaming_anomaly.py +169 -4
- truthound_dashboard/core/tiering.py +1309 -0
- truthound_dashboard/core/triggers/evaluators.py +178 -8
- truthound_dashboard/core/truthound_adapter.py +2620 -197
- truthound_dashboard/core/unified_alerts.py +23 -20
- truthound_dashboard/db/__init__.py +8 -0
- truthound_dashboard/db/database.py +8 -2
- truthound_dashboard/db/models.py +944 -25
- truthound_dashboard/db/repository.py +2 -0
- truthound_dashboard/main.py +15 -0
- truthound_dashboard/schemas/__init__.py +177 -16
- truthound_dashboard/schemas/base.py +44 -23
- truthound_dashboard/schemas/collaboration.py +19 -6
- truthound_dashboard/schemas/cross_alerts.py +19 -3
- truthound_dashboard/schemas/drift.py +61 -55
- truthound_dashboard/schemas/drift_monitor.py +67 -23
- truthound_dashboard/schemas/enterprise_sampling.py +653 -0
- truthound_dashboard/schemas/lineage.py +0 -33
- truthound_dashboard/schemas/mask.py +10 -8
- truthound_dashboard/schemas/model_monitoring.py +89 -10
- truthound_dashboard/schemas/notifications_advanced.py +13 -0
- truthound_dashboard/schemas/observability.py +453 -0
- truthound_dashboard/schemas/plugins.py +0 -280
- truthound_dashboard/schemas/profile.py +154 -247
- truthound_dashboard/schemas/quality_reporter.py +403 -0
- truthound_dashboard/schemas/reports.py +2 -2
- truthound_dashboard/schemas/rule_suggestion.py +8 -1
- truthound_dashboard/schemas/scan.py +4 -24
- truthound_dashboard/schemas/schedule.py +11 -3
- truthound_dashboard/schemas/schema_watcher.py +727 -0
- truthound_dashboard/schemas/source.py +17 -2
- truthound_dashboard/schemas/tiering.py +822 -0
- truthound_dashboard/schemas/triggers.py +16 -0
- truthound_dashboard/schemas/unified_alerts.py +7 -0
- truthound_dashboard/schemas/validation.py +0 -13
- truthound_dashboard/schemas/validators/base.py +41 -21
- truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
- truthound_dashboard/schemas/validators/localization_validators.py +273 -0
- truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
- truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
- truthound_dashboard/schemas/validators/referential_validators.py +312 -0
- truthound_dashboard/schemas/validators/registry.py +93 -8
- truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
- truthound_dashboard/schemas/versioning.py +1 -6
- truthound_dashboard/static/index.html +2 -2
- truthound_dashboard-1.5.1.dist-info/METADATA +312 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/RECORD +149 -148
- truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
- truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
- truthound_dashboard/core/plugins/hooks/manager.py +0 -403
- truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
- truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
- truthound_dashboard/core/reporters/junit_reporter.py +0 -233
- truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
- truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
- truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
- truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
- truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
- truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
- truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
- truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
- truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
- truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
- truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
- truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
- truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
- truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
- truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
- truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
- truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
- truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
- truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
- truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
- truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
- truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
- truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
- truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
- truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
- truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
- truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
- truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
- truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
- truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
- truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
- truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
- truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
- truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
- truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
- truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
- truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
- truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
- truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
- truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
- truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
- truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
- truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
- truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
- truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
- truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
- truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
- truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
- truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,1672 @@
|
|
|
1
|
+
"""DataSource factory for truthound datasources.
|
|
2
|
+
|
|
3
|
+
This module provides a unified interface to create truthound DataSource objects
|
|
4
|
+
from various backend types (files, SQL databases, cloud warehouses, etc.).
|
|
5
|
+
|
|
6
|
+
The factory pattern allows the dashboard to support multiple data backends
|
|
7
|
+
through the truthound datasources API while maintaining a consistent interface
|
|
8
|
+
for services. The design prioritizes loose coupling with truthound for
|
|
9
|
+
maintainability and testability.
|
|
10
|
+
|
|
11
|
+
Architecture:
|
|
12
|
+
SourceConfig -> DataSourceFactory -> truthound.datasources.*
|
|
13
|
+
|
|
14
|
+
Updated for truthound 2.x API:
|
|
15
|
+
- Uses truthound.datasources.get_datasource() for auto-detection
|
|
16
|
+
- Uses SQLDataSourceConfig for SQL sources
|
|
17
|
+
- Uses DataSourceCapability for feature detection
|
|
18
|
+
|
|
19
|
+
Supported Data Sources:
|
|
20
|
+
- File: CSV, Parquet, JSON, NDJSON, JSONL
|
|
21
|
+
- DataFrame: Polars, Pandas
|
|
22
|
+
- SQL: SQLite, PostgreSQL, MySQL, DuckDB
|
|
23
|
+
- Cloud DW: BigQuery, Snowflake, Redshift, Databricks
|
|
24
|
+
- Enterprise: Oracle, SQL Server
|
|
25
|
+
- NoSQL: MongoDB, Elasticsearch (async)
|
|
26
|
+
- Streaming: Kafka (async)
|
|
27
|
+
|
|
28
|
+
Example:
|
|
29
|
+
factory = DataSourceFactory()
|
|
30
|
+
source = factory.create_from_config(source_config)
|
|
31
|
+
report = th.check(source=source)
|
|
32
|
+
|
|
33
|
+
# Or use the get_datasource convenience function
|
|
34
|
+
from truthound.datasources import get_datasource
|
|
35
|
+
ds = get_datasource("data.csv") # Auto-detect file type
|
|
36
|
+
ds = get_datasource("postgresql://user:pass@localhost/db", table="users")
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
from __future__ import annotations
|
|
40
|
+
|
|
41
|
+
import logging
|
|
42
|
+
from abc import ABC, abstractmethod
|
|
43
|
+
from dataclasses import dataclass, field
|
|
44
|
+
from enum import Enum
|
|
45
|
+
from pathlib import Path
|
|
46
|
+
from typing import Any, Protocol, runtime_checkable
|
|
47
|
+
|
|
48
|
+
logger = logging.getLogger(__name__)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# =============================================================================
|
|
52
|
+
# Source Type Enumeration
|
|
53
|
+
# =============================================================================
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class SourceType(str, Enum):
    """Enumeration of every data source type the factory understands."""

    # File-based
    FILE = "file"
    CSV = "csv"
    PARQUET = "parquet"
    JSON = "json"
    NDJSON = "ndjson"
    JSONL = "jsonl"

    # DataFrame
    POLARS = "polars"
    PANDAS = "pandas"

    # Core SQL
    SQLITE = "sqlite"
    POSTGRESQL = "postgresql"
    MYSQL = "mysql"
    DUCKDB = "duckdb"

    # Cloud Data Warehouses
    BIGQUERY = "bigquery"
    SNOWFLAKE = "snowflake"
    REDSHIFT = "redshift"
    DATABRICKS = "databricks"

    # Enterprise
    ORACLE = "oracle"
    SQLSERVER = "sqlserver"

    # NoSQL (async)
    MONGODB = "mongodb"
    ELASTICSEARCH = "elasticsearch"

    # Streaming (async)
    KAFKA = "kafka"

    @classmethod
    def _belongs_to(cls, source_type: str, members: set["SourceType"]) -> bool:
        """Return True when *source_type* parses to one of *members*.

        Unknown type strings yield False rather than raising, so callers
        can probe arbitrary user input safely.
        """
        try:
            candidate = cls(source_type)
        except ValueError:
            return False
        return candidate in members

    @classmethod
    def is_file_type(cls, source_type: str) -> bool:
        """Check if source type is file-based."""
        return cls._belongs_to(
            source_type,
            {cls.FILE, cls.CSV, cls.PARQUET, cls.JSON, cls.NDJSON, cls.JSONL},
        )

    @classmethod
    def is_sql_type(cls, source_type: str) -> bool:
        """Check if source type is SQL-based (core, cloud DW, or enterprise)."""
        return cls._belongs_to(
            source_type,
            {
                cls.SQLITE,
                cls.POSTGRESQL,
                cls.MYSQL,
                cls.DUCKDB,
                cls.BIGQUERY,
                cls.SNOWFLAKE,
                cls.REDSHIFT,
                cls.DATABRICKS,
                cls.ORACLE,
                cls.SQLSERVER,
            },
        )

    @classmethod
    def is_async_type(cls, source_type: str) -> bool:
        """Check if source type requires async operations (NoSQL/streaming)."""
        return cls._belongs_to(
            source_type,
            {cls.MONGODB, cls.ELASTICSEARCH, cls.KAFKA},
        )
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# =============================================================================
|
|
145
|
+
# Source Configuration
|
|
146
|
+
# =============================================================================
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@dataclass
class SourceConfig:
    """Configuration bag for building any supported data source.

    A single dataclass carries the union of options for every backend;
    only the fields relevant to ``source_type`` are consulted when the
    source is actually created.

    Attributes:
        source_type: Type of data source (file, postgresql, etc.)
        name: Human-readable name for the source.
        path: File path for file-based sources.
        table: Table name for SQL sources.
        query: Custom SQL query (alternative to table).
        host: Database host.
        port: Database port.
        database: Database name.
        schema_name: Database schema (e.g., "public" for PostgreSQL).
        user: Database username.
        password: Database password.
        connection_string: Full connection string (alternative to individual params).
        project: GCP project ID (BigQuery).
        dataset: BigQuery dataset name.
        account: Snowflake account identifier.
        warehouse: Snowflake warehouse name.
        credentials_path: Path to credentials file (BigQuery).
        access_token: Access token (Databricks).
        http_path: HTTP path for SQL warehouse (Databricks).
        catalog: Unity Catalog name (Databricks).
        cluster_identifier: Redshift cluster ID.
        iam_auth: Use IAM authentication (Redshift).
        service_name: Oracle service name.
        sid: Oracle SID.
        trusted_connection: Windows auth (SQL Server).
        collection: MongoDB collection name.
        index: Elasticsearch index name.
        topic: Kafka topic name.
        bootstrap_servers: Kafka bootstrap servers.
        group_id: Kafka consumer group ID.
        max_messages: Maximum Kafka messages to consume.
        pool_size: Connection pool size.
        query_timeout: Query timeout in seconds.
        max_rows: Maximum rows to fetch.
        sample_size: Sample size for large datasets.
        extra: Any additional backend-specific options.
    """

    source_type: str
    name: str | None = None

    # File-based
    path: str | None = None

    # SQL common
    table: str | None = None
    query: str | None = None
    host: str | None = None
    port: int | None = None
    database: str | None = None
    schema_name: str | None = None
    user: str | None = None
    password: str | None = None
    connection_string: str | None = None

    # Cloud DW
    project: str | None = None
    dataset: str | None = None
    account: str | None = None
    warehouse: str | None = None
    credentials_path: str | None = None
    access_token: str | None = None
    http_path: str | None = None
    catalog: str | None = None
    cluster_identifier: str | None = None
    iam_auth: bool = False

    # Enterprise
    service_name: str | None = None
    sid: str | None = None
    trusted_connection: bool = False

    # NoSQL
    collection: str | None = None
    index: str | None = None

    # Streaming
    topic: str | None = None
    bootstrap_servers: str | None = None
    group_id: str | None = None
    max_messages: int | None = None

    # General
    pool_size: int | None = None
    query_timeout: float | None = None
    max_rows: int | None = None
    sample_size: int | None = None

    # Extra options (for extensibility)
    extra: dict[str, Any] = field(default_factory=dict)

    # Optional scalar keys, in the order to_dict() emits them.  Unannotated
    # class attributes are ignored by the dataclass machinery.
    _SCALAR_KEYS = (
        "name",
        "path",
        "table",
        "query",
        "host",
        "port",
        "database",
        "schema_name",
        "user",
        "password",
        "connection_string",
        "project",
        "dataset",
        "account",
        "warehouse",
        "credentials_path",
        "access_token",
        "http_path",
        "catalog",
        "cluster_identifier",
        "service_name",
        "sid",
        "collection",
        "index",
        "topic",
        "bootstrap_servers",
        "group_id",
        "max_messages",
        "pool_size",
        "query_timeout",
        "max_rows",
        "sample_size",
    )
    # Boolean flags that are only serialized when True.
    _FLAG_KEYS = ("iam_auth", "trusted_connection")

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "SourceConfig":
        """Create SourceConfig from dictionary.

        Args:
            data: Dictionary with source configuration.
                Must include 'type' or 'source_type' key.  Unrecognized
                keys are preserved in ``extra``.

        Returns:
            SourceConfig instance.

        Raises:
            ValueError: If source_type is missing.
        """
        # 'type' is accepted as an alias for 'source_type'.
        source_type = data.get("source_type") or data.get("type")
        if not source_type:
            raise ValueError("source_type or type is required")

        recognized = set(cls._SCALAR_KEYS) | set(cls._FLAG_KEYS)

        kwargs: dict[str, Any] = {"source_type": source_type}
        extra: dict[str, Any] = {}
        for key, value in data.items():
            if key in ("type", "source_type"):
                continue
            if key in recognized:
                kwargs[key] = value
            else:
                extra[key] = value

        if extra:
            kwargs["extra"] = extra

        return cls(**kwargs)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary representation.

        Only non-None scalar fields, True boolean flags, and ``extra``
        entries are included, so the result round-trips via from_dict().
        """
        result: dict[str, Any] = {"type": self.source_type}

        for key in self._SCALAR_KEYS:
            value = getattr(self, key)
            if value is not None:
                result[key] = value

        for flag in self._FLAG_KEYS:
            if getattr(self, flag):
                result[flag] = True

        if self.extra:
            result.update(self.extra)

        return result
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
# =============================================================================
|
|
390
|
+
# DataSource Protocol (for loose coupling)
|
|
391
|
+
# =============================================================================
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
@runtime_checkable
class DataSourceProtocol(Protocol):
    """Structural interface for truthound DataSource objects.

    Defines the members every DataSource implementation must expose.
    Using a runtime-checkable Protocol keeps the dashboard loosely
    coupled to truthound: type checks work without importing it.
    """

    @property
    def name(self) -> str:
        """Source name."""
        ...

    @property
    def schema(self) -> dict[str, Any]:
        """Schema dictionary."""
        ...

    @property
    def columns(self) -> list[str]:
        """Column names."""
        ...

    @property
    def row_count(self) -> int | None:
        """Row count, or None when unavailable."""
        ...

    def to_polars_lazyframe(self) -> Any:
        """Return the data as a Polars LazyFrame."""
        ...
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
# =============================================================================
|
|
428
|
+
# Backend Strategy Pattern (for extensibility)
|
|
429
|
+
# =============================================================================
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
class DataSourceCreator(ABC):
    """Abstract strategy for constructing one family of data sources.

    Each concrete subclass owns the construction logic for a single
    source type: ``can_create`` claims a config, ``create`` builds it.
    """

    @abstractmethod
    def can_create(self, config: SourceConfig) -> bool:
        """Return True when this creator handles *config*."""
        ...

    @abstractmethod
    def create(self, config: SourceConfig) -> Any:
        """Build and return the data source described by *config*."""
        ...
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
class FileSourceCreator(DataSourceCreator):
    """Creator for file-based data sources (CSV, Parquet, JSON, NDJSON, JSONL).

    Updated for truthound 2.x API:
    - Uses truthound.datasources.polars_source.FileDataSource
    - Supports FileDataSourceConfig for advanced options

    Falls back to the legacy ``truthound.datasources.FileDataSource``
    import, and finally to returning the plain path string (truthound
    core functions also accept path strings).
    """

    def can_create(self, config: SourceConfig) -> bool:
        """Handle any file-typed config."""
        return SourceType.is_file_type(config.source_type)

    def create(self, config: SourceConfig) -> Any:
        """Create file-based data source using truthound's FileDataSource.

        Args:
            config: Source configuration; ``path`` is required and the
                file must exist.

        Returns:
            A truthound FileDataSource, or the path string as a final
            fallback when truthound datasources are unavailable.

        Raises:
            ValueError: If ``config.path`` is missing.
            FileNotFoundError: If the file does not exist.
        """
        if not config.path:
            raise ValueError("path is required for file sources")

        path = Path(config.path)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {config.path}")

        try:
            # Try new truthound 2.x API first.
            from truthound.datasources.polars_source import (
                FileDataSource,
                FileDataSourceConfig,
            )

            # Bug fix: the previous version returned unconditionally with
            # config=file_config, leaving a second bare return unreachable.
            # Only build and pass an explicit config when the caller
            # supplied extra options; otherwise use the defaults.
            if config.extra:
                file_config = FileDataSourceConfig(
                    infer_schema_length=config.extra.get("infer_schema_length", 10000),
                    ignore_errors=config.extra.get("ignore_errors", False),
                    encoding=config.extra.get("encoding", "utf8"),
                    separator=config.extra.get("separator", ","),
                )
                return FileDataSource(str(path), config=file_config)

            return FileDataSource(str(path))

        except ImportError:
            try:
                # Fallback: Try older truthound.datasources.FileDataSource
                from truthound.datasources import FileDataSource

                return FileDataSource(str(path))
            except ImportError:
                # Final fallback: return path string (backward compatible)
                # truthound core functions also accept path strings
                logger.debug("truthound.datasources not available, using path string")
                return str(path)
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
class SQLiteSourceCreator(DataSourceCreator):
    """Creator for SQLite data sources.

    Updated for truthound 2.x API:
    - Uses truthound.datasources.sql.sqlite.SQLiteDataSource
    - Supports SQLiteDataSourceConfig for advanced options

    Falls back to the older ``truthound.datasources.sql`` import path,
    which does not accept a config object.
    """

    def can_create(self, config: SourceConfig) -> bool:
        """Handle configs whose type is 'sqlite' (case-insensitive)."""
        return config.source_type.lower() == SourceType.SQLITE

    def create(self, config: SourceConfig) -> Any:
        """Build a SQLite data source from *config*.

        Raises:
            ValueError: If no database/path, or no table/query, is given.
        """
        if not config.database and not config.path:
            raise ValueError("database or path is required for SQLite")

        database = config.database or config.path

        try:
            # Try new truthound 2.x API with explicit import path
            from truthound.datasources.sql.sqlite import (
                SQLiteDataSource,
                SQLiteDataSourceConfig,
            )

            # Advanced options are forwarded only when the caller set any.
            sqlite_config = None
            if config.extra or config.query_timeout:
                sqlite_config = SQLiteDataSourceConfig(
                    database=database,
                    timeout=config.query_timeout or 5.0,
                )

            common_kwargs: dict[str, Any] = {"database": database}
            if sqlite_config:
                common_kwargs["config"] = sqlite_config

            if config.table:
                return SQLiteDataSource(table=config.table, **common_kwargs)
            if config.query:
                return SQLiteDataSource(query=config.query, **common_kwargs)
            raise ValueError("table or query is required for SQLite")

        except ImportError:
            # Fallback: Try older import path
            from truthound.datasources.sql import SQLiteDataSource

            if config.table:
                return SQLiteDataSource(table=config.table, database=database)
            if config.query:
                return SQLiteDataSource(query=config.query, database=database)
            raise ValueError("table or query is required for SQLite")
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
class DuckDBSourceCreator(DataSourceCreator):
    """Creator for DuckDB data sources.

    Note: DuckDB support depends on truthound's optional DuckDB backend.
    If not available, falls back to direct Polars reading.
    """

    def can_create(self, config: SourceConfig) -> bool:
        # Handles only the "duckdb" source type.
        return config.source_type.lower() == SourceType.DUCKDB

    def create(self, config: SourceConfig) -> Any:
        """Create a DuckDB-backed data source.

        Raises:
            ValueError: If database/path or table/query is missing.
            ImportError: If neither truthound's DuckDB backend nor the
                Polars DuckDB connector stack is usable.
        """
        if not config.database and not config.path:
            raise ValueError("database or path is required for DuckDB")

        # "database" and "path" both refer to the DuckDB file location.
        database = config.database or config.path

        # Try truthound's DuckDB support first
        try:
            from truthound.datasources.sql import DuckDBDataSource

            # Table takes precedence over query when both are set.
            if config.table:
                return DuckDBDataSource(table=config.table, database=database)
            elif config.query:
                return DuckDBDataSource(query=config.query, database=database)
            else:
                raise ValueError("table or query is required for DuckDB")

        except ImportError:
            # Fallback: Use Polars to read from DuckDB directly
            logger.debug("truthound DuckDB not available, using Polars fallback")
            try:
                import polars as pl

                if not config.table and not config.query:
                    raise ValueError("table or query is required for DuckDB")

                # NOTE(review): table name is interpolated directly into SQL;
                # assumes config.table comes from trusted configuration.
                query = config.query or f"SELECT * FROM {config.table}"
                # Use read_database_uri for DuckDB connections
                try:
                    df = pl.read_database_uri(query, f"duckdb:///{database}")
                except Exception as read_err:
                    # Any read failure is surfaced as ImportError so callers
                    # see the install hint; the original error is chained.
                    raise ImportError(
                        f"Failed to read from DuckDB: {read_err}. "
                        "Install DuckDB connector with: pip install duckdb connectorx"
                    ) from read_err

                # Return as PolarsDataSource for consistency
                from truthound.datasources import PolarsDataSource
                return PolarsDataSource(df, name=config.name or database)

            except ImportError as ie:
                raise ImportError(
                    f"DuckDB support requires additional packages. {ie}"
                ) from ie
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
class PostgreSQLSourceCreator(DataSourceCreator):
    """Creator for PostgreSQL data sources.

    Updated for truthound 2.x API:
    - Uses truthound.datasources.sql.postgresql.PostgreSQLDataSource
    - Supports PostgreSQLDataSourceConfig for advanced options including:
      - sslmode, application_name, pool_size, query_timeout
    """

    def can_create(self, config: SourceConfig) -> bool:
        # Handles only the "postgresql" source type.
        return config.source_type.lower() == SourceType.POSTGRESQL

    def create(self, config: SourceConfig) -> Any:
        """Create a PostgreSQL data source from *config*.

        Prefers a full connection string; otherwise builds one from the
        individual host/port/database/user/password fields.

        Raises:
            ValueError: If required connection fields or table/query are missing.
        """
        try:
            # Try new truthound 2.x API with explicit import path
            from truthound.datasources.sql.postgresql import (
                PostgreSQLDataSource,
                PostgreSQLDataSourceConfig,
            )

            # Use connection string if provided
            if config.connection_string:
                if not config.table and not config.query:
                    raise ValueError("table or query is required")
                return PostgreSQLDataSource.from_connection_string(
                    connection_string=config.connection_string,
                    table=config.table,
                    query=config.query,
                    schema_name=config.schema_name,
                )

            # Use individual parameters
            if not config.host or not config.database:
                raise ValueError("host and database are required for PostgreSQL")

            # Build PostgreSQLDataSourceConfig for advanced options.
            # sslmode and application_name may be overridden via config.extra.
            pg_config = PostgreSQLDataSourceConfig(
                host=config.host,
                port=config.port or 5432,
                database=config.database,
                user=config.user or "postgres",
                password=config.password,
                sslmode=config.extra.get("sslmode", "prefer") if config.extra else "prefer",
                application_name=config.extra.get("application_name", "truthound-dashboard") if config.extra else "truthound-dashboard",
                schema_name=config.schema_name or "public",
                pool_size=config.pool_size or 10,
                query_timeout=config.query_timeout or 300.0,
            )

            # Table takes precedence over query when both are set.
            if config.table:
                return PostgreSQLDataSource(table=config.table, config=pg_config)
            elif config.query:
                return PostgreSQLDataSource(query=config.query, config=pg_config)
            else:
                raise ValueError("table or query is required")

        except ImportError:
            # Fallback: Try older import path
            from truthound.datasources.sql import PostgreSQLDataSource

            if config.connection_string:
                if not config.table and not config.query:
                    raise ValueError("table or query is required")
                return PostgreSQLDataSource.from_connection_string(
                    connection_string=config.connection_string,
                    table=config.table,
                    query=config.query,
                    schema_name=config.schema_name,
                )

            if not config.host or not config.database:
                raise ValueError("host and database are required for PostgreSQL")

            kwargs: dict[str, Any] = {
                "host": config.host,
                "database": config.database,
            }

            if config.table:
                kwargs["table"] = config.table
            elif config.query:
                kwargs["query"] = config.query
            else:
                raise ValueError("table or query is required")

            # Optional connection settings are forwarded only when set.
            if config.port:
                kwargs["port"] = config.port
            if config.user:
                kwargs["user"] = config.user
            if config.password:
                kwargs["password"] = config.password
            if config.schema_name:
                kwargs["schema_name"] = config.schema_name

            return PostgreSQLDataSource(**kwargs)
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
class MySQLSourceCreator(DataSourceCreator):
    """Creator for MySQL data sources.

    Updated for truthound 2.x API:
    - Uses truthound.datasources.sql.mysql.MySQLDataSource
    - Supports MySQLDataSourceConfig for advanced options
    """

    def can_create(self, config: SourceConfig) -> bool:
        # Handles only the "mysql" source type.
        return config.source_type.lower() == SourceType.MYSQL

    def create(self, config: SourceConfig) -> Any:
        """Create a MySQL data source from *config*.

        Raises:
            ValueError: If required connection fields or table/query are missing.
        """
        try:
            # Try new truthound 2.x API with explicit import path
            from truthound.datasources.sql.mysql import (
                MySQLDataSource,
                MySQLDataSourceConfig,
            )

            # NOTE(review): unlike the PostgreSQL creator, this path does not
            # validate table/query and does not forward config.query — confirm
            # whether query-based connection-string sources should be supported.
            if config.connection_string:
                return MySQLDataSource.from_connection_string(
                    connection_string=config.connection_string,
                    table=config.table,
                )

            if not config.host or not config.database:
                raise ValueError("host and database are required for MySQL")

            # Build MySQLDataSourceConfig for advanced options.
            # charset and autocommit may be overridden via config.extra.
            mysql_config = MySQLDataSourceConfig(
                host=config.host,
                port=config.port or 3306,
                database=config.database,
                user=config.user or "root",
                password=config.password,
                charset=config.extra.get("charset", "utf8mb4") if config.extra else "utf8mb4",
                autocommit=config.extra.get("autocommit", True) if config.extra else True,
            )

            # Table takes precedence over query when both are set.
            if config.table:
                return MySQLDataSource(table=config.table, config=mysql_config)
            elif config.query:
                return MySQLDataSource(query=config.query, config=mysql_config)
            else:
                raise ValueError("table or query is required")

        except ImportError:
            # Fallback: Try older import path
            from truthound.datasources.sql import MySQLDataSource

            # NOTE(review): same gap as above — config.query is not forwarded.
            if config.connection_string:
                return MySQLDataSource.from_connection_string(
                    connection_string=config.connection_string,
                    table=config.table,
                )

            if not config.host or not config.database:
                raise ValueError("host and database are required for MySQL")

            kwargs: dict[str, Any] = {
                "host": config.host,
                "database": config.database,
            }

            if config.table:
                kwargs["table"] = config.table
            elif config.query:
                kwargs["query"] = config.query
            else:
                raise ValueError("table or query is required")

            # Optional connection settings are forwarded only when set.
            if config.port:
                kwargs["port"] = config.port
            if config.user:
                kwargs["user"] = config.user
            if config.password:
                kwargs["password"] = config.password

            return MySQLDataSource(**kwargs)
|
|
789
|
+
|
|
790
|
+
|
|
791
|
+
class BigQuerySourceCreator(DataSourceCreator):
    """Creator for BigQuery data sources.

    Updated for truthound 2.x API:
    - Uses truthound.datasources.sql.bigquery.BigQueryDataSource
    - Supports BigQueryConfig for cost control and advanced options
    """

    def can_create(self, config: SourceConfig) -> bool:
        # Handles only the "bigquery" source type.
        return config.source_type.lower() == SourceType.BIGQUERY

    def create(self, config: SourceConfig) -> Any:
        """Create a BigQuery data source from *config*.

        Raises:
            ValueError: If project or table/query is missing.
        """
        if not config.project:
            raise ValueError("project is required for BigQuery")

        try:
            # Try new truthound 2.x API with explicit import path
            from truthound.datasources.sql.bigquery import (
                BigQueryDataSource,
                BigQueryConfig,
            )

            # Build BigQueryConfig for advanced options.
            # location, use_legacy_sql and maximum_bytes_billed (a cost cap)
            # may be supplied via config.extra.
            bq_config = BigQueryConfig(
                dataset=config.dataset,
                location=config.extra.get("location") if config.extra else None,
                use_legacy_sql=config.extra.get("use_legacy_sql", False) if config.extra else False,
                maximum_bytes_billed=config.extra.get("maximum_bytes_billed") if config.extra else None,
                job_timeout=config.query_timeout or 300,
            )

            # Table takes precedence over query when both are set.
            if config.table:
                return BigQueryDataSource(
                    table=config.table,
                    project=config.project,
                    credentials_path=config.credentials_path,
                    config=bq_config,
                )
            elif config.query:
                return BigQueryDataSource(
                    query=config.query,
                    project=config.project,
                    credentials_path=config.credentials_path,
                    config=bq_config,
                )
            else:
                raise ValueError("table or query is required for BigQuery")

        except ImportError:
            # Fallback: Try older import path
            from truthound.datasources.sql import BigQueryDataSource

            kwargs: dict[str, Any] = {"project": config.project}

            # NOTE(review): unlike the branches above, this fallback does not
            # raise when both table and query are absent — confirm intent.
            if config.dataset:
                kwargs["dataset"] = config.dataset
            if config.table:
                kwargs["table"] = config.table
            elif config.query:
                kwargs["query"] = config.query
            if config.credentials_path:
                kwargs["credentials_path"] = config.credentials_path

            return BigQueryDataSource(**kwargs)
|
|
855
|
+
|
|
856
|
+
|
|
857
|
+
class SnowflakeSourceCreator(DataSourceCreator):
    """Creator for Snowflake data sources.

    Updated for truthound 2.x API:
    - Uses truthound.datasources.sql.snowflake.SnowflakeDataSource
    - Supports SnowflakeConfig for advanced auth options
    """

    def can_create(self, config: SourceConfig) -> bool:
        # Handles only the "snowflake" source type.
        return config.source_type.lower() == SourceType.SNOWFLAKE

    def create(self, config: SourceConfig) -> Any:
        """Create a Snowflake data source from *config*.

        Raises:
            ValueError: If account or table/query is missing.
        """
        if not config.account:
            raise ValueError("account is required for Snowflake")

        try:
            # Try new truthound 2.x API with explicit import path
            from truthound.datasources.sql.snowflake import (
                SnowflakeDataSource,
                SnowflakeConfig,
            )

            # Build SnowflakeConfig for advanced options.
            # role, authenticator and key-pair auth settings come from
            # config.extra when present.
            sf_config = SnowflakeConfig(
                account=config.account,
                user=config.user,
                password=config.password,
                database=config.database,
                schema_name=config.schema_name or "PUBLIC",
                warehouse=config.warehouse,
                role=config.extra.get("role") if config.extra else None,
                authenticator=config.extra.get("authenticator", "snowflake") if config.extra else "snowflake",
                private_key_path=config.extra.get("private_key_path") if config.extra else None,
                private_key_passphrase=config.extra.get("private_key_passphrase") if config.extra else None,
                client_session_keep_alive=config.extra.get("client_session_keep_alive", True) if config.extra else True,
            )

            # Table takes precedence over query when both are set.
            if config.table:
                return SnowflakeDataSource(table=config.table, config=sf_config)
            elif config.query:
                return SnowflakeDataSource(query=config.query, config=sf_config)
            else:
                raise ValueError("table or query is required")

        except ImportError:
            # Fallback: Try older import path
            from truthound.datasources.sql import SnowflakeDataSource

            kwargs: dict[str, Any] = {"account": config.account}

            if config.table:
                kwargs["table"] = config.table
            elif config.query:
                kwargs["query"] = config.query
            else:
                raise ValueError("table or query is required")

            # Optional connection settings are forwarded only when set.
            # The older API takes "schema" rather than "schema_name".
            if config.database:
                kwargs["database"] = config.database
            if config.schema_name:
                kwargs["schema"] = config.schema_name
            if config.warehouse:
                kwargs["warehouse"] = config.warehouse
            if config.user:
                kwargs["user"] = config.user
            if config.password:
                kwargs["password"] = config.password

            return SnowflakeDataSource(**kwargs)
|
|
926
|
+
|
|
927
|
+
|
|
928
|
+
class RedshiftSourceCreator(DataSourceCreator):
    """Creator for Redshift data sources."""

    def can_create(self, config: SourceConfig) -> bool:
        return config.source_type.lower() == SourceType.REDSHIFT

    def create(self, config: SourceConfig) -> Any:
        """Build a RedshiftDataSource from *config*.

        Requires host and database plus either a table or a query;
        optional connection settings are forwarded only when set.
        """
        from truthound.datasources.sql import RedshiftDataSource

        if not config.host or not config.database:
            raise ValueError("host and database are required for Redshift")

        params: dict[str, Any] = {"host": config.host, "database": config.database}

        # Table takes precedence over query when both are set.
        if config.table:
            params["table"] = config.table
        elif config.query:
            params["query"] = config.query
        else:
            raise ValueError("table or query is required")

        optional = {
            "port": config.port,
            "user": config.user,
            "password": config.password,
            "schema": config.schema_name,
            "cluster_identifier": config.cluster_identifier,
        }
        params.update({key: value for key, value in optional.items() if value})
        if config.iam_auth:
            params["iam_auth"] = True

        return RedshiftDataSource(**params)
|
|
966
|
+
|
|
967
|
+
|
|
968
|
+
class DatabricksSourceCreator(DataSourceCreator):
    """Creator for Databricks data sources.

    Updated for truthound 2.x API:
    - Uses truthound.datasources.sql.databricks.DatabricksDataSource
    - Supports DatabricksConfig for Unity Catalog and OAuth
    """

    def can_create(self, config: SourceConfig) -> bool:
        # Handles only the "databricks" source type.
        return config.source_type.lower() == SourceType.DATABRICKS

    def create(self, config: SourceConfig) -> Any:
        """Create a Databricks data source from *config*.

        Raises:
            ValueError: If host/http_path or table/query is missing.
        """
        if not config.host or not config.http_path:
            raise ValueError("host and http_path are required for Databricks")

        try:
            # Try new truthound 2.x API with explicit import path
            from truthound.datasources.sql.databricks import (
                DatabricksDataSource,
                DatabricksConfig,
            )

            # Build DatabricksConfig for advanced options.
            # Cloud-fetch tuning and OAuth credentials come from config.extra.
            db_config = DatabricksConfig(
                host=config.host,
                http_path=config.http_path,
                access_token=config.access_token,
                catalog=config.catalog,
                use_cloud_fetch=config.extra.get("use_cloud_fetch", True) if config.extra else True,
                max_download_threads=config.extra.get("max_download_threads", 10) if config.extra else 10,
                client_id=config.extra.get("client_id") if config.extra else None,
                client_secret=config.extra.get("client_secret") if config.extra else None,
                use_oauth=config.extra.get("use_oauth", False) if config.extra else False,
            )

            # Table takes precedence over query when both are set.
            if config.table:
                return DatabricksDataSource(table=config.table, schema=config.schema_name, config=db_config)
            elif config.query:
                return DatabricksDataSource(query=config.query, config=db_config)
            else:
                raise ValueError("table or query is required")

        except ImportError:
            # Fallback: Try older import path
            from truthound.datasources.sql import DatabricksDataSource

            kwargs: dict[str, Any] = {
                "host": config.host,
                "http_path": config.http_path,
            }

            if config.table:
                kwargs["table"] = config.table
            elif config.query:
                kwargs["query"] = config.query
            else:
                raise ValueError("table or query is required")

            # Optional settings are forwarded only when set.
            if config.access_token:
                kwargs["access_token"] = config.access_token
            if config.catalog:
                kwargs["catalog"] = config.catalog
            if config.schema_name:
                kwargs["schema"] = config.schema_name

            return DatabricksDataSource(**kwargs)
|
|
1034
|
+
|
|
1035
|
+
|
|
1036
|
+
class OracleSourceCreator(DataSourceCreator):
    """Creator for Oracle data sources."""

    def can_create(self, config: SourceConfig) -> bool:
        return config.source_type.lower() == SourceType.ORACLE

    def create(self, config: SourceConfig) -> Any:
        """Build an OracleDataSource from *config*.

        Requires a table or query; connection details are optional and
        forwarded only when set. service_name takes precedence over sid.
        """
        from truthound.datasources.sql import OracleDataSource

        params: dict[str, Any] = {}

        # Table takes precedence over query when both are set.
        if config.table:
            params["table"] = config.table
        elif config.query:
            params["query"] = config.query
        else:
            raise ValueError("table or query is required")

        # Oracle accepts either a service name or a SID; prefer the former.
        if config.service_name:
            params["service_name"] = config.service_name
        elif config.sid:
            params["sid"] = config.sid

        optional = {
            "host": config.host,
            "port": config.port,
            "user": config.user,
            "password": config.password,
        }
        params.update({key: value for key, value in optional.items() if value})

        return OracleDataSource(**params)
|
|
1068
|
+
|
|
1069
|
+
|
|
1070
|
+
class SQLServerSourceCreator(DataSourceCreator):
    """Creator for SQL Server data sources."""

    def can_create(self, config: SourceConfig) -> bool:
        return config.source_type.lower() == SourceType.SQLSERVER

    def create(self, config: SourceConfig) -> Any:
        """Build a SQLServerDataSource from *config*.

        Requires a table or query; connection details are optional and
        forwarded only when set. Supports Windows trusted connections.
        """
        from truthound.datasources.sql import SQLServerDataSource

        params: dict[str, Any] = {}

        # Table takes precedence over query when both are set.
        if config.table:
            params["table"] = config.table
        elif config.query:
            params["query"] = config.query
        else:
            raise ValueError("table or query is required")

        optional = {
            "host": config.host,
            "port": config.port,
            "database": config.database,
            "user": config.user,
            "password": config.password,
            "schema": config.schema_name,
        }
        params.update({key: value for key, value in optional.items() if value})

        if config.trusted_connection:
            params["trusted_connection"] = True

        return SQLServerDataSource(**params)
|
|
1104
|
+
|
|
1105
|
+
|
|
1106
|
+
# =============================================================================
|
|
1107
|
+
# Async Source Creators
|
|
1108
|
+
# =============================================================================
|
|
1109
|
+
|
|
1110
|
+
|
|
1111
|
+
class MongoDBSourceCreator(DataSourceCreator):
    """Creator for MongoDB data sources (async)."""

    def can_create(self, config: SourceConfig) -> bool:
        return config.source_type.lower() == SourceType.MONGODB

    def create(self, config: SourceConfig) -> Any:
        # MongoDB connections are async-only; the sync entry point always fails.
        raise ValueError(
            "MongoDB requires async creation. Use create_async() instead."
        )

    async def create_async(self, config: SourceConfig) -> Any:
        """Connect to MongoDB and return a truthound data source.

        Requires a database and collection plus either a full connection
        string or a host (port defaults to 27017).
        """
        from truthound.datasources import from_mongodb

        # Validate required fields in a fixed order so the first missing
        # one determines the error message.
        if not config.connection_string and not config.host:
            raise ValueError("connection_string or host is required for MongoDB")
        if not config.database:
            raise ValueError("database is required for MongoDB")
        if not config.collection:
            raise ValueError("collection is required for MongoDB")

        # Prefer an explicit connection string; otherwise derive one from host/port.
        uri = config.connection_string or f"mongodb://{config.host}:{config.port or 27017}"

        return await from_mongodb(
            connection_string=uri,
            database=config.database,
            collection=config.collection,
        )
|
|
1141
|
+
|
|
1142
|
+
|
|
1143
|
+
class ElasticsearchSourceCreator(DataSourceCreator):
    """Creator for Elasticsearch data sources (async)."""

    def can_create(self, config: SourceConfig) -> bool:
        return config.source_type.lower() == SourceType.ELASTICSEARCH

    def create(self, config: SourceConfig) -> Any:
        # Elasticsearch connections are async-only; the sync entry point always fails.
        raise ValueError(
            "Elasticsearch requires async creation. Use create_async() instead."
        )

    async def create_async(self, config: SourceConfig) -> Any:
        """Connect to Elasticsearch and return a truthound data source.

        Requires a host and an index. A bare host (no scheme) is expanded
        to an http:// URL with the configured or default port (9200).
        """
        from truthound.datasources import from_elasticsearch

        if not config.host:
            raise ValueError("host is required for Elasticsearch")
        if not config.index:
            raise ValueError("index is required for Elasticsearch")

        host = config.host
        if "://" in host:
            endpoint = host
        else:
            endpoint = f"http://{host}:{config.port or 9200}"

        return await from_elasticsearch(
            hosts=[endpoint],
            index=config.index,
        )
|
|
1170
|
+
|
|
1171
|
+
|
|
1172
|
+
class KafkaSourceCreator(DataSourceCreator):
    """Creator for Kafka data sources (async)."""

    def can_create(self, config: SourceConfig) -> bool:
        return config.source_type.lower() == SourceType.KAFKA

    def create(self, config: SourceConfig) -> Any:
        # Kafka consumption is async-only; the sync entry point always fails.
        raise ValueError(
            "Kafka requires async creation. Use create_async() instead."
        )

    async def create_async(self, config: SourceConfig) -> Any:
        """Consume from a Kafka topic and return a truthound data source.

        Requires bootstrap_servers and a topic; group_id and max_messages
        are forwarded only when set.
        """
        from truthound.datasources import from_kafka

        if not config.bootstrap_servers:
            raise ValueError("bootstrap_servers is required for Kafka")
        if not config.topic:
            raise ValueError("topic is required for Kafka")

        options: dict[str, Any] = {
            "bootstrap_servers": config.bootstrap_servers,
            "topic": config.topic,
        }
        extras = {"group_id": config.group_id, "max_messages": config.max_messages}
        options.update({key: value for key, value in extras.items() if value})

        return await from_kafka(**options)
|
|
1202
|
+
|
|
1203
|
+
|
|
1204
|
+
# =============================================================================
|
|
1205
|
+
# Main Factory
|
|
1206
|
+
# =============================================================================
|
|
1207
|
+
|
|
1208
|
+
|
|
1209
|
+
class DataSourceFactory:
    """Factory for creating truthound DataSource objects.

    This factory uses the Strategy pattern to delegate creation
    to specialized creator classes. This design provides:
    - Extensibility: Add new creators without modifying factory
    - Testability: Easy to mock individual creators
    - Loose coupling: Truthound imports are isolated in creators

    Example:
        factory = DataSourceFactory()

        # From file
        source = factory.create(SourceConfig(source_type="csv", path="data.csv"))

        # From PostgreSQL
        source = factory.create(SourceConfig(
            source_type="postgresql",
            table="users",
            host="localhost",
            database="mydb",
        ))

        # From existing DB model config
        source = factory.create_from_dict(db_source.config)
    """

    def __init__(self) -> None:
        """Initialize factory with default creators."""
        # Creators are consulted in list order; the first whose
        # can_create() returns True wins.
        self._creators: list[DataSourceCreator] = [
            FileSourceCreator(),
            SQLiteSourceCreator(),
            DuckDBSourceCreator(),
            PostgreSQLSourceCreator(),
            MySQLSourceCreator(),
            BigQuerySourceCreator(),
            SnowflakeSourceCreator(),
            RedshiftSourceCreator(),
            DatabricksSourceCreator(),
            OracleSourceCreator(),
            SQLServerSourceCreator(),
            MongoDBSourceCreator(),
            ElasticsearchSourceCreator(),
            KafkaSourceCreator(),
        ]

    def register_creator(self, creator: DataSourceCreator) -> None:
        """Register a custom data source creator.

        Args:
            creator: DataSourceCreator instance.
        """
        # Inserted at the front so custom creators take precedence
        # over the built-in ones.
        self._creators.insert(0, creator)

    def create(self, config: SourceConfig) -> Any:
        """Create a DataSource from configuration.

        Args:
            config: Source configuration.

        Returns:
            Truthound DataSource instance.

        Raises:
            ValueError: If source type is not supported or config is invalid.
            ImportError: If required driver is not installed.
        """
        source_type = config.source_type.lower()

        # Check for async sources
        if SourceType.is_async_type(source_type):
            raise ValueError(
                f"Async source type '{source_type}' requires async creation. "
                "Use create_async() instead."
            )

        # Find appropriate creator
        for creator in self._creators:
            if creator.can_create(config):
                return creator.create(config)

        raise ValueError(f"Unsupported source type: {source_type}")

    def create_from_dict(self, data: dict[str, Any]) -> Any:
        """Create a DataSource from a dictionary configuration.

        Args:
            data: Dictionary with source configuration.

        Returns:
            Truthound DataSource instance.
        """
        config = SourceConfig.from_dict(data)
        return self.create(config)

    async def create_async(self, config: SourceConfig) -> Any:
        """Create an async DataSource from configuration.

        Use this method for NoSQL and streaming sources that
        require async initialization.

        Args:
            config: Source configuration.

        Returns:
            Truthound async DataSource instance.

        Raises:
            ValueError: If source type doesn't support async.
        """
        source_type = config.source_type.lower()

        for creator in self._creators:
            if creator.can_create(config):
                if hasattr(creator, "create_async"):
                    return await creator.create_async(config)
                # Matched creator is sync-only: direct the caller to create().
                raise ValueError(
                    f"Source type '{source_type}' doesn't require async creation. "
                    "Use create() instead."
                )

        raise ValueError(f"Unsupported source type: {source_type}")
|
|
1331
|
+
|
|
1332
|
+
|
|
1333
|
+
# =============================================================================
|
|
1334
|
+
# Singleton and Convenience Functions
|
|
1335
|
+
# =============================================================================
|
|
1336
|
+
|
|
1337
|
+
|
|
1338
|
+
# Process-wide factory instance; created lazily by get_datasource_factory().
_factory: DataSourceFactory | None = None


def get_datasource_factory() -> DataSourceFactory:
    """Get singleton DataSourceFactory instance.

    The instance is created on first call and cached in the module-level
    ``_factory`` global for all subsequent calls.

    NOTE(review): initialization is not guarded by a lock, so concurrent
    first calls could each construct a factory — confirm this is acceptable
    for the app's startup path.

    Returns:
        DataSourceFactory singleton.
    """
    global _factory
    if _factory is None:
        _factory = DataSourceFactory()
    return _factory
|
|
1351
|
+
|
|
1352
|
+
|
|
1353
|
+
def create_datasource(config: dict[str, Any] | SourceConfig) -> Any:
    """Create a data source through the shared factory.

    Args:
        config: Source configuration (dict or SourceConfig).

    Returns:
        Truthound DataSource instance.
    """
    factory = get_datasource_factory()
    # Normalize a raw dict into SourceConfig so both input forms take
    # the same creation path.
    if isinstance(config, dict):
        config = SourceConfig.from_dict(config)
    return factory.create(config)
|
|
1367
|
+
|
|
1368
|
+
|
|
1369
|
+
async def create_datasource_async(config: dict[str, Any] | SourceConfig) -> Any:
    """Create an async data source through the shared factory.

    Args:
        config: Source configuration (dict or SourceConfig).

    Returns:
        Truthound async DataSource instance.
    """
    resolved = SourceConfig.from_dict(config) if isinstance(config, dict) else config
    return await get_datasource_factory().create_async(resolved)
|
|
1383
|
+
|
|
1384
|
+
|
|
1385
|
+
def get_source_path_or_datasource(
    source_type: str,
    config: dict[str, Any],
) -> str | Any:
    """Return a file path for file sources, or a DataSource otherwise.

    Backward-compatibility helper: file-based sources resolve to their
    "path" config entry (empty string when absent), while every other
    source type is materialized through create_datasource().

    Args:
        source_type: Source type string.
        config: Source configuration dict.

    Returns:
        File path string or DataSource object.
    """
    if not SourceType.is_file_type(source_type):
        # Non-file source: fold the type into the config and build it.
        return create_datasource({"type": source_type, **config})
    return config.get("path", "")
|
|
1408
|
+
|
|
1409
|
+
|
|
1410
|
+
# =============================================================================
|
|
1411
|
+
# Utility Functions
|
|
1412
|
+
# =============================================================================
|
|
1413
|
+
|
|
1414
|
+
|
|
1415
|
+
def detect_file_type(path: str | Path) -> str | None:
|
|
1416
|
+
"""Detect file type from path extension.
|
|
1417
|
+
|
|
1418
|
+
Args:
|
|
1419
|
+
path: File path.
|
|
1420
|
+
|
|
1421
|
+
Returns:
|
|
1422
|
+
File type string or None if unknown.
|
|
1423
|
+
"""
|
|
1424
|
+
ext_map = {
|
|
1425
|
+
".csv": "csv",
|
|
1426
|
+
".parquet": "parquet",
|
|
1427
|
+
".pq": "parquet",
|
|
1428
|
+
".json": "json",
|
|
1429
|
+
".ndjson": "ndjson",
|
|
1430
|
+
".jsonl": "jsonl",
|
|
1431
|
+
}
|
|
1432
|
+
|
|
1433
|
+
path = Path(path)
|
|
1434
|
+
ext = path.suffix.lower()
|
|
1435
|
+
return ext_map.get(ext)
|
|
1436
|
+
|
|
1437
|
+
|
|
1438
|
+
def is_truthound_available() -> bool:
    """Report whether the truthound library can be imported.

    Returns:
        True if truthound can be imported.
    """
    try:
        import truthound  # noqa: F401
    except ImportError:
        return False
    return True
|
|
1449
|
+
|
|
1450
|
+
|
|
1451
|
+
def get_truthound_version() -> str | None:
|
|
1452
|
+
"""Get truthound library version if available.
|
|
1453
|
+
|
|
1454
|
+
Returns:
|
|
1455
|
+
Version string or None.
|
|
1456
|
+
"""
|
|
1457
|
+
try:
|
|
1458
|
+
import truthound
|
|
1459
|
+
return getattr(truthound, "__version__", None)
|
|
1460
|
+
except ImportError:
|
|
1461
|
+
return None
|
|
1462
|
+
|
|
1463
|
+
|
|
1464
|
+
def get_datasource_auto(
|
|
1465
|
+
data: Any,
|
|
1466
|
+
*,
|
|
1467
|
+
table: str | None = None,
|
|
1468
|
+
query: str | None = None,
|
|
1469
|
+
**kwargs: Any,
|
|
1470
|
+
) -> Any:
|
|
1471
|
+
"""Auto-detect and create a DataSource using truthound's get_datasource.
|
|
1472
|
+
|
|
1473
|
+
This function wraps truthound.datasources.get_datasource() for auto-detection
|
|
1474
|
+
of data source types. It's the recommended way to create DataSources when
|
|
1475
|
+
the type can be inferred from the input.
|
|
1476
|
+
|
|
1477
|
+
Args:
|
|
1478
|
+
data: One of:
|
|
1479
|
+
- Polars DataFrame/LazyFrame
|
|
1480
|
+
- Pandas DataFrame
|
|
1481
|
+
- PySpark DataFrame
|
|
1482
|
+
- Dictionary (column -> values)
|
|
1483
|
+
- File path string (csv, parquet, json, etc.)
|
|
1484
|
+
- SQL connection string (postgresql://, mysql://, etc.)
|
|
1485
|
+
table: Table name for SQL sources.
|
|
1486
|
+
query: Custom SQL query for SQL sources.
|
|
1487
|
+
**kwargs: Additional arguments passed to the DataSource constructor.
|
|
1488
|
+
|
|
1489
|
+
Returns:
|
|
1490
|
+
Appropriate DataSource for the input data type.
|
|
1491
|
+
|
|
1492
|
+
Raises:
|
|
1493
|
+
ImportError: If truthound is not installed.
|
|
1494
|
+
ValueError: If data type cannot be detected.
|
|
1495
|
+
|
|
1496
|
+
Example:
|
|
1497
|
+
# Auto-detect from Polars DataFrame
|
|
1498
|
+
ds = get_datasource_auto(pl_df)
|
|
1499
|
+
|
|
1500
|
+
# Auto-detect from file path
|
|
1501
|
+
ds = get_datasource_auto("data.parquet")
|
|
1502
|
+
|
|
1503
|
+
# Auto-detect from connection string
|
|
1504
|
+
ds = get_datasource_auto(
|
|
1505
|
+
"postgresql://user:pass@localhost/db",
|
|
1506
|
+
table="users",
|
|
1507
|
+
)
|
|
1508
|
+
"""
|
|
1509
|
+
try:
|
|
1510
|
+
from truthound.datasources import get_datasource
|
|
1511
|
+
return get_datasource(data, table=table, query=query, **kwargs)
|
|
1512
|
+
except ImportError:
|
|
1513
|
+
raise ImportError(
|
|
1514
|
+
"truthound is not installed. Install with: pip install truthound"
|
|
1515
|
+
)
|
|
1516
|
+
|
|
1517
|
+
|
|
1518
|
+
# =============================================================================
|
|
1519
|
+
# Connection Testing
|
|
1520
|
+
# =============================================================================
|
|
1521
|
+
|
|
1522
|
+
|
|
1523
|
+
async def test_connection(config: SourceConfig | dict[str, Any]) -> dict[str, Any]:
    """Test connection to a data source.

    This function attempts to connect to the data source and retrieve
    basic metadata to verify connectivity. It never raises: every
    failure mode is reported through the returned dict.

    Args:
        config: Source configuration (SourceConfig or dict).

    Returns:
        Dictionary with connection test results:
        - success: bool - Whether connection succeeded
        - message: str - Success or error message
        - metadata: dict | None - Source metadata if successful, holding:
            - name: str - Source name
            - row_count: int | None - Row count if available
            - columns: list[str] | None - Column names if available
            - capabilities: list[str] | None - Source capabilities

    Example:
        result = await test_connection({
            "type": "postgresql",
            "host": "localhost",
            "database": "mydb",
            "table": "users",
        })
        if result["success"]:
            print(f"Connected! Found {result['metadata']['row_count']} rows")
        else:
            print(f"Connection failed: {result['message']}")
    """
    import asyncio

    if isinstance(config, dict):
        config = SourceConfig.from_dict(config)

    result: dict[str, Any] = {
        "success": False,
        "message": "",
        "metadata": None,
    }

    try:
        factory = get_datasource_factory()

        # Create datasource (may be async for MongoDB, ES, Kafka)
        if SourceType.is_async_type(config.source_type):
            datasource = await factory.create_async(config)
        else:
            # Run sync creation off the event loop so it doesn't block.
            # asyncio.to_thread replaces the deprecated
            # get_event_loop()/run_in_executor pattern and avoids spinning
            # up a ThreadPoolExecutor per call.
            datasource = await asyncio.to_thread(factory.create, config)

        # Extract metadata; each probe below is best-effort and records
        # None on failure rather than failing the whole connection test.
        metadata: dict[str, Any] = {
            "name": getattr(datasource, "name", config.name or "unknown"),
        }

        # Try to get row count
        if hasattr(datasource, "row_count"):
            try:
                metadata["row_count"] = datasource.row_count
            except Exception:
                metadata["row_count"] = None

        # Try to get columns
        if hasattr(datasource, "columns"):
            try:
                metadata["columns"] = datasource.columns
            except Exception:
                metadata["columns"] = None

        # Try to get capabilities
        if hasattr(datasource, "capabilities"):
            try:
                metadata["capabilities"] = [c.name for c in datasource.capabilities]
            except Exception:
                metadata["capabilities"] = None

        result["success"] = True
        result["message"] = "Connection successful"
        result["metadata"] = metadata

    except FileNotFoundError as e:
        result["message"] = f"File not found: {e}"
    except ImportError as e:
        result["message"] = f"Missing dependency: {e}"
    except ValueError as e:
        result["message"] = f"Configuration error: {e}"
    except Exception as e:
        result["message"] = f"Connection failed: {type(e).__name__}: {e}"

    return result
|
|
1623
|
+
|
|
1624
|
+
|
|
1625
|
+
def get_source_capabilities(
    source_type: str,
) -> set[str]:
    """Get the capabilities supported by a source type.

    Args:
        source_type: Source type string (e.g., "postgresql", "csv").

    Returns:
        Set of capability names supported by the source type.
    """
    from truthound_dashboard.core.interfaces import DataSourceCapability

    cap = DataSourceCapability
    # Shared capability bundles; these sets are only read, never mutated,
    # so aliasing one object across several entries is safe.
    file_caps = {cap.SCHEMA_INFERENCE, cap.LAZY_EVALUATION}
    streaming_file_caps = file_caps | {cap.STREAMING}
    sql_caps = {cap.SQL_PUSHDOWN, cap.ROW_COUNT, cap.SAMPLING, cap.CONNECTION_TEST}
    nosql_caps = {cap.SAMPLING, cap.CONNECTION_TEST}

    capability_map: dict[str, set[DataSourceCapability]] = {
        # File sources
        "file": file_caps,
        "csv": file_caps,
        "parquet": file_caps | {cap.ROW_COUNT},
        "json": file_caps,
        "ndjson": streaming_file_caps,
        "jsonl": streaming_file_caps,
        # SQL sources
        "sqlite": sql_caps,
        "postgresql": sql_caps,
        "mysql": sql_caps,
        "duckdb": {cap.SQL_PUSHDOWN, cap.ROW_COUNT, cap.SAMPLING, cap.LAZY_EVALUATION},
        # Cloud data warehouses
        "bigquery": sql_caps,
        "snowflake": sql_caps,
        "redshift": sql_caps,
        "databricks": sql_caps,
        # Enterprise databases
        "oracle": sql_caps,
        "sqlserver": sql_caps,
        # NoSQL
        "mongodb": nosql_caps,
        "elasticsearch": nosql_caps,
        # Streaming
        "kafka": {cap.STREAMING, cap.CONNECTION_TEST},
        # In-memory DataFrames
        "polars": {cap.LAZY_EVALUATION, cap.ROW_COUNT, cap.SCHEMA_INFERENCE},
        "pandas": {cap.ROW_COUNT, cap.SCHEMA_INFERENCE},
    }

    matched = capability_map.get(source_type.lower(), set())
    return {member.name for member in matched}