sqlspec 0.13.1__py3-none-any.whl → 0.16.2__py3-none-any.whl
This diff shows the published contents of two package versions as they appear in their respective public registries; it is provided for informational purposes only.
Potentially problematic release.
- sqlspec/__init__.py +71 -8
- sqlspec/__main__.py +12 -0
- sqlspec/__metadata__.py +1 -3
- sqlspec/_serialization.py +1 -2
- sqlspec/_sql.py +930 -136
- sqlspec/_typing.py +278 -142
- sqlspec/adapters/adbc/__init__.py +4 -3
- sqlspec/adapters/adbc/_types.py +12 -0
- sqlspec/adapters/adbc/config.py +116 -285
- sqlspec/adapters/adbc/driver.py +462 -340
- sqlspec/adapters/aiosqlite/__init__.py +18 -3
- sqlspec/adapters/aiosqlite/_types.py +13 -0
- sqlspec/adapters/aiosqlite/config.py +202 -150
- sqlspec/adapters/aiosqlite/driver.py +226 -247
- sqlspec/adapters/asyncmy/__init__.py +18 -3
- sqlspec/adapters/asyncmy/_types.py +12 -0
- sqlspec/adapters/asyncmy/config.py +80 -199
- sqlspec/adapters/asyncmy/driver.py +257 -215
- sqlspec/adapters/asyncpg/__init__.py +19 -4
- sqlspec/adapters/asyncpg/_types.py +17 -0
- sqlspec/adapters/asyncpg/config.py +81 -214
- sqlspec/adapters/asyncpg/driver.py +284 -359
- sqlspec/adapters/bigquery/__init__.py +17 -3
- sqlspec/adapters/bigquery/_types.py +12 -0
- sqlspec/adapters/bigquery/config.py +191 -299
- sqlspec/adapters/bigquery/driver.py +474 -634
- sqlspec/adapters/duckdb/__init__.py +14 -3
- sqlspec/adapters/duckdb/_types.py +12 -0
- sqlspec/adapters/duckdb/config.py +414 -397
- sqlspec/adapters/duckdb/driver.py +342 -393
- sqlspec/adapters/oracledb/__init__.py +19 -5
- sqlspec/adapters/oracledb/_types.py +14 -0
- sqlspec/adapters/oracledb/config.py +123 -458
- sqlspec/adapters/oracledb/driver.py +505 -531
- sqlspec/adapters/psqlpy/__init__.py +13 -3
- sqlspec/adapters/psqlpy/_types.py +11 -0
- sqlspec/adapters/psqlpy/config.py +93 -307
- sqlspec/adapters/psqlpy/driver.py +504 -213
- sqlspec/adapters/psycopg/__init__.py +19 -5
- sqlspec/adapters/psycopg/_types.py +17 -0
- sqlspec/adapters/psycopg/config.py +143 -472
- sqlspec/adapters/psycopg/driver.py +704 -825
- sqlspec/adapters/sqlite/__init__.py +14 -3
- sqlspec/adapters/sqlite/_types.py +11 -0
- sqlspec/adapters/sqlite/config.py +208 -142
- sqlspec/adapters/sqlite/driver.py +263 -278
- sqlspec/base.py +105 -9
- sqlspec/{statement/builder → builder}/__init__.py +12 -14
- sqlspec/{statement/builder/base.py → builder/_base.py} +184 -86
- sqlspec/{statement/builder/column.py → builder/_column.py} +97 -60
- sqlspec/{statement/builder/ddl.py → builder/_ddl.py} +61 -131
- sqlspec/{statement/builder → builder}/_ddl_utils.py +4 -10
- sqlspec/{statement/builder/delete.py → builder/_delete.py} +10 -30
- sqlspec/builder/_insert.py +421 -0
- sqlspec/builder/_merge.py +71 -0
- sqlspec/{statement/builder → builder}/_parsing_utils.py +49 -26
- sqlspec/builder/_select.py +170 -0
- sqlspec/{statement/builder/update.py → builder/_update.py} +16 -20
- sqlspec/builder/mixins/__init__.py +55 -0
- sqlspec/builder/mixins/_cte_and_set_ops.py +222 -0
- sqlspec/{statement/builder/mixins/_delete_from.py → builder/mixins/_delete_operations.py} +8 -1
- sqlspec/builder/mixins/_insert_operations.py +244 -0
- sqlspec/{statement/builder/mixins/_join.py → builder/mixins/_join_operations.py} +45 -13
- sqlspec/{statement/builder/mixins/_merge_clauses.py → builder/mixins/_merge_operations.py} +188 -30
- sqlspec/builder/mixins/_order_limit_operations.py +135 -0
- sqlspec/builder/mixins/_pivot_operations.py +153 -0
- sqlspec/builder/mixins/_select_operations.py +604 -0
- sqlspec/builder/mixins/_update_operations.py +202 -0
- sqlspec/builder/mixins/_where_clause.py +644 -0
- sqlspec/cli.py +247 -0
- sqlspec/config.py +183 -138
- sqlspec/core/__init__.py +63 -0
- sqlspec/core/cache.py +871 -0
- sqlspec/core/compiler.py +417 -0
- sqlspec/core/filters.py +830 -0
- sqlspec/core/hashing.py +310 -0
- sqlspec/core/parameters.py +1237 -0
- sqlspec/core/result.py +677 -0
- sqlspec/{statement → core}/splitter.py +321 -191
- sqlspec/core/statement.py +676 -0
- sqlspec/driver/__init__.py +7 -10
- sqlspec/driver/_async.py +422 -163
- sqlspec/driver/_common.py +545 -287
- sqlspec/driver/_sync.py +426 -160
- sqlspec/driver/mixins/__init__.py +2 -13
- sqlspec/driver/mixins/_result_tools.py +193 -0
- sqlspec/driver/mixins/_sql_translator.py +65 -14
- sqlspec/exceptions.py +5 -252
- sqlspec/extensions/aiosql/adapter.py +93 -96
- sqlspec/extensions/litestar/__init__.py +2 -1
- sqlspec/extensions/litestar/cli.py +48 -0
- sqlspec/extensions/litestar/config.py +0 -1
- sqlspec/extensions/litestar/handlers.py +15 -26
- sqlspec/extensions/litestar/plugin.py +21 -16
- sqlspec/extensions/litestar/providers.py +17 -52
- sqlspec/loader.py +423 -104
- sqlspec/migrations/__init__.py +35 -0
- sqlspec/migrations/base.py +414 -0
- sqlspec/migrations/commands.py +443 -0
- sqlspec/migrations/loaders.py +402 -0
- sqlspec/migrations/runner.py +213 -0
- sqlspec/migrations/tracker.py +140 -0
- sqlspec/migrations/utils.py +129 -0
- sqlspec/protocols.py +51 -186
- sqlspec/storage/__init__.py +1 -1
- sqlspec/storage/backends/base.py +37 -40
- sqlspec/storage/backends/fsspec.py +136 -112
- sqlspec/storage/backends/obstore.py +138 -160
- sqlspec/storage/capabilities.py +5 -4
- sqlspec/storage/registry.py +57 -106
- sqlspec/typing.py +136 -115
- sqlspec/utils/__init__.py +2 -2
- sqlspec/utils/correlation.py +0 -3
- sqlspec/utils/deprecation.py +6 -6
- sqlspec/utils/fixtures.py +6 -6
- sqlspec/utils/logging.py +0 -2
- sqlspec/utils/module_loader.py +7 -12
- sqlspec/utils/singleton.py +0 -1
- sqlspec/utils/sync_tools.py +17 -38
- sqlspec/utils/text.py +12 -51
- sqlspec/utils/type_guards.py +482 -235
- {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/METADATA +7 -2
- sqlspec-0.16.2.dist-info/RECORD +134 -0
- sqlspec-0.16.2.dist-info/entry_points.txt +2 -0
- sqlspec/driver/connection.py +0 -207
- sqlspec/driver/mixins/_csv_writer.py +0 -91
- sqlspec/driver/mixins/_pipeline.py +0 -512
- sqlspec/driver/mixins/_result_utils.py +0 -140
- sqlspec/driver/mixins/_storage.py +0 -926
- sqlspec/driver/mixins/_type_coercion.py +0 -130
- sqlspec/driver/parameters.py +0 -138
- sqlspec/service/__init__.py +0 -4
- sqlspec/service/_util.py +0 -147
- sqlspec/service/base.py +0 -1131
- sqlspec/service/pagination.py +0 -26
- sqlspec/statement/__init__.py +0 -21
- sqlspec/statement/builder/insert.py +0 -288
- sqlspec/statement/builder/merge.py +0 -95
- sqlspec/statement/builder/mixins/__init__.py +0 -65
- sqlspec/statement/builder/mixins/_aggregate_functions.py +0 -250
- sqlspec/statement/builder/mixins/_case_builder.py +0 -91
- sqlspec/statement/builder/mixins/_common_table_expr.py +0 -90
- sqlspec/statement/builder/mixins/_from.py +0 -63
- sqlspec/statement/builder/mixins/_group_by.py +0 -118
- sqlspec/statement/builder/mixins/_having.py +0 -35
- sqlspec/statement/builder/mixins/_insert_from_select.py +0 -47
- sqlspec/statement/builder/mixins/_insert_into.py +0 -36
- sqlspec/statement/builder/mixins/_insert_values.py +0 -67
- sqlspec/statement/builder/mixins/_limit_offset.py +0 -53
- sqlspec/statement/builder/mixins/_order_by.py +0 -46
- sqlspec/statement/builder/mixins/_pivot.py +0 -79
- sqlspec/statement/builder/mixins/_returning.py +0 -37
- sqlspec/statement/builder/mixins/_select_columns.py +0 -61
- sqlspec/statement/builder/mixins/_set_ops.py +0 -122
- sqlspec/statement/builder/mixins/_unpivot.py +0 -77
- sqlspec/statement/builder/mixins/_update_from.py +0 -55
- sqlspec/statement/builder/mixins/_update_set.py +0 -94
- sqlspec/statement/builder/mixins/_update_table.py +0 -29
- sqlspec/statement/builder/mixins/_where.py +0 -401
- sqlspec/statement/builder/mixins/_window_functions.py +0 -86
- sqlspec/statement/builder/select.py +0 -221
- sqlspec/statement/filters.py +0 -596
- sqlspec/statement/parameter_manager.py +0 -220
- sqlspec/statement/parameters.py +0 -867
- sqlspec/statement/pipelines/__init__.py +0 -210
- sqlspec/statement/pipelines/analyzers/__init__.py +0 -9
- sqlspec/statement/pipelines/analyzers/_analyzer.py +0 -646
- sqlspec/statement/pipelines/context.py +0 -115
- sqlspec/statement/pipelines/transformers/__init__.py +0 -7
- sqlspec/statement/pipelines/transformers/_expression_simplifier.py +0 -88
- sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +0 -1247
- sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +0 -76
- sqlspec/statement/pipelines/validators/__init__.py +0 -23
- sqlspec/statement/pipelines/validators/_dml_safety.py +0 -290
- sqlspec/statement/pipelines/validators/_parameter_style.py +0 -370
- sqlspec/statement/pipelines/validators/_performance.py +0 -718
- sqlspec/statement/pipelines/validators/_security.py +0 -967
- sqlspec/statement/result.py +0 -435
- sqlspec/statement/sql.py +0 -1704
- sqlspec/statement/sql_compiler.py +0 -140
- sqlspec/utils/cached_property.py +0 -25
- sqlspec-0.13.1.dist-info/RECORD +0 -150
- {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/WHEEL +0 -0
- {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/licenses/LICENSE +0 -0
- {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/licenses/NOTICE +0 -0
sqlspec/driver/mixins/_storage.py (removed)
@@ -1,926 +0,0 @@
"""Unified storage operations for database drivers.

This module provides the new simplified storage architecture that replaces
the complex web of Arrow, Export, Copy, and ResultConverter mixins with
just two comprehensive mixins: SyncStorageMixin and AsyncStorageMixin.

These mixins provide intelligent routing between native database capabilities
and storage backend operations for optimal performance.
"""

# pyright: reportCallIssue=false, reportAttributeAccessIssue=false, reportArgumentType=false
import logging
import tempfile
from abc import ABC
from dataclasses import replace
from pathlib import Path
from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union, cast
from urllib.parse import urlparse

from sqlspec.driver.mixins._csv_writer import write_csv
from sqlspec.driver.parameters import separate_filters_and_parameters
from sqlspec.exceptions import MissingDependencyError
from sqlspec.statement import SQL, ArrowResult, StatementFilter
from sqlspec.storage import storage_registry
from sqlspec.typing import ArrowTable, RowT, StatementParameters
from sqlspec.utils.serializers import to_json
from sqlspec.utils.sync_tools import async_

if TYPE_CHECKING:
    from sqlglot.dialects.dialect import DialectType

    from sqlspec.protocols import ObjectStoreProtocol
    from sqlspec.statement import SQLResult, Statement
    from sqlspec.statement.sql import SQLConfig
    from sqlspec.typing import ConnectionT

__all__ = ("AsyncStorageMixin", "SyncStorageMixin")

logger = logging.getLogger(__name__)

WINDOWS_PATH_MIN_LENGTH = 3


class StorageMixinBase(ABC):
    """Base class with common storage functionality."""

    __slots__ = ()

    config: Any
    _connection: Any
    dialect: "DialectType"
    supports_native_parquet_export: "ClassVar[bool]"
    supports_native_parquet_import: "ClassVar[bool]"

    @staticmethod
    def _ensure_pyarrow_installed() -> None:
        """Ensure PyArrow is installed for Arrow operations."""
        from sqlspec.typing import PYARROW_INSTALLED

        if not PYARROW_INSTALLED:
            msg = "pyarrow is required for Arrow operations. Install with: pip install pyarrow"
            raise MissingDependencyError(msg)

    @staticmethod
    def _get_storage_backend(uri_or_key: "Union[str, Path]") -> "ObjectStoreProtocol":
        """Get storage backend by URI or key with intelligent routing."""
        if isinstance(uri_or_key, Path):
            return storage_registry.get(uri_or_key)
        return storage_registry.get(str(uri_or_key))

    @staticmethod
    def _is_uri(path_or_uri: "Union[str, Path]") -> bool:
        """Check if input is a URI rather than a relative path."""
        path_str = str(path_or_uri)
        schemes = {"s3", "gs", "gcs", "az", "azure", "abfs", "abfss", "file", "http", "https"}
        if "://" in path_str:
            scheme = path_str.split("://", maxsplit=1)[0].lower()
            return scheme in schemes
        if len(path_str) >= WINDOWS_PATH_MIN_LENGTH and path_str[1:3] == ":\\":
            return True
        return bool(path_str.startswith("/"))

    @staticmethod
    def _detect_format(uri: "Union[str, Path]") -> str:
        """Detect file format from URI extension."""
        uri_str = str(uri)
        parsed = urlparse(uri_str)
        path = Path(parsed.path)
        extension = path.suffix.lower().lstrip(".")

        format_map = {
            "csv": "csv",
            "tsv": "csv",
            "txt": "csv",
            "parquet": "parquet",
            "pq": "parquet",
            "json": "json",
            "jsonl": "jsonl",
            "ndjson": "jsonl",
        }

        return format_map.get(extension, "csv")

    def _resolve_backend_and_path(self, uri: "Union[str, Path]") -> "tuple[ObjectStoreProtocol, str]":
        """Resolve backend and path from URI with Phase 3 URI-first routing.

        Args:
            uri: URI to resolve (e.g., "s3://bucket/path", "file:///local/path", Path object)

        Returns:
            Tuple of (backend, path) where path is relative to the backend's base path
        """
        uri_str = str(uri)
        original_path = uri_str

        if self._is_uri(uri_str) and "://" not in uri_str:
            uri_str = f"file://{uri_str}"

        backend = self._get_storage_backend(uri_str)

        path = uri_str[7:] if uri_str.startswith("file://") else original_path

        return backend, path

    @staticmethod
    def _rows_to_arrow_table(rows: "list[RowT]", columns: "list[str]") -> ArrowTable:
        """Convert rows to Arrow table."""
        import pyarrow as pa

        if not rows:
            empty_data: dict[str, list[Any]] = {col: [] for col in columns}
            return pa.table(empty_data)

        if isinstance(rows[0], dict):
            # Dict rows
            data = {col: [cast("dict[str, Any]", row).get(col) for row in rows] for col in columns}
        else:
            # Tuple/list rows
            data = {col: [cast("tuple[Any, ...]", row)[i] for row in rows] for i, col in enumerate(columns)}

        return pa.table(data)


class SyncStorageMixin(StorageMixinBase):
    """Unified storage operations for synchronous drivers."""

    __slots__ = ()

    def ingest_arrow_table(self, table: "ArrowTable", table_name: str, mode: str = "create", **options: Any) -> int:
        """Ingest an Arrow table into the database.

        This public method provides a consistent entry point and can be used for
        instrumentation, logging, etc., while delegating the actual work to the
        driver-specific `_ingest_arrow_table` implementation.
        """
        return self._ingest_arrow_table(table, table_name, mode, **options)

    def _ingest_arrow_table(self, table: "ArrowTable", table_name: str, mode: str = "create", **options: Any) -> int:
        """Generic fallback for ingesting an Arrow table.

        This implementation writes the Arrow table to a temporary Parquet file
        and then uses the driver's generic `_bulk_load_file` capability.
        Drivers with more efficient, native Arrow ingestion methods should override this.
        """
        import pyarrow.parquet as pq

        with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
            tmp_path = Path(tmp.name)
            pq.write_table(table, tmp_path)  # pyright: ignore

        try:
            # Use database's bulk load capabilities for Parquet
            return self._bulk_load_file(tmp_path, table_name, "parquet", mode, **options)
        finally:
            tmp_path.unlink(missing_ok=True)

    # ============================================================================
    # Core Arrow Operations
    # ============================================================================

    def fetch_arrow_table(
        self,
        statement: "Statement",
        /,
        *parameters: "Union[StatementParameters, StatementFilter]",
        _connection: "Optional[ConnectionT]" = None,
        _config: "Optional[SQLConfig]" = None,
        **kwargs: Any,
    ) -> "ArrowResult":
        """Fetch query results as Arrow table with intelligent routing.

        Args:
            statement: SQL statement (string, SQL object, or sqlglot Expression)
            *parameters: Mixed parameters and filters
            _connection: Optional connection override
            _config: Optional SQL config override
            **kwargs: Additional options

        Returns:
            ArrowResult wrapping the Arrow table
        """
        self._ensure_pyarrow_installed()

        filters, params = separate_filters_and_parameters(parameters)
        # Convert to SQL object for processing
        # Use a custom config if transformations will add parameters
        if _config is None:
            _config = self.config

        # If no parameters provided but we have transformations enabled,
        # disable parameter validation entirely to allow transformer-added parameters
        if params is None and _config and _config.enable_transformations:
            # Disable validation entirely for transformer-generated parameters
            _config = replace(_config, strict_mode=False, enable_validation=False)

        # Only pass params if it's not None to avoid adding None as a parameter
        if params is not None:
            sql = SQL(statement, params, *filters, config=_config, **kwargs)
        else:
            sql = SQL(statement, *filters, config=_config, **kwargs)

        return self._fetch_arrow_table(sql, connection=_connection, **kwargs)

    def _fetch_arrow_table(self, sql: SQL, connection: "Optional[ConnectionT]" = None, **kwargs: Any) -> "ArrowResult":
        """Generic fallback for Arrow table fetching.

        This method executes a regular query and converts the results to Arrow format.
        Drivers can call this method when they don't have native Arrow support.

        Args:
            sql: SQL object to execute
            connection: Optional connection override
            **kwargs: Additional options (unused in fallback)

        Returns:
            ArrowResult with converted data
        """
        try:
            result = cast("SQLResult", self.execute(sql, _connection=connection))  # type: ignore[attr-defined]
        except Exception:
            compiled_sql, compiled_params = sql.compile("qmark")

            # Execute directly via the driver's _execute method
            driver_result = self._execute(compiled_sql, compiled_params, sql, connection=connection)  # type: ignore[attr-defined]

            # Wrap the result as a SQLResult
            if "data" in driver_result:
                # It's a SELECT result
                result = self._wrap_select_result(sql, driver_result)  # type: ignore[attr-defined]
            else:
                # It's a DML result
                result = self._wrap_execute_result(sql, driver_result)  # type: ignore[attr-defined]

        data = result.data or []
        columns = result.column_names or []
        arrow_table = self._rows_to_arrow_table(data, columns)
        return ArrowResult(statement=sql, data=arrow_table)

    # ============================================================================
    # Storage Integration Operations
    # ============================================================================

    def export_to_storage(
        self,
        statement: "Statement",
        /,
        *parameters: "Union[StatementParameters, StatementFilter]",
        destination_uri: "Union[str, Path]",
        format: "Optional[str]" = None,
        _connection: "Optional[ConnectionT]" = None,
        _config: "Optional[SQLConfig]" = None,
        **options: Any,
    ) -> int:
        """Export query results to storage with intelligent routing.

        Provides instrumentation and delegates to _export_to_storage() for consistent operation.

        Args:
            statement: SQL query to execute and export
            *parameters: Mixed parameters and filters
            destination_uri: URI to export data to
            format: Optional format override (auto-detected from URI if not provided)
            _connection: Optional connection override
            _config: Optional SQL config override
            **options: Additional export options AND named parameters for query

        Returns:
            Number of rows exported
        """
        filters, params = separate_filters_and_parameters(parameters)

        # For storage operations, disable transformations that might add unwanted parameters
        if _config is None:
            _config = self.config
        if _config and not _config.dialect:
            _config = replace(_config, dialect=self.dialect)
        if _config and _config.enable_transformations:
            _config = replace(_config, enable_transformations=False)

        sql = (
            SQL(statement, parameters=params, config=_config) if params is not None else SQL(statement, config=_config)
        )
        for filter_ in filters:
            sql = sql.filter(filter_)

        return self._export_to_storage(
            sql, destination_uri=destination_uri, format=format, _connection=_connection, **options
        )

    def _export_to_storage(
        self,
        sql: "SQL",
        destination_uri: "Union[str, Path]",
        format: "Optional[str]" = None,
        _connection: "Optional[ConnectionT]" = None,
        **kwargs: Any,
    ) -> int:
        """Protected method for sync export operation implementation."""
        detected_format = self._detect_format(destination_uri)
        if format:
            file_format = format
        elif detected_format == "csv" and not str(destination_uri).endswith((".csv", ".tsv", ".txt")):
            # Detection returned default "csv" but file doesn't actually have CSV extension
            file_format = "parquet"
        else:
            file_format = detected_format

        # destination doesn't have .parquet extension, add it to ensure compatibility
        # with pyarrow.parquet.read_table() which requires the extension
        if file_format == "parquet" and not str(destination_uri).endswith(".parquet"):
            destination_uri = f"{destination_uri}.parquet"

        # Use storage backend - resolve AFTER modifying destination_uri
        backend, path = self._resolve_backend_and_path(destination_uri)

        # Try native database export first
        if file_format == "parquet" and self.supports_native_parquet_export:
            try:
                compiled_sql, _ = sql.compile(placeholder_style="static")
                return self._export_native(compiled_sql, destination_uri, file_format, **kwargs)
            except NotImplementedError:
                # Fall through to use storage backend
                pass

        if file_format == "parquet":
            # Use Arrow for efficient transfer
            arrow_result = self._fetch_arrow_table(sql, connection=_connection, **kwargs)
            arrow_table = arrow_result.data
            num_rows = arrow_table.num_rows
            backend.write_arrow(path, arrow_table, **kwargs)
            return num_rows

        return self._export_via_backend(sql, backend, path, file_format, **kwargs)

    def import_from_storage(
        self,
        source_uri: "Union[str, Path]",
        table_name: str,
        format: "Optional[str]" = None,
        mode: str = "create",
        **options: Any,
    ) -> int:
        """Import data from storage with intelligent routing.

        Provides instrumentation and delegates to _import_from_storage() for consistent operation.

        Args:
            source_uri: URI to import data from
            table_name: Target table name
            format: Optional format override (auto-detected from URI if not provided)
            mode: Import mode ('create', 'append', 'replace')
            **options: Additional import options

        Returns:
            Number of rows imported
        """
        return self._import_from_storage(source_uri, table_name, format, mode, **options)

    def _import_from_storage(
        self,
        source_uri: "Union[str, Path]",
        table_name: str,
        format: "Optional[str]" = None,
        mode: str = "create",
        **options: Any,
    ) -> int:
        """Protected method for import operation implementation.

        Args:
            source_uri: URI to import data from
            table_name: Target table name
            format: Optional format override (auto-detected from URI if not provided)
            mode: Import mode ('create', 'append', 'replace')
            **options: Additional import options

        Returns:
            Number of rows imported
        """
        # Auto-detect format if not provided
        file_format = format or self._detect_format(source_uri)

        # Try native database import first
        if file_format == "parquet" and self.supports_native_parquet_import:
            return self._import_native(source_uri, table_name, file_format, mode, **options)

        # Use storage backend
        backend, path = self._resolve_backend_and_path(source_uri)

        if file_format == "parquet":
            try:
                # Use Arrow for efficient transfer
                arrow_table = backend.read_arrow(path, **options)
                return self.ingest_arrow_table(arrow_table, table_name, mode=mode)
            except AttributeError:
                # Backend doesn't support read_arrow, try alternative approach
                try:
                    import pyarrow.parquet as pq

                    # Read Parquet file directly
                    with tempfile.NamedTemporaryFile(mode="wb", suffix=".parquet", delete=False) as tmp:
                        tmp.write(backend.read_bytes(path))
                        tmp_path = Path(tmp.name)
                    try:
                        arrow_table = pq.read_table(tmp_path)
                        return self.ingest_arrow_table(arrow_table, table_name, mode=mode)
                    finally:
                        tmp_path.unlink(missing_ok=True)
                except ImportError:
                    # PyArrow not installed, cannot import Parquet
                    msg = "PyArrow is required to import Parquet files. Install with: pip install pyarrow"
                    raise ImportError(msg) from None

        # Use traditional import through temporary file
        return self._import_via_backend(backend, path, table_name, file_format, mode, **options)

    # ============================================================================
    # Database-Specific Implementation Hooks
    # ============================================================================

    def _read_parquet_native(
        self, source_uri: "Union[str, Path]", columns: "Optional[list[str]]" = None, **options: Any
    ) -> "SQLResult":
        """Database-specific native Parquet reading. Override in drivers."""
        msg = "Driver should implement _read_parquet_native"
        raise NotImplementedError(msg)

    def _write_parquet_native(
        self, data: Union[str, ArrowTable], destination_uri: "Union[str, Path]", **options: Any
    ) -> None:
        """Database-specific native Parquet writing. Override in drivers."""
        msg = "Driver should implement _write_parquet_native"
        raise NotImplementedError(msg)

    def _export_native(self, query: str, destination_uri: "Union[str, Path]", format: str, **options: Any) -> int:
        """Database-specific native export. Override in drivers."""
        msg = "Driver should implement _export_native"
        raise NotImplementedError(msg)

    def _import_native(
        self, source_uri: "Union[str, Path]", table_name: str, format: str, mode: str, **options: Any
    ) -> int:
        """Database-specific native import. Override in drivers."""
        msg = "Driver should implement _import_native"
        raise NotImplementedError(msg)

    def _export_via_backend(
        self, sql_obj: "SQL", backend: "ObjectStoreProtocol", path: str, format: str, **options: Any
    ) -> int:
        """Export via storage backend using temporary file."""

        # Execute query and get results - use the SQL object directly
        try:
            result = cast("SQLResult", self.execute(sql_obj))  # type: ignore[attr-defined]
        except Exception:
            # Fall back to direct execution
            compiled_sql, compiled_params = sql_obj.compile("qmark")
            driver_result = self._execute(compiled_sql, compiled_params, sql_obj)  # type: ignore[attr-defined]
            if "data" in driver_result:
                result = self._wrap_select_result(sql_obj, driver_result)  # type: ignore[attr-defined]
            else:
                result = self._wrap_execute_result(sql_obj, driver_result)  # type: ignore[attr-defined]

        # For parquet format, convert through Arrow
        if format == "parquet":
            arrow_table = self._rows_to_arrow_table(result.data or [], result.column_names or [])
            backend.write_arrow(path, arrow_table, **options)
            return len(result.data or [])

        compression = options.get("compression")

        suffix = f".{format}"
        if compression == "gzip":
            suffix += ".gz"

        with tempfile.NamedTemporaryFile(mode="w", suffix=suffix, delete=False, encoding="utf-8") as tmp:
            tmp_path = Path(tmp.name)

        if compression == "gzip":
            import gzip

            with gzip.open(tmp_path, "wt", encoding="utf-8") as file_to_write:
                if format == "csv":
                    self._write_csv(result, file_to_write, **options)
                elif format == "json":
                    self._write_json(result, file_to_write, **options)
                else:
                    msg = f"Unsupported format for backend export: {format}"
                    raise ValueError(msg)
        else:
            with tmp_path.open("w", encoding="utf-8") as file_to_write:
                if format == "csv":
                    self._write_csv(result, file_to_write, **options)
                elif format == "json":
                    self._write_json(result, file_to_write, **options)
                else:
                    msg = f"Unsupported format for backend export: {format}"
                    raise ValueError(msg)

        try:
            # Upload to storage backend
            # Adjust path if compression was used
            final_path = path
            if compression == "gzip" and not path.endswith(".gz"):
                final_path = path + ".gz"

            backend.write_bytes(final_path, tmp_path.read_bytes())
            return result.rows_affected or len(result.data or [])
        finally:
            tmp_path.unlink(missing_ok=True)

    def _import_via_backend(
        self, backend: "ObjectStoreProtocol", path: str, table_name: str, format: str, mode: str, **options: Any
    ) -> int:
        """Import via storage backend using temporary file."""
        # Download from storage backend
        data = backend.read_bytes(path)

        with tempfile.NamedTemporaryFile(mode="wb", suffix=f".{format}", delete=False) as tmp:
            tmp.write(data)
            tmp_path = Path(tmp.name)

        try:
            # Use database's bulk load capabilities
            return self._bulk_load_file(tmp_path, table_name, format, mode, **options)
        finally:
            tmp_path.unlink(missing_ok=True)

    @staticmethod
    def _write_csv(result: "SQLResult", file: Any, **options: Any) -> None:
        """Write result to CSV file."""
        write_csv(result, file, **options)

    @staticmethod
    def _write_json(result: "SQLResult", file: Any, **options: Any) -> None:
        """Write result to JSON file."""
        _ = options

        if result.data and result.column_names:
            if result.data and isinstance(result.data[0], dict):
                # Data is already dictionaries, use as-is
                rows = result.data
            else:
                rows = [dict(zip(result.column_names, row)) for row in result.data]
            json_str = to_json(rows)
            file.write(json_str)
        else:
            json_str = to_json([])
            file.write(json_str)

    def _bulk_load_file(self, file_path: Path, table_name: str, format: str, mode: str, **options: Any) -> int:
        """Database-specific bulk load implementation. Override in drivers."""
        msg = "Driver should implement _bulk_load_file"
        raise NotImplementedError(msg)


class AsyncStorageMixin(StorageMixinBase):
    """Unified storage operations for asynchronous drivers."""

    __slots__ = ()

    async def ingest_arrow_table(
        self, table: "ArrowTable", table_name: str, mode: str = "create", **options: Any
    ) -> int:
        """Ingest an Arrow table into the database asynchronously.

        This public method provides a consistent entry point and can be used for
        instrumentation, logging, etc., while delegating the actual work to the
        driver-specific `_ingest_arrow_table` implementation.
        """
        self._ensure_pyarrow_installed()
        return await self._ingest_arrow_table(table, table_name, mode, **options)

    async def _ingest_arrow_table(
        self, table: "ArrowTable", table_name: str, mode: str = "create", **options: Any
    ) -> int:
        """Generic async fallback for ingesting an Arrow table.

        This implementation writes the Arrow table to a temporary Parquet file
        and then uses the driver's generic `_bulk_load_file` capability.
        Drivers with more efficient, native Arrow ingestion methods should override this.
        """
        import pyarrow.parquet as pq

        # Use an async-friendly way to handle the temporary file if possible,
        # but for simplicity, standard tempfile is acceptable here as it's a fallback.
        with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
            tmp_path = Path(tmp.name)
            await async_(pq.write_table)(table, tmp_path)  # pyright: ignore

        try:
            # Use database's async bulk load capabilities for Parquet
            return await self._bulk_load_file(tmp_path, table_name, "parquet", mode, **options)
        finally:
            tmp_path.unlink(missing_ok=True)

    # ============================================================================
    # Core Arrow Operations (Async)
    # ============================================================================

    async def fetch_arrow_table(
        self,
        statement: "Statement",
        /,
        *parameters: "Union[StatementParameters, StatementFilter]",
        _connection: "Optional[ConnectionT]" = None,
        _config: "Optional[SQLConfig]" = None,
        **kwargs: Any,
    ) -> "ArrowResult":
        """Async fetch query results as Arrow table with intelligent routing.

        Args:
            statement: SQL statement (string, SQL object, or sqlglot Expression)
            *parameters: Mixed parameters and filters
            _connection: Optional connection override
            _config: Optional SQL config override
            **kwargs: Additional options

        Returns:
            ArrowResult wrapping the Arrow table
        """
        self._ensure_pyarrow_installed()

        filters, params = separate_filters_and_parameters(parameters)
        # Convert to SQL object for processing
        # Use a custom config if transformations will add parameters
        if _config is None:
            _config = self.config

        # If no parameters provided but we have transformations enabled,
        # disable parameter validation entirely to allow transformer-added parameters
        if params is None and _config and _config.enable_transformations:
            # Disable validation entirely for transformer-generated parameters
            _config = replace(_config, strict_mode=False, enable_validation=False)

        # Only pass params if it's not None to avoid adding None as a parameter
        if params is not None:
            sql = SQL(statement, params, *filters, config=_config, **kwargs)
        else:
            sql = SQL(statement, *filters, config=_config, **kwargs)

        return await self._fetch_arrow_table(sql, connection=_connection, **kwargs)

    async def _fetch_arrow_table(
        self, sql: SQL, connection: "Optional[ConnectionT]" = None, **kwargs: Any
    ) -> "ArrowResult":
        """Generic async fallback for Arrow table fetching.

        This method executes a regular query and converts the results to Arrow format.
        Drivers should override this method to provide native Arrow support if available.
        If a driver has partial native support, it can call `super()._fetch_arrow_table(...)`
        to use this fallback implementation.

        Args:
            sql: SQL object to execute
            connection: Optional connection override
            **kwargs: Additional options (unused in fallback)

        Returns:
            ArrowResult with converted data
        """
        # Execute regular query
        result = await self.execute(sql, _connection=connection)  # type: ignore[attr-defined]

        arrow_table = self._rows_to_arrow_table(result.data or [], result.column_names or [])

        return ArrowResult(statement=sql, data=arrow_table)

    async def export_to_storage(
        self,
        statement: "Statement",
        /,
        *parameters: "Union[StatementParameters, StatementFilter]",
        destination_uri: "Union[str, Path]",
        format: "Optional[str]" = None,
        _connection: "Optional[ConnectionT]" = None,
        _config: "Optional[SQLConfig]" = None,
        **kwargs: Any,
    ) -> int:
        filters, params = separate_filters_and_parameters(parameters)

        # For storage operations, disable transformations that might add unwanted parameters
        if _config is None:
            _config = self.config
        if _config and not _config.dialect:
            _config = replace(_config, dialect=self.dialect)
        if _config and _config.enable_transformations:
            _config = replace(_config, enable_transformations=False)

        sql = (
            SQL(statement, parameters=params, config=_config) if params is not None else SQL(statement, config=_config)
        )
        for filter_ in filters:
            sql = sql.filter(filter_)

        return await self._export_to_storage(sql, destination_uri, format, connection=_connection, **kwargs)

    async def _export_to_storage(
        self,
        query: "SQL",
        destination_uri: "Union[str, Path]",
        format: "Optional[str]" = None,
        connection: "Optional[ConnectionT]" = None,
        **kwargs: Any,
    ) -> int:
        """Protected async method for export operation implementation.

        Args:
            query: SQL query to execute and export
            destination_uri: URI to export data to
            format: Optional format override (auto-detected from URI if not provided)
            connection: Optional connection override
            **kwargs: Additional export options

        Returns:
            Number of rows exported
        """
        # Auto-detect format if not provided
        detected_format = self._detect_format(destination_uri)
        if format:
            file_format = format
        elif detected_format == "csv" and not str(destination_uri).endswith((".csv", ".tsv", ".txt")):
            # Detection returned default "csv" but file doesn't actually have CSV extension
            file_format = "parquet"
        else:
            file_format = detected_format

        # destination doesn't have .parquet extension, add it to ensure compatibility
        # with pyarrow.parquet.read_table() which requires the extension
        if file_format == "parquet" and not str(destination_uri).endswith(".parquet"):
            destination_uri = f"{destination_uri}.parquet"

        # Use storage backend - resolve AFTER modifying destination_uri
        backend, path = self._resolve_backend_and_path(destination_uri)

        # Try native database export first
        if file_format == "parquet" and self.supports_native_parquet_export:
            try:
                compiled_sql, _ = query.compile(placeholder_style="static")
                return await self._export_native(compiled_sql, destination_uri, file_format, **kwargs)
            except NotImplementedError:
                # Fall through to use storage backend
                pass

        if file_format == "parquet":
            # Use Arrow for efficient transfer
            arrow_result = await self._fetch_arrow_table(query, connection=connection, **kwargs)
            arrow_table = arrow_result.data
            if arrow_table is not None:
                await backend.write_arrow_async(path, arrow_table, **kwargs)
                return arrow_table.num_rows
            return 0

        return await self._export_via_backend(query, backend, path, file_format, **kwargs)

    async def import_from_storage(
        self,
        source_uri: "Union[str, Path]",
        table_name: str,
        format: "Optional[str]" = None,
        mode: str = "create",
        **options: Any,
    ) -> int:
        """Async import data from storage with intelligent routing.

        Provides instrumentation and delegates to _import_from_storage() for consistent operation.

        Args:
            source_uri: URI to import data from
            table_name: Target table name
            format: Optional format override (auto-detected from URI if not provided)
            mode: Import mode ('create', 'append', 'replace')
            **options: Additional import options

        Returns:
            Number of rows imported
        """
        return await self._import_from_storage(source_uri, table_name, format, mode, **options)

    async def _import_from_storage(
        self,
        source_uri: "Union[str, Path]",
        table_name: str,
        format: "Optional[str]" = None,
        mode: str = "create",
        **options: Any,
    ) -> int:
        """Protected async method for import operation implementation.

        Args:
            source_uri: URI to import data from
            table_name: Target table name
            format: Optional format override (auto-detected from URI if not provided)
            mode: Import mode ('create', 'append', 'replace')
            **options: Additional import options

        Returns:
            Number of rows imported
        """
        file_format = format or self._detect_format(source_uri)
        backend, path = self._resolve_backend_and_path(source_uri)

        if file_format == "parquet":
            arrow_table = await backend.read_arrow_async(path, **options)
            return await self.ingest_arrow_table(arrow_table, table_name, mode=mode)

        return await self._import_via_backend(backend, path, table_name, file_format, mode, **options)

    # ============================================================================
    # Async Database-Specific Implementation Hooks
    # ============================================================================

    async def _export_native(self, query: str, destination_uri: "Union[str, Path]", format: str, **options: Any) -> int:
        """Async database-specific native export."""
        msg = "Driver should implement _export_native"
        raise NotImplementedError(msg)

    async def _import_native(
        self, source_uri: "Union[str, Path]", table_name: str, format: str, mode: str, **options: Any
    ) -> int:
        """Async database-specific native import."""
        msg = "Driver should implement _import_native"
        raise NotImplementedError(msg)

    async def _export_via_backend(
        self, sql_obj: "SQL", backend: "ObjectStoreProtocol", path: str, format: str, **options: Any
    ) -> int:
        """Async export via storage backend."""

        # Execute query and get results - use the SQL object directly
        try:
            result = await self.execute(sql_obj)  # type: ignore[attr-defined]
        except Exception:
            # Fall back to direct execution
            compiled_sql, compiled_params = sql_obj.compile("qmark")
            driver_result = await self._execute(compiled_sql, compiled_params, sql_obj)  # type: ignore[attr-defined]
            if "data" in driver_result:
                result = self._wrap_select_result(sql_obj, driver_result)  # type: ignore[attr-defined]
            else:
                result = self._wrap_execute_result(sql_obj, driver_result)  # type: ignore[attr-defined]

        # For parquet format, convert through Arrow
        if format == "parquet":
            arrow_table = self._rows_to_arrow_table(result.data or [], result.column_names or [])
            await backend.write_arrow_async(path, arrow_table, **options)
            return len(result.data or [])

        with tempfile.NamedTemporaryFile(mode="w", suffix=f".{format}", delete=False, encoding="utf-8") as tmp:
            if format == "csv":
                self._write_csv(result, tmp, **options)
            elif format == "json":
                self._write_json(result, tmp, **options)
            else:
                msg = f"Unsupported format for backend export: {format}"
                raise ValueError(msg)

            tmp_path = Path(tmp.name)

        try:
            # Upload to storage backend (async if supported)
            await backend.write_bytes_async(path, tmp_path.read_bytes())
            return result.rows_affected or len(result.data or [])
        finally:
            tmp_path.unlink(missing_ok=True)

    async def _import_via_backend(
        self, backend: "ObjectStoreProtocol", path: str, table_name: str, format: str, mode: str, **options: Any
    ) -> int:
        """Async import via storage backend."""
        # Download from storage backend (async if supported)
        data = await backend.read_bytes_async(path)

        with tempfile.NamedTemporaryFile(mode="wb", suffix=f".{format}", delete=False) as tmp:
            tmp.write(data)
            tmp_path = Path(tmp.name)

        try:
            return await self._bulk_load_file(tmp_path, table_name, format, mode, **options)
        finally:
            tmp_path.unlink(missing_ok=True)

    @staticmethod
    def _write_csv(result: "SQLResult", file: Any, **options: Any) -> None:
        """Reuse sync implementation."""
        write_csv(result, file, **options)

    @staticmethod
    def _write_json(result: "SQLResult", file: Any, **options: Any) -> None:
        """Reuse sync implementation."""
        _ = options  # May be used in the future for JSON formatting options

        if result.data and result.column_names:
            if result.data and isinstance(result.data[0], dict):
                # Data is already dictionaries, use as-is
                rows = result.data
            else:
                rows = [dict(zip(result.column_names, row)) for row in result.data]
            json_str = to_json(rows)
            file.write(json_str)
        else:
            json_str = to_json([])
            file.write(json_str)

    async def _bulk_load_file(self, file_path: Path, table_name: str, format: str, mode: str, **options: Any) -> int:
        """Async database-specific bulk load implementation."""
        msg = "Driver should implement _bulk_load_file"
        raise NotImplementedError(msg)