sqlspec 0.14.1__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (159)
  1. sqlspec/__init__.py +50 -25
  2. sqlspec/__main__.py +1 -1
  3. sqlspec/__metadata__.py +1 -3
  4. sqlspec/_serialization.py +1 -2
  5. sqlspec/_sql.py +480 -121
  6. sqlspec/_typing.py +278 -142
  7. sqlspec/adapters/adbc/__init__.py +4 -3
  8. sqlspec/adapters/adbc/_types.py +12 -0
  9. sqlspec/adapters/adbc/config.py +115 -260
  10. sqlspec/adapters/adbc/driver.py +462 -367
  11. sqlspec/adapters/aiosqlite/__init__.py +18 -3
  12. sqlspec/adapters/aiosqlite/_types.py +13 -0
  13. sqlspec/adapters/aiosqlite/config.py +199 -129
  14. sqlspec/adapters/aiosqlite/driver.py +230 -269
  15. sqlspec/adapters/asyncmy/__init__.py +18 -3
  16. sqlspec/adapters/asyncmy/_types.py +12 -0
  17. sqlspec/adapters/asyncmy/config.py +80 -168
  18. sqlspec/adapters/asyncmy/driver.py +260 -225
  19. sqlspec/adapters/asyncpg/__init__.py +19 -4
  20. sqlspec/adapters/asyncpg/_types.py +17 -0
  21. sqlspec/adapters/asyncpg/config.py +82 -181
  22. sqlspec/adapters/asyncpg/driver.py +285 -383
  23. sqlspec/adapters/bigquery/__init__.py +17 -3
  24. sqlspec/adapters/bigquery/_types.py +12 -0
  25. sqlspec/adapters/bigquery/config.py +191 -258
  26. sqlspec/adapters/bigquery/driver.py +474 -646
  27. sqlspec/adapters/duckdb/__init__.py +14 -3
  28. sqlspec/adapters/duckdb/_types.py +12 -0
  29. sqlspec/adapters/duckdb/config.py +415 -351
  30. sqlspec/adapters/duckdb/driver.py +343 -413
  31. sqlspec/adapters/oracledb/__init__.py +19 -5
  32. sqlspec/adapters/oracledb/_types.py +14 -0
  33. sqlspec/adapters/oracledb/config.py +123 -379
  34. sqlspec/adapters/oracledb/driver.py +507 -560
  35. sqlspec/adapters/psqlpy/__init__.py +13 -3
  36. sqlspec/adapters/psqlpy/_types.py +11 -0
  37. sqlspec/adapters/psqlpy/config.py +93 -254
  38. sqlspec/adapters/psqlpy/driver.py +505 -234
  39. sqlspec/adapters/psycopg/__init__.py +19 -5
  40. sqlspec/adapters/psycopg/_types.py +17 -0
  41. sqlspec/adapters/psycopg/config.py +143 -403
  42. sqlspec/adapters/psycopg/driver.py +706 -872
  43. sqlspec/adapters/sqlite/__init__.py +14 -3
  44. sqlspec/adapters/sqlite/_types.py +11 -0
  45. sqlspec/adapters/sqlite/config.py +202 -118
  46. sqlspec/adapters/sqlite/driver.py +264 -303
  47. sqlspec/base.py +105 -9
  48. sqlspec/{statement/builder → builder}/__init__.py +12 -14
  49. sqlspec/{statement/builder → builder}/_base.py +120 -55
  50. sqlspec/{statement/builder → builder}/_column.py +17 -6
  51. sqlspec/{statement/builder → builder}/_ddl.py +46 -79
  52. sqlspec/{statement/builder → builder}/_ddl_utils.py +5 -10
  53. sqlspec/{statement/builder → builder}/_delete.py +6 -25
  54. sqlspec/{statement/builder → builder}/_insert.py +18 -65
  55. sqlspec/builder/_merge.py +56 -0
  56. sqlspec/{statement/builder → builder}/_parsing_utils.py +8 -11
  57. sqlspec/{statement/builder → builder}/_select.py +11 -56
  58. sqlspec/{statement/builder → builder}/_update.py +12 -18
  59. sqlspec/{statement/builder → builder}/mixins/__init__.py +10 -14
  60. sqlspec/{statement/builder → builder}/mixins/_cte_and_set_ops.py +48 -59
  61. sqlspec/{statement/builder → builder}/mixins/_insert_operations.py +34 -18
  62. sqlspec/{statement/builder → builder}/mixins/_join_operations.py +1 -3
  63. sqlspec/{statement/builder → builder}/mixins/_merge_operations.py +19 -9
  64. sqlspec/{statement/builder → builder}/mixins/_order_limit_operations.py +3 -3
  65. sqlspec/{statement/builder → builder}/mixins/_pivot_operations.py +4 -8
  66. sqlspec/{statement/builder → builder}/mixins/_select_operations.py +25 -38
  67. sqlspec/{statement/builder → builder}/mixins/_update_operations.py +15 -16
  68. sqlspec/{statement/builder → builder}/mixins/_where_clause.py +210 -137
  69. sqlspec/cli.py +4 -5
  70. sqlspec/config.py +180 -133
  71. sqlspec/core/__init__.py +63 -0
  72. sqlspec/core/cache.py +873 -0
  73. sqlspec/core/compiler.py +396 -0
  74. sqlspec/core/filters.py +830 -0
  75. sqlspec/core/hashing.py +310 -0
  76. sqlspec/core/parameters.py +1209 -0
  77. sqlspec/core/result.py +664 -0
  78. sqlspec/{statement → core}/splitter.py +321 -191
  79. sqlspec/core/statement.py +666 -0
  80. sqlspec/driver/__init__.py +7 -10
  81. sqlspec/driver/_async.py +387 -176
  82. sqlspec/driver/_common.py +527 -289
  83. sqlspec/driver/_sync.py +390 -172
  84. sqlspec/driver/mixins/__init__.py +2 -19
  85. sqlspec/driver/mixins/_result_tools.py +164 -0
  86. sqlspec/driver/mixins/_sql_translator.py +6 -3
  87. sqlspec/exceptions.py +5 -252
  88. sqlspec/extensions/aiosql/adapter.py +93 -96
  89. sqlspec/extensions/litestar/cli.py +1 -1
  90. sqlspec/extensions/litestar/config.py +0 -1
  91. sqlspec/extensions/litestar/handlers.py +15 -26
  92. sqlspec/extensions/litestar/plugin.py +18 -16
  93. sqlspec/extensions/litestar/providers.py +17 -52
  94. sqlspec/loader.py +424 -105
  95. sqlspec/migrations/__init__.py +12 -0
  96. sqlspec/migrations/base.py +92 -68
  97. sqlspec/migrations/commands.py +24 -106
  98. sqlspec/migrations/loaders.py +402 -0
  99. sqlspec/migrations/runner.py +49 -51
  100. sqlspec/migrations/tracker.py +31 -44
  101. sqlspec/migrations/utils.py +64 -24
  102. sqlspec/protocols.py +7 -183
  103. sqlspec/storage/__init__.py +1 -1
  104. sqlspec/storage/backends/base.py +37 -40
  105. sqlspec/storage/backends/fsspec.py +136 -112
  106. sqlspec/storage/backends/obstore.py +138 -160
  107. sqlspec/storage/capabilities.py +5 -4
  108. sqlspec/storage/registry.py +57 -106
  109. sqlspec/typing.py +136 -115
  110. sqlspec/utils/__init__.py +2 -3
  111. sqlspec/utils/correlation.py +0 -3
  112. sqlspec/utils/deprecation.py +6 -6
  113. sqlspec/utils/fixtures.py +6 -6
  114. sqlspec/utils/logging.py +0 -2
  115. sqlspec/utils/module_loader.py +7 -12
  116. sqlspec/utils/singleton.py +0 -1
  117. sqlspec/utils/sync_tools.py +17 -38
  118. sqlspec/utils/text.py +12 -51
  119. sqlspec/utils/type_guards.py +443 -232
  120. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/METADATA +7 -2
  121. sqlspec-0.16.0.dist-info/RECORD +134 -0
  122. sqlspec/adapters/adbc/transformers.py +0 -108
  123. sqlspec/driver/connection.py +0 -207
  124. sqlspec/driver/mixins/_cache.py +0 -114
  125. sqlspec/driver/mixins/_csv_writer.py +0 -91
  126. sqlspec/driver/mixins/_pipeline.py +0 -508
  127. sqlspec/driver/mixins/_query_tools.py +0 -796
  128. sqlspec/driver/mixins/_result_utils.py +0 -138
  129. sqlspec/driver/mixins/_storage.py +0 -912
  130. sqlspec/driver/mixins/_type_coercion.py +0 -128
  131. sqlspec/driver/parameters.py +0 -138
  132. sqlspec/statement/__init__.py +0 -21
  133. sqlspec/statement/builder/_merge.py +0 -95
  134. sqlspec/statement/cache.py +0 -50
  135. sqlspec/statement/filters.py +0 -625
  136. sqlspec/statement/parameters.py +0 -956
  137. sqlspec/statement/pipelines/__init__.py +0 -210
  138. sqlspec/statement/pipelines/analyzers/__init__.py +0 -9
  139. sqlspec/statement/pipelines/analyzers/_analyzer.py +0 -646
  140. sqlspec/statement/pipelines/context.py +0 -109
  141. sqlspec/statement/pipelines/transformers/__init__.py +0 -7
  142. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +0 -88
  143. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +0 -1247
  144. sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +0 -76
  145. sqlspec/statement/pipelines/validators/__init__.py +0 -23
  146. sqlspec/statement/pipelines/validators/_dml_safety.py +0 -290
  147. sqlspec/statement/pipelines/validators/_parameter_style.py +0 -370
  148. sqlspec/statement/pipelines/validators/_performance.py +0 -714
  149. sqlspec/statement/pipelines/validators/_security.py +0 -967
  150. sqlspec/statement/result.py +0 -435
  151. sqlspec/statement/sql.py +0 -1774
  152. sqlspec/utils/cached_property.py +0 -25
  153. sqlspec/utils/statement_hashing.py +0 -203
  154. sqlspec-0.14.1.dist-info/RECORD +0 -145
  155. /sqlspec/{statement/builder → builder}/mixins/_delete_operations.py +0 -0
  156. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/WHEEL +0 -0
  157. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/entry_points.txt +0 -0
  158. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/licenses/LICENSE +0 -0
  159. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/licenses/NOTICE +0 -0
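The rename entries above show the query-builder package being promoted from sqlspec/statement/builder to a top-level sqlspec/builder package, while the remaining sqlspec/statement modules (filters, parameters, result, splitter, sql) are consolidated into the new sqlspec/core package. A small sketch of locating the builder package under either layout; only the module paths come from the list above, and the try/except aliasing pattern is illustrative, not sqlspec's documented migration path.

# Illustrative only: resolve the builder package under either release's layout.
# Module paths are taken from the rename entries above.
try:
    import sqlspec.builder as sql_builder  # 0.16.0 layout
except ImportError:
    import sqlspec.statement.builder as sql_builder  # 0.14.1 layout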
@@ -1,912 +0,0 @@
-"""Unified storage operations for database drivers.
-
-This module provides the new simplified storage architecture that replaces
-the complex web of Arrow, Export, Copy, and ResultConverter mixins with
-just two comprehensive mixins: SyncStorageMixin and AsyncStorageMixin.
-
-These mixins provide intelligent routing between native database capabilities
-and storage backend operations for optimal performance.
-"""
-
-# pyright: reportCallIssue=false, reportAttributeAccessIssue=false, reportArgumentType=false
-import logging
-import tempfile
-from abc import ABC
-from dataclasses import replace
-from pathlib import Path
-from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union, cast
-from urllib.parse import urlparse
-
-from sqlspec.driver.mixins._csv_writer import write_csv
-from sqlspec.driver.parameters import separate_filters_and_parameters
-from sqlspec.exceptions import MissingDependencyError
-from sqlspec.statement import SQL, ArrowResult, StatementFilter
-from sqlspec.storage import storage_registry
-from sqlspec.typing import ArrowTable, RowT, StatementParameters
-from sqlspec.utils.serializers import to_json
-from sqlspec.utils.sync_tools import async_
-
-if TYPE_CHECKING:
-    from sqlglot.dialects.dialect import DialectType
-
-    from sqlspec.protocols import ObjectStoreProtocol
-    from sqlspec.statement import SQLResult, Statement
-    from sqlspec.statement.sql import SQLConfig
-    from sqlspec.typing import ConnectionT
-
-__all__ = ("AsyncStorageMixin", "SyncStorageMixin")
-
-logger = logging.getLogger(__name__)
-
-WINDOWS_PATH_MIN_LENGTH = 3
-
-
-class StorageMixinBase(ABC):
-    """Base class with common storage functionality."""
-
-    config: Any
-    _connection: Any
-    dialect: "DialectType"
-    supports_native_parquet_export: "ClassVar[bool]"
-    supports_native_parquet_import: "ClassVar[bool]"
-
-    @staticmethod
-    def _ensure_pyarrow_installed() -> None:
-        """Ensure PyArrow is installed for Arrow operations."""
-        from sqlspec.typing import PYARROW_INSTALLED
-
-        if not PYARROW_INSTALLED:
-            msg = "pyarrow is required for Arrow operations. Install with: pip install pyarrow"
-            raise MissingDependencyError(msg)
-
-    @staticmethod
-    def _get_storage_backend(uri_or_key: "Union[str, Path]") -> "ObjectStoreProtocol":
-        """Get storage backend by URI or key with intelligent routing."""
-        if isinstance(uri_or_key, Path):
-            return storage_registry.get(uri_or_key)
-        return storage_registry.get(str(uri_or_key))
-
-    @staticmethod
-    def _is_uri(path_or_uri: "Union[str, Path]") -> bool:
-        """Check if input is a URI rather than a relative path."""
-        path_str = str(path_or_uri)
-        schemes = {"s3", "gs", "gcs", "az", "azure", "abfs", "abfss", "file", "http", "https"}
-        if "://" in path_str:
-            scheme = path_str.split("://", maxsplit=1)[0].lower()
-            return scheme in schemes
-        if len(path_str) >= WINDOWS_PATH_MIN_LENGTH and path_str[1:3] == ":\\":
-            return True
-        return bool(path_str.startswith("/"))
-
-    @staticmethod
-    def _detect_format(uri: "Union[str, Path]") -> str:
-        """Detect file format from URI extension."""
-        uri_str = str(uri)
-        parsed = urlparse(uri_str)
-        path = Path(parsed.path)
-        extension = path.suffix.lower().lstrip(".")
-
-        format_map = {
-            "csv": "csv",
-            "tsv": "csv",
-            "txt": "csv",
-            "parquet": "parquet",
-            "pq": "parquet",
-            "json": "json",
-            "jsonl": "jsonl",
-            "ndjson": "jsonl",
-        }
-
-        return format_map.get(extension, "csv")
-
-    def _resolve_backend_and_path(self, uri: "Union[str, Path]") -> "tuple[ObjectStoreProtocol, str]":
-        """Resolve backend and path from URI with Phase 3 URI-first routing.
-
-        Args:
-            uri: URI to resolve (e.g., "s3://bucket/path", "file:///local/path", Path object)
-
-        Returns:
-            Tuple of (backend, path) where path is relative to the backend's base path
-        """
-        uri_str = str(uri)
-        original_path = uri_str
-
-        if self._is_uri(uri_str) and "://" not in uri_str:
-            uri_str = f"file://{uri_str}"
-
-        backend = self._get_storage_backend(uri_str)
-
-        path = uri_str[7:] if uri_str.startswith("file://") else original_path
-
-        return backend, path
-
-    @staticmethod
-    def _rows_to_arrow_table(rows: "list[RowT]", columns: "list[str]") -> ArrowTable:
-        """Convert rows to Arrow table."""
-        import pyarrow as pa
-
-        if not rows:
-            empty_data: dict[str, list[Any]] = {col: [] for col in columns}
-            return pa.table(empty_data)
-
-        if isinstance(rows[0], dict):
-            # Dict rows
-            data = {col: [cast("dict[str, Any]", row).get(col) for row in rows] for col in columns}
-        else:
-            # Tuple/list rows
-            data = {col: [cast("tuple[Any, ...]", row)[i] for row in rows] for i, col in enumerate(columns)}
-
-        return pa.table(data)
-
-
-class SyncStorageMixin(StorageMixinBase):
-    """Unified storage operations for synchronous drivers."""
-
-    def ingest_arrow_table(self, table: "ArrowTable", table_name: str, mode: str = "create", **options: Any) -> int:
-        """Ingest an Arrow table into the database.
-
-        This public method provides a consistent entry point and can be used for
-        instrumentation, logging, etc., while delegating the actual work to the
-        driver-specific `_ingest_arrow_table` implementation.
-        """
-        return self._ingest_arrow_table(table, table_name, mode, **options)
-
-    def _ingest_arrow_table(self, table: "ArrowTable", table_name: str, mode: str = "create", **options: Any) -> int:
-        """Generic fallback for ingesting an Arrow table.
-
-        This implementation writes the Arrow table to a temporary Parquet file
-        and then uses the driver's generic `_bulk_load_file` capability.
-        Drivers with more efficient, native Arrow ingestion methods should override this.
-        """
-        import pyarrow.parquet as pq
-
-        with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
-            tmp_path = Path(tmp.name)
-            pq.write_table(table, tmp_path)  # pyright: ignore
-
-        try:
-            # Use database's bulk load capabilities for Parquet
-            return self._bulk_load_file(tmp_path, table_name, "parquet", mode, **options)
-        finally:
-            tmp_path.unlink(missing_ok=True)
-
-    # ============================================================================
-    # Core Arrow Operations
-    # ============================================================================
-
-    def fetch_arrow_table(
-        self,
-        statement: "Statement",
-        /,
-        *parameters: "Union[StatementParameters, StatementFilter]",
-        _connection: "Optional[ConnectionT]" = None,
-        _config: "Optional[SQLConfig]" = None,
-        **kwargs: Any,
-    ) -> "ArrowResult":
-        """Fetch query results as Arrow table with intelligent routing.
-
-        Args:
-            statement: SQL statement (string, SQL object, or sqlglot Expression)
-            *parameters: Mixed parameters and filters
-            _connection: Optional connection override
-            _config: Optional SQL config override
-            **kwargs: Additional options
-
-        Returns:
-            ArrowResult wrapping the Arrow table
-        """
-        self._ensure_pyarrow_installed()
-
-        filters, params = separate_filters_and_parameters(parameters)
-        # Convert to SQL object for processing
-        # Use a custom config if transformations will add parameters
-        if _config is None:
-            _config = self.config
-
-        # If no parameters provided but we have transformations enabled,
-        # disable parameter validation entirely to allow transformer-added parameters
-        if params is None and _config and _config.enable_transformations:
-            # Disable validation entirely for transformer-generated parameters
-            _config = replace(_config, enable_validation=False)
-
-        # Only pass params if it's not None to avoid adding None as a parameter
-        if params is not None:
-            sql = SQL(statement, params, *filters, config=_config, **kwargs)
-        else:
-            sql = SQL(statement, *filters, config=_config, **kwargs)
-
-        return self._fetch_arrow_table(sql, connection=_connection, **kwargs)
-
-    def _fetch_arrow_table(self, sql: SQL, connection: "Optional[ConnectionT]" = None, **kwargs: Any) -> "ArrowResult":
-        """Generic fallback for Arrow table fetching.
-
-        This method executes a regular query and converts the results to Arrow format.
-        Drivers can call this method when they don't have native Arrow support.
-
-        Args:
-            sql: SQL object to execute
-            connection: Optional connection override
-            **kwargs: Additional options (unused in fallback)
-
-        Returns:
-            ArrowResult with converted data
-        """
-        try:
-            result = cast("SQLResult", self.execute(sql, _connection=connection))  # type: ignore[attr-defined]
-        except Exception:
-            compiled_sql, compiled_params = sql.compile("qmark")
-
-            # Execute directly via the driver's _execute method
-            driver_result = self._execute(compiled_sql, compiled_params, sql, connection=connection)  # type: ignore[attr-defined]
-
-            # Wrap the result as a SQLResult
-            if "data" in driver_result:
-                # It's a SELECT result
-                result = self._wrap_select_result(sql, driver_result)  # type: ignore[attr-defined]
-            else:
-                # It's a DML result
-                result = self._wrap_execute_result(sql, driver_result)  # type: ignore[attr-defined]
-
-        data = result.data or []
-        columns = result.column_names or []
-        arrow_table = self._rows_to_arrow_table(data, columns)
-        return ArrowResult(statement=sql, data=arrow_table)
-
-    # ============================================================================
-    # Storage Integration Operations
-    # ============================================================================
-
-    def export_to_storage(
-        self,
-        statement: "Statement",
-        /,
-        *parameters: "Union[StatementParameters, StatementFilter]",
-        destination_uri: "Union[str, Path]",
-        format: "Optional[str]" = None,
-        _connection: "Optional[ConnectionT]" = None,
-        _config: "Optional[SQLConfig]" = None,
-        **options: Any,
-    ) -> int:
-        """Export query results to storage with intelligent routing.
-
-        Provides instrumentation and delegates to _export_to_storage() for consistent operation.
-
-        Args:
-            statement: SQL query to execute and export
-            *parameters: Mixed parameters and filters
-            destination_uri: URI to export data to
-            format: Optional format override (auto-detected from URI if not provided)
-            _connection: Optional connection override
-            _config: Optional SQL config override
-            **options: Additional export options AND named parameters for query
-
-        Returns:
-            Number of rows exported
-        """
-        filters, params = separate_filters_and_parameters(parameters)
-
-        # For storage operations, disable transformations that might add unwanted parameters
-        if _config is None:
-            _config = self.config
-        if _config and not _config.dialect:
-            _config = replace(_config, dialect=self.dialect)
-
-        sql = SQL(statement, *params, config=_config) if params else SQL(statement, config=_config)
-        for filter_ in filters:
-            sql = sql.filter(filter_)
-
-        return self._export_to_storage(
-            sql, destination_uri=destination_uri, format=format, _connection=_connection, **options
-        )
-
-    def _export_to_storage(
-        self,
-        sql: "SQL",
-        destination_uri: "Union[str, Path]",
-        format: "Optional[str]" = None,
-        _connection: "Optional[ConnectionT]" = None,
-        **kwargs: Any,
-    ) -> int:
-        """Protected method for sync export operation implementation."""
-        detected_format = self._detect_format(destination_uri)
-        if format:
-            file_format = format
-        elif detected_format == "csv" and not str(destination_uri).endswith((".csv", ".tsv", ".txt")):
-            # Detection returned default "csv" but file doesn't actually have CSV extension
-            file_format = "parquet"
-        else:
-            file_format = detected_format
-
-        # destination doesn't have .parquet extension, add it to ensure compatibility
-        # with pyarrow.parquet.read_table() which requires the extension
-        if file_format == "parquet" and not str(destination_uri).endswith(".parquet"):
-            destination_uri = f"{destination_uri}.parquet"
-
-        # Use storage backend - resolve AFTER modifying destination_uri
-        backend, path = self._resolve_backend_and_path(destination_uri)
-
-        # Try native database export first
-        if file_format == "parquet" and self.supports_native_parquet_export:
-            try:
-                compiled_sql, _ = sql.compile(placeholder_style="static")
-                return self._export_native(compiled_sql, destination_uri, file_format, **kwargs)
-            except NotImplementedError:
-                # Fall through to use storage backend
-                pass
-
-        if file_format == "parquet":
-            # Use Arrow for efficient transfer
-            arrow_result = self._fetch_arrow_table(sql, connection=_connection, **kwargs)
-            arrow_table = arrow_result.data
-            num_rows = arrow_table.num_rows
-            backend.write_arrow(path, arrow_table, **kwargs)
-            return num_rows
-
-        return self._export_via_backend(sql, backend, path, file_format, **kwargs)
-
-    def import_from_storage(
-        self,
-        source_uri: "Union[str, Path]",
-        table_name: str,
-        format: "Optional[str]" = None,
-        mode: str = "create",
-        **options: Any,
-    ) -> int:
-        """Import data from storage with intelligent routing.
-
-        Provides instrumentation and delegates to _import_from_storage() for consistent operation.
-
-        Args:
-            source_uri: URI to import data from
-            table_name: Target table name
-            format: Optional format override (auto-detected from URI if not provided)
-            mode: Import mode ('create', 'append', 'replace')
-            **options: Additional import options
-
-        Returns:
-            Number of rows imported
-        """
-        return self._import_from_storage(source_uri, table_name, format, mode, **options)
-
-    def _import_from_storage(
-        self,
-        source_uri: "Union[str, Path]",
-        table_name: str,
-        format: "Optional[str]" = None,
-        mode: str = "create",
-        **options: Any,
-    ) -> int:
-        """Protected method for import operation implementation.
-
-        Args:
-            source_uri: URI to import data from
-            table_name: Target table name
-            format: Optional format override (auto-detected from URI if not provided)
-            mode: Import mode ('create', 'append', 'replace')
-            **options: Additional import options
-
-        Returns:
-            Number of rows imported
-        """
-        # Auto-detect format if not provided
-        file_format = format or self._detect_format(source_uri)
-
-        # Try native database import first
-        if file_format == "parquet" and self.supports_native_parquet_import:
-            return self._import_native(source_uri, table_name, file_format, mode, **options)
-
-        # Use storage backend
-        backend, path = self._resolve_backend_and_path(source_uri)
-
-        if file_format == "parquet":
-            try:
-                # Use Arrow for efficient transfer
-                arrow_table = backend.read_arrow(path, **options)
-                return self.ingest_arrow_table(arrow_table, table_name, mode=mode)
-            except AttributeError:
-                # Backend doesn't support read_arrow, try alternative approach
-                try:
-                    import pyarrow.parquet as pq
-
-                    # Read Parquet file directly
-                    with tempfile.NamedTemporaryFile(mode="wb", suffix=".parquet", delete=False) as tmp:
-                        tmp.write(backend.read_bytes(path))
-                        tmp_path = Path(tmp.name)
-                    try:
-                        arrow_table = pq.read_table(tmp_path)
-                        return self.ingest_arrow_table(arrow_table, table_name, mode=mode)
-                    finally:
-                        tmp_path.unlink(missing_ok=True)
-                except ImportError:
-                    # PyArrow not installed, cannot import Parquet
-                    msg = "PyArrow is required to import Parquet files. Install with: pip install pyarrow"
-                    raise ImportError(msg) from None
-
-        # Use traditional import through temporary file
-        return self._import_via_backend(backend, path, table_name, file_format, mode, **options)
-
-    # ============================================================================
-    # Database-Specific Implementation Hooks
-    # ============================================================================
-
-    def _read_parquet_native(
-        self, source_uri: "Union[str, Path]", columns: "Optional[list[str]]" = None, **options: Any
-    ) -> "SQLResult":
-        """Database-specific native Parquet reading. Override in drivers."""
-        msg = "Driver should implement _read_parquet_native"
-        raise NotImplementedError(msg)
-
-    def _write_parquet_native(
-        self, data: Union[str, ArrowTable], destination_uri: "Union[str, Path]", **options: Any
-    ) -> None:
-        """Database-specific native Parquet writing. Override in drivers."""
-        msg = "Driver should implement _write_parquet_native"
-        raise NotImplementedError(msg)
-
-    def _export_native(self, query: str, destination_uri: "Union[str, Path]", format: str, **options: Any) -> int:
-        """Database-specific native export. Override in drivers."""
-        msg = "Driver should implement _export_native"
-        raise NotImplementedError(msg)
-
-    def _import_native(
-        self, source_uri: "Union[str, Path]", table_name: str, format: str, mode: str, **options: Any
-    ) -> int:
-        """Database-specific native import. Override in drivers."""
-        msg = "Driver should implement _import_native"
-        raise NotImplementedError(msg)
-
-    def _export_via_backend(
-        self, sql_obj: "SQL", backend: "ObjectStoreProtocol", path: str, format: str, **options: Any
-    ) -> int:
-        """Export via storage backend using temporary file."""
-
-        # Execute query and get results - use the SQL object directly
-        try:
-            result = cast("SQLResult", self.execute(sql_obj))  # type: ignore[attr-defined]
-        except Exception:
-            # Fall back to direct execution
-            compiled_sql, compiled_params = sql_obj.compile("qmark")
-            driver_result = self._execute(compiled_sql, compiled_params, sql_obj)  # type: ignore[attr-defined]
-            if "data" in driver_result:
-                result = self._wrap_select_result(sql_obj, driver_result)  # type: ignore[attr-defined]
-            else:
-                result = self._wrap_execute_result(sql_obj, driver_result)  # type: ignore[attr-defined]
-
-        # For parquet format, convert through Arrow
-        if format == "parquet":
-            arrow_table = self._rows_to_arrow_table(result.data or [], result.column_names or [])
-            backend.write_arrow(path, arrow_table, **options)
-            return len(result.data or [])
-
-        compression = options.get("compression")
-
-        suffix = f".{format}"
-        if compression == "gzip":
-            suffix += ".gz"
-
-        with tempfile.NamedTemporaryFile(mode="w", suffix=suffix, delete=False, encoding="utf-8") as tmp:
-            tmp_path = Path(tmp.name)
-
-        if compression == "gzip":
-            import gzip
-
-            with gzip.open(tmp_path, "wt", encoding="utf-8") as file_to_write:
-                if format == "csv":
-                    self._write_csv(result, file_to_write, **options)
-                elif format == "json":
-                    self._write_json(result, file_to_write, **options)
-                else:
-                    msg = f"Unsupported format for backend export: {format}"
-                    raise ValueError(msg)
-        else:
-            with tmp_path.open("w", encoding="utf-8") as file_to_write:
-                if format == "csv":
-                    self._write_csv(result, file_to_write, **options)
-                elif format == "json":
-                    self._write_json(result, file_to_write, **options)
-                else:
-                    msg = f"Unsupported format for backend export: {format}"
-                    raise ValueError(msg)
-
-        try:
-            # Upload to storage backend
-            # Adjust path if compression was used
-            final_path = path
-            if compression == "gzip" and not path.endswith(".gz"):
-                final_path = path + ".gz"
-
-            backend.write_bytes(final_path, tmp_path.read_bytes())
-            return result.rows_affected or len(result.data or [])
-        finally:
-            tmp_path.unlink(missing_ok=True)
-
-    def _import_via_backend(
-        self, backend: "ObjectStoreProtocol", path: str, table_name: str, format: str, mode: str, **options: Any
-    ) -> int:
-        """Import via storage backend using temporary file."""
-        # Download from storage backend
-        data = backend.read_bytes(path)
-
-        with tempfile.NamedTemporaryFile(mode="wb", suffix=f".{format}", delete=False) as tmp:
-            tmp.write(data)
-            tmp_path = Path(tmp.name)
-
-        try:
-            # Use database's bulk load capabilities
-            return self._bulk_load_file(tmp_path, table_name, format, mode, **options)
-        finally:
-            tmp_path.unlink(missing_ok=True)
-
-    @staticmethod
-    def _write_csv(result: "SQLResult", file: Any, **options: Any) -> None:
-        """Write result to CSV file."""
-        write_csv(result, file, **options)
-
-    @staticmethod
-    def _write_json(result: "SQLResult", file: Any, **options: Any) -> None:
-        """Write result to JSON file."""
-        _ = options
-
-        if result.data and result.column_names:
-            if result.data and isinstance(result.data[0], dict):
-                # Data is already dictionaries, use as-is
-                rows = result.data
-            else:
-                rows = [dict(zip(result.column_names, row)) for row in result.data]
-            json_str = to_json(rows)
-            file.write(json_str)
-        else:
-            json_str = to_json([])
-            file.write(json_str)
-
-    def _bulk_load_file(self, file_path: Path, table_name: str, format: str, mode: str, **options: Any) -> int:
-        """Database-specific bulk load implementation. Override in drivers."""
-        msg = "Driver should implement _bulk_load_file"
-        raise NotImplementedError(msg)
-
-
-class AsyncStorageMixin(StorageMixinBase):
-    """Unified storage operations for asynchronous drivers."""
-
-    async def ingest_arrow_table(
-        self, table: "ArrowTable", table_name: str, mode: str = "create", **options: Any
-    ) -> int:
-        """Ingest an Arrow table into the database asynchronously.
-
-        This public method provides a consistent entry point and can be used for
-        instrumentation, logging, etc., while delegating the actual work to the
-        driver-specific `_ingest_arrow_table` implementation.
-        """
-        self._ensure_pyarrow_installed()
-        return await self._ingest_arrow_table(table, table_name, mode, **options)
-
-    async def _ingest_arrow_table(
-        self, table: "ArrowTable", table_name: str, mode: str = "create", **options: Any
-    ) -> int:
-        """Generic async fallback for ingesting an Arrow table.
-
-        This implementation writes the Arrow table to a temporary Parquet file
-        and then uses the driver's generic `_bulk_load_file` capability.
-        Drivers with more efficient, native Arrow ingestion methods should override this.
-        """
-        import pyarrow.parquet as pq
-
-        # Use an async-friendly way to handle the temporary file if possible,
-        # but for simplicity, standard tempfile is acceptable here as it's a fallback.
-        with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
-            tmp_path = Path(tmp.name)
-            await async_(pq.write_table)(table, tmp_path)  # pyright: ignore
-
-        try:
-            # Use database's async bulk load capabilities for Parquet
-            return await self._bulk_load_file(tmp_path, table_name, "parquet", mode, **options)
-        finally:
-            tmp_path.unlink(missing_ok=True)
-
-    # ============================================================================
-    # Core Arrow Operations (Async)
-    # ============================================================================
-
-    async def fetch_arrow_table(
-        self,
-        statement: "Statement",
-        /,
-        *parameters: "Union[StatementParameters, StatementFilter]",
-        _connection: "Optional[ConnectionT]" = None,
-        _config: "Optional[SQLConfig]" = None,
-        **kwargs: Any,
-    ) -> "ArrowResult":
-        """Async fetch query results as Arrow table with intelligent routing.
-
-        Args:
-            statement: SQL statement (string, SQL object, or sqlglot Expression)
-            *parameters: Mixed parameters and filters
-            _connection: Optional connection override
-            _config: Optional SQL config override
-            **kwargs: Additional options
-
-        Returns:
-            ArrowResult wrapping the Arrow table
-        """
-        self._ensure_pyarrow_installed()
-
-        filters, params = separate_filters_and_parameters(parameters)
-        # Convert to SQL object for processing
-        # Use a custom config if transformations will add parameters
-        if _config is None:
-            _config = self.config
-
-        # If no parameters provided but we have transformations enabled,
-        # disable parameter validation entirely to allow transformer-added parameters
-        if params is None and _config and _config.enable_transformations:
-            # Disable validation entirely for transformer-generated parameters
-            _config = replace(_config, enable_validation=False)
-
-        # Only pass params if it's not None to avoid adding None as a parameter
-        if params is not None:
-            sql = SQL(statement, params, *filters, config=_config, **kwargs)
-        else:
-            sql = SQL(statement, *filters, config=_config, **kwargs)
-
-        return await self._fetch_arrow_table(sql, connection=_connection, **kwargs)
-
-    async def _fetch_arrow_table(
-        self, sql: SQL, connection: "Optional[ConnectionT]" = None, **kwargs: Any
-    ) -> "ArrowResult":
-        """Generic async fallback for Arrow table fetching.
-
-        This method executes a regular query and converts the results to Arrow format.
-        Drivers should override this method to provide native Arrow support if available.
-        If a driver has partial native support, it can call `super()._fetch_arrow_table(...)`
-        to use this fallback implementation.
-
-        Args:
-            sql: SQL object to execute
-            connection: Optional connection override
-            **kwargs: Additional options (unused in fallback)
-
-        Returns:
-            ArrowResult with converted data
-        """
-        # Execute regular query
-        result = await self.execute(sql, _connection=connection)  # type: ignore[attr-defined]
-
-        arrow_table = self._rows_to_arrow_table(result.data or [], result.column_names or [])
-
-        return ArrowResult(statement=sql, data=arrow_table)
-
-    async def export_to_storage(
-        self,
-        statement: "Statement",
-        /,
-        *parameters: "Union[StatementParameters, StatementFilter]",
-        destination_uri: "Union[str, Path]",
-        format: "Optional[str]" = None,
-        _connection: "Optional[ConnectionT]" = None,
-        _config: "Optional[SQLConfig]" = None,
-        **kwargs: Any,
-    ) -> int:
-        filters, params = separate_filters_and_parameters(parameters)
-
-        # For storage operations, disable transformations that might add unwanted parameters
-        if _config is None:
-            _config = self.config
-        if _config and not _config.dialect:
-            _config = replace(_config, dialect=self.dialect)
-
-        sql = SQL(statement, *params, config=_config) if params else SQL(statement, config=_config)
-        for filter_ in filters:
-            sql = sql.filter(filter_)
-
-        return await self._export_to_storage(sql, destination_uri, format, connection=_connection, **kwargs)
-
-    async def _export_to_storage(
-        self,
-        query: "SQL",
-        destination_uri: "Union[str, Path]",
-        format: "Optional[str]" = None,
-        connection: "Optional[ConnectionT]" = None,
-        **kwargs: Any,
-    ) -> int:
-        """Protected async method for export operation implementation.
-
-        Args:
-            query: SQL query to execute and export
-            destination_uri: URI to export data to
-            format: Optional format override (auto-detected from URI if not provided)
-            connection: Optional connection override
-            **kwargs: Additional export options
-
-        Returns:
-            Number of rows exported
-        """
-        # Auto-detect format if not provided
-        detected_format = self._detect_format(destination_uri)
-        if format:
-            file_format = format
-        elif detected_format == "csv" and not str(destination_uri).endswith((".csv", ".tsv", ".txt")):
-            # Detection returned default "csv" but file doesn't actually have CSV extension
-            file_format = "parquet"
-        else:
-            file_format = detected_format
-
-        # destination doesn't have .parquet extension, add it to ensure compatibility
-        # with pyarrow.parquet.read_table() which requires the extension
-        if file_format == "parquet" and not str(destination_uri).endswith(".parquet"):
-            destination_uri = f"{destination_uri}.parquet"
-
-        # Use storage backend - resolve AFTER modifying destination_uri
-        backend, path = self._resolve_backend_and_path(destination_uri)
-
-        # Try native database export first
-        if file_format == "parquet" and self.supports_native_parquet_export:
-            try:
-                compiled_sql, _ = query.compile(placeholder_style="static")
-                return await self._export_native(compiled_sql, destination_uri, file_format, **kwargs)
-            except NotImplementedError:
-                # Fall through to use storage backend
-                pass
-
-        if file_format == "parquet":
-            # Use Arrow for efficient transfer
-            arrow_result = await self._fetch_arrow_table(query, connection=connection, **kwargs)
-            arrow_table = arrow_result.data
-            if arrow_table is not None:
-                await backend.write_arrow_async(path, arrow_table, **kwargs)
-                return arrow_table.num_rows
-            return 0
-
-        return await self._export_via_backend(query, backend, path, file_format, **kwargs)
-
-    async def import_from_storage(
-        self,
-        source_uri: "Union[str, Path]",
-        table_name: str,
-        format: "Optional[str]" = None,
-        mode: str = "create",
-        **options: Any,
-    ) -> int:
-        """Async import data from storage with intelligent routing.
-
-        Provides instrumentation and delegates to _import_from_storage() for consistent operation.
-
-        Args:
-            source_uri: URI to import data from
-            table_name: Target table name
-            format: Optional format override (auto-detected from URI if not provided)
-            mode: Import mode ('create', 'append', 'replace')
-            **options: Additional import options
-
-        Returns:
-            Number of rows imported
-        """
-        return await self._import_from_storage(source_uri, table_name, format, mode, **options)
-
-    async def _import_from_storage(
-        self,
-        source_uri: "Union[str, Path]",
-        table_name: str,
-        format: "Optional[str]" = None,
-        mode: str = "create",
-        **options: Any,
-    ) -> int:
-        """Protected async method for import operation implementation.
-
-        Args:
-            source_uri: URI to import data from
-            table_name: Target table name
-            format: Optional format override (auto-detected from URI if not provided)
-            mode: Import mode ('create', 'append', 'replace')
-            **options: Additional import options
-
-        Returns:
-            Number of rows imported
-        """
-        file_format = format or self._detect_format(source_uri)
-        backend, path = self._resolve_backend_and_path(source_uri)
-
-        if file_format == "parquet":
-            arrow_table = await backend.read_arrow_async(path, **options)
-            return await self.ingest_arrow_table(arrow_table, table_name, mode=mode)
-
-        return await self._import_via_backend(backend, path, table_name, file_format, mode, **options)
-
-    # ============================================================================
-    # Async Database-Specific Implementation Hooks
-    # ============================================================================
-
-    async def _export_native(self, query: str, destination_uri: "Union[str, Path]", format: str, **options: Any) -> int:
-        """Async database-specific native export."""
-        msg = "Driver should implement _export_native"
-        raise NotImplementedError(msg)
-
-    async def _import_native(
-        self, source_uri: "Union[str, Path]", table_name: str, format: str, mode: str, **options: Any
-    ) -> int:
-        """Async database-specific native import."""
-        msg = "Driver should implement _import_native"
-        raise NotImplementedError(msg)
-
-    async def _export_via_backend(
-        self, sql_obj: "SQL", backend: "ObjectStoreProtocol", path: str, format: str, **options: Any
-    ) -> int:
-        """Async export via storage backend."""
-
-        # Execute query and get results - use the SQL object directly
-        try:
-            result = await self.execute(sql_obj)  # type: ignore[attr-defined]
-        except Exception:
-            # Fall back to direct execution
-            compiled_sql, compiled_params = sql_obj.compile("qmark")
-            driver_result = await self._execute(compiled_sql, compiled_params, sql_obj)  # type: ignore[attr-defined]
-            if "data" in driver_result:
-                result = self._wrap_select_result(sql_obj, driver_result)  # type: ignore[attr-defined]
-            else:
-                result = self._wrap_execute_result(sql_obj, driver_result)  # type: ignore[attr-defined]
-
-        # For parquet format, convert through Arrow
-        if format == "parquet":
-            arrow_table = self._rows_to_arrow_table(result.data or [], result.column_names or [])
-            await backend.write_arrow_async(path, arrow_table, **options)
-            return len(result.data or [])
-
-        with tempfile.NamedTemporaryFile(mode="w", suffix=f".{format}", delete=False, encoding="utf-8") as tmp:
-            if format == "csv":
-                self._write_csv(result, tmp, **options)
-            elif format == "json":
-                self._write_json(result, tmp, **options)
-            else:
-                msg = f"Unsupported format for backend export: {format}"
-                raise ValueError(msg)
-
-            tmp_path = Path(tmp.name)
-
-        try:
-            # Upload to storage backend (async if supported)
-            await backend.write_bytes_async(path, tmp_path.read_bytes())
-            return result.rows_affected or len(result.data or [])
-        finally:
-            tmp_path.unlink(missing_ok=True)
-
-    async def _import_via_backend(
-        self, backend: "ObjectStoreProtocol", path: str, table_name: str, format: str, mode: str, **options: Any
-    ) -> int:
-        """Async import via storage backend."""
-        # Download from storage backend (async if supported)
-        data = await backend.read_bytes_async(path)
-
-        with tempfile.NamedTemporaryFile(mode="wb", suffix=f".{format}", delete=False) as tmp:
-            tmp.write(data)
-            tmp_path = Path(tmp.name)
-
-        try:
-            return await self._bulk_load_file(tmp_path, table_name, format, mode, **options)
-        finally:
-            tmp_path.unlink(missing_ok=True)
-
-    @staticmethod
-    def _write_csv(result: "SQLResult", file: Any, **options: Any) -> None:
-        """Reuse sync implementation."""
-        write_csv(result, file, **options)
-
-    @staticmethod
-    def _write_json(result: "SQLResult", file: Any, **options: Any) -> None:
-        """Reuse sync implementation."""
-        _ = options  # May be used in the future for JSON formatting options
-
-        if result.data and result.column_names:
-            if result.data and isinstance(result.data[0], dict):
-                # Data is already dictionaries, use as-is
-                rows = result.data
-            else:
-                rows = [dict(zip(result.column_names, row)) for row in result.data]
-            json_str = to_json(rows)
-            file.write(json_str)
-        else:
-            json_str = to_json([])
-            file.write(json_str)
-
-    async def _bulk_load_file(self, file_path: Path, table_name: str, format: str, mode: str, **options: Any) -> int:
-        """Async database-specific bulk load implementation."""
-        msg = "Driver should implement _bulk_load_file"
-        raise NotImplementedError(msg)
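
For context on the extension contract the removed module defined: its docstrings state that drivers inherit the mixin's routing (native export, Arrow transfer, or temp-file via a storage backend) and override only the database-specific hooks such as _bulk_load_file. A minimal sketch of that contract against the 0.14.1 layout follows; only the SyncStorageMixin import path, the hook name, and its signature come from the file above, while the driver class, its CSV handling, and the sqlite3 connection are hypothetical illustrations.

# Hypothetical driver skeleton illustrating the override contract described above.
import csv
import sqlite3
from pathlib import Path
from typing import Any

from sqlspec.driver.mixins._storage import SyncStorageMixin  # 0.14.1 module path


class HypotheticalSqliteDriver(SyncStorageMixin):
    supports_native_parquet_export = False
    supports_native_parquet_import = False

    def __init__(self, connection: sqlite3.Connection) -> None:
        self._connection = connection

    def _bulk_load_file(self, file_path: Path, table_name: str, format: str, mode: str, **options: Any) -> int:
        # The mixin downloads the object to a local temp file and calls this hook;
        # a real driver would issue its database's bulk-load command here.
        if format != "csv":
            raise NotImplementedError(f"unsupported format: {format}")
        with file_path.open(newline="", encoding="utf-8") as fh:
            header, *rows = list(csv.reader(fh))
        placeholders = ", ".join("?" for _ in header)
        self._connection.executemany(
            f"INSERT INTO {table_name} ({', '.join(header)}) VALUES ({placeholders})", rows
        )
        self._connection.commit()
        return len(rows)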