sqlspec 0.11.1__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (155)
  1. sqlspec/__init__.py +16 -3
  2. sqlspec/_serialization.py +3 -10
  3. sqlspec/_sql.py +1147 -0
  4. sqlspec/_typing.py +343 -41
  5. sqlspec/adapters/adbc/__init__.py +2 -6
  6. sqlspec/adapters/adbc/config.py +474 -149
  7. sqlspec/adapters/adbc/driver.py +330 -621
  8. sqlspec/adapters/aiosqlite/__init__.py +2 -6
  9. sqlspec/adapters/aiosqlite/config.py +143 -57
  10. sqlspec/adapters/aiosqlite/driver.py +269 -431
  11. sqlspec/adapters/asyncmy/__init__.py +3 -8
  12. sqlspec/adapters/asyncmy/config.py +247 -202
  13. sqlspec/adapters/asyncmy/driver.py +218 -436
  14. sqlspec/adapters/asyncpg/__init__.py +4 -7
  15. sqlspec/adapters/asyncpg/config.py +329 -176
  16. sqlspec/adapters/asyncpg/driver.py +417 -487
  17. sqlspec/adapters/bigquery/__init__.py +2 -2
  18. sqlspec/adapters/bigquery/config.py +407 -0
  19. sqlspec/adapters/bigquery/driver.py +600 -553
  20. sqlspec/adapters/duckdb/__init__.py +4 -1
  21. sqlspec/adapters/duckdb/config.py +432 -321
  22. sqlspec/adapters/duckdb/driver.py +392 -406
  23. sqlspec/adapters/oracledb/__init__.py +3 -8
  24. sqlspec/adapters/oracledb/config.py +625 -0
  25. sqlspec/adapters/oracledb/driver.py +548 -921
  26. sqlspec/adapters/psqlpy/__init__.py +4 -7
  27. sqlspec/adapters/psqlpy/config.py +372 -203
  28. sqlspec/adapters/psqlpy/driver.py +197 -533
  29. sqlspec/adapters/psycopg/__init__.py +3 -8
  30. sqlspec/adapters/psycopg/config.py +725 -0
  31. sqlspec/adapters/psycopg/driver.py +734 -694
  32. sqlspec/adapters/sqlite/__init__.py +2 -6
  33. sqlspec/adapters/sqlite/config.py +146 -81
  34. sqlspec/adapters/sqlite/driver.py +242 -405
  35. sqlspec/base.py +220 -784
  36. sqlspec/config.py +354 -0
  37. sqlspec/driver/__init__.py +22 -0
  38. sqlspec/driver/_async.py +252 -0
  39. sqlspec/driver/_common.py +338 -0
  40. sqlspec/driver/_sync.py +261 -0
  41. sqlspec/driver/mixins/__init__.py +17 -0
  42. sqlspec/driver/mixins/_pipeline.py +523 -0
  43. sqlspec/driver/mixins/_result_utils.py +122 -0
  44. sqlspec/driver/mixins/_sql_translator.py +35 -0
  45. sqlspec/driver/mixins/_storage.py +993 -0
  46. sqlspec/driver/mixins/_type_coercion.py +131 -0
  47. sqlspec/exceptions.py +299 -7
  48. sqlspec/extensions/aiosql/__init__.py +10 -0
  49. sqlspec/extensions/aiosql/adapter.py +474 -0
  50. sqlspec/extensions/litestar/__init__.py +1 -6
  51. sqlspec/extensions/litestar/_utils.py +1 -5
  52. sqlspec/extensions/litestar/config.py +5 -6
  53. sqlspec/extensions/litestar/handlers.py +13 -12
  54. sqlspec/extensions/litestar/plugin.py +22 -24
  55. sqlspec/extensions/litestar/providers.py +37 -55
  56. sqlspec/loader.py +528 -0
  57. sqlspec/service/__init__.py +3 -0
  58. sqlspec/service/base.py +24 -0
  59. sqlspec/service/pagination.py +26 -0
  60. sqlspec/statement/__init__.py +21 -0
  61. sqlspec/statement/builder/__init__.py +54 -0
  62. sqlspec/statement/builder/_ddl_utils.py +119 -0
  63. sqlspec/statement/builder/_parsing_utils.py +135 -0
  64. sqlspec/statement/builder/base.py +328 -0
  65. sqlspec/statement/builder/ddl.py +1379 -0
  66. sqlspec/statement/builder/delete.py +80 -0
  67. sqlspec/statement/builder/insert.py +274 -0
  68. sqlspec/statement/builder/merge.py +95 -0
  69. sqlspec/statement/builder/mixins/__init__.py +65 -0
  70. sqlspec/statement/builder/mixins/_aggregate_functions.py +151 -0
  71. sqlspec/statement/builder/mixins/_case_builder.py +91 -0
  72. sqlspec/statement/builder/mixins/_common_table_expr.py +91 -0
  73. sqlspec/statement/builder/mixins/_delete_from.py +34 -0
  74. sqlspec/statement/builder/mixins/_from.py +61 -0
  75. sqlspec/statement/builder/mixins/_group_by.py +119 -0
  76. sqlspec/statement/builder/mixins/_having.py +35 -0
  77. sqlspec/statement/builder/mixins/_insert_from_select.py +48 -0
  78. sqlspec/statement/builder/mixins/_insert_into.py +36 -0
  79. sqlspec/statement/builder/mixins/_insert_values.py +69 -0
  80. sqlspec/statement/builder/mixins/_join.py +110 -0
  81. sqlspec/statement/builder/mixins/_limit_offset.py +53 -0
  82. sqlspec/statement/builder/mixins/_merge_clauses.py +405 -0
  83. sqlspec/statement/builder/mixins/_order_by.py +46 -0
  84. sqlspec/statement/builder/mixins/_pivot.py +82 -0
  85. sqlspec/statement/builder/mixins/_returning.py +37 -0
  86. sqlspec/statement/builder/mixins/_select_columns.py +60 -0
  87. sqlspec/statement/builder/mixins/_set_ops.py +122 -0
  88. sqlspec/statement/builder/mixins/_unpivot.py +80 -0
  89. sqlspec/statement/builder/mixins/_update_from.py +54 -0
  90. sqlspec/statement/builder/mixins/_update_set.py +91 -0
  91. sqlspec/statement/builder/mixins/_update_table.py +29 -0
  92. sqlspec/statement/builder/mixins/_where.py +374 -0
  93. sqlspec/statement/builder/mixins/_window_functions.py +86 -0
  94. sqlspec/statement/builder/protocols.py +20 -0
  95. sqlspec/statement/builder/select.py +206 -0
  96. sqlspec/statement/builder/update.py +178 -0
  97. sqlspec/statement/filters.py +571 -0
  98. sqlspec/statement/parameters.py +736 -0
  99. sqlspec/statement/pipelines/__init__.py +67 -0
  100. sqlspec/statement/pipelines/analyzers/__init__.py +9 -0
  101. sqlspec/statement/pipelines/analyzers/_analyzer.py +649 -0
  102. sqlspec/statement/pipelines/base.py +315 -0
  103. sqlspec/statement/pipelines/context.py +119 -0
  104. sqlspec/statement/pipelines/result_types.py +41 -0
  105. sqlspec/statement/pipelines/transformers/__init__.py +8 -0
  106. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +256 -0
  107. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +623 -0
  108. sqlspec/statement/pipelines/transformers/_remove_comments.py +66 -0
  109. sqlspec/statement/pipelines/transformers/_remove_hints.py +81 -0
  110. sqlspec/statement/pipelines/validators/__init__.py +23 -0
  111. sqlspec/statement/pipelines/validators/_dml_safety.py +275 -0
  112. sqlspec/statement/pipelines/validators/_parameter_style.py +297 -0
  113. sqlspec/statement/pipelines/validators/_performance.py +703 -0
  114. sqlspec/statement/pipelines/validators/_security.py +990 -0
  115. sqlspec/statement/pipelines/validators/base.py +67 -0
  116. sqlspec/statement/result.py +527 -0
  117. sqlspec/statement/splitter.py +701 -0
  118. sqlspec/statement/sql.py +1198 -0
  119. sqlspec/storage/__init__.py +15 -0
  120. sqlspec/storage/backends/__init__.py +0 -0
  121. sqlspec/storage/backends/base.py +166 -0
  122. sqlspec/storage/backends/fsspec.py +315 -0
  123. sqlspec/storage/backends/obstore.py +464 -0
  124. sqlspec/storage/protocol.py +170 -0
  125. sqlspec/storage/registry.py +315 -0
  126. sqlspec/typing.py +157 -36
  127. sqlspec/utils/correlation.py +155 -0
  128. sqlspec/utils/deprecation.py +3 -6
  129. sqlspec/utils/fixtures.py +6 -11
  130. sqlspec/utils/logging.py +135 -0
  131. sqlspec/utils/module_loader.py +45 -43
  132. sqlspec/utils/serializers.py +4 -0
  133. sqlspec/utils/singleton.py +6 -8
  134. sqlspec/utils/sync_tools.py +15 -27
  135. sqlspec/utils/text.py +58 -26
  136. {sqlspec-0.11.1.dist-info → sqlspec-0.12.1.dist-info}/METADATA +97 -26
  137. sqlspec-0.12.1.dist-info/RECORD +145 -0
  138. sqlspec/adapters/bigquery/config/__init__.py +0 -3
  139. sqlspec/adapters/bigquery/config/_common.py +0 -40
  140. sqlspec/adapters/bigquery/config/_sync.py +0 -87
  141. sqlspec/adapters/oracledb/config/__init__.py +0 -9
  142. sqlspec/adapters/oracledb/config/_asyncio.py +0 -186
  143. sqlspec/adapters/oracledb/config/_common.py +0 -131
  144. sqlspec/adapters/oracledb/config/_sync.py +0 -186
  145. sqlspec/adapters/psycopg/config/__init__.py +0 -19
  146. sqlspec/adapters/psycopg/config/_async.py +0 -169
  147. sqlspec/adapters/psycopg/config/_common.py +0 -56
  148. sqlspec/adapters/psycopg/config/_sync.py +0 -168
  149. sqlspec/filters.py +0 -331
  150. sqlspec/mixins.py +0 -305
  151. sqlspec/statement.py +0 -378
  152. sqlspec-0.11.1.dist-info/RECORD +0 -69
  153. {sqlspec-0.11.1.dist-info → sqlspec-0.12.1.dist-info}/WHEEL +0 -0
  154. {sqlspec-0.11.1.dist-info → sqlspec-0.12.1.dist-info}/licenses/LICENSE +0 -0
  155. {sqlspec-0.11.1.dist-info → sqlspec-0.12.1.dist-info}/licenses/NOTICE +0 -0
sqlspec/adapters/duckdb/driver.py
@@ -1,425 +1,411 @@
-import logging
+import contextlib
+import uuid
+from collections.abc import Generator
 from contextlib import contextmanager
-from typing import TYPE_CHECKING, Any, Optional, Union, cast, overload
+from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union, cast

 from duckdb import DuckDBPyConnection
-
-from sqlspec.base import SyncDriverAdapterProtocol
-from sqlspec.filters import StatementFilter
-from sqlspec.mixins import ResultConverter, SQLTranslatorMixin, SyncArrowBulkOperationsMixin
-from sqlspec.statement import SQLStatement
-from sqlspec.typing import ArrowTable, StatementParameterType
+from sqlglot import exp
+
+from sqlspec.driver import SyncDriverAdapterProtocol
+from sqlspec.driver.mixins import (
+    SQLTranslatorMixin,
+    SyncPipelinedExecutionMixin,
+    SyncStorageMixin,
+    ToSchemaMixin,
+    TypeCoercionMixin,
+)
+from sqlspec.statement.parameters import ParameterStyle
+from sqlspec.statement.result import ArrowResult, DMLResultDict, ScriptResultDict, SelectResultDict, SQLResult
+from sqlspec.statement.sql import SQL, SQLConfig
+from sqlspec.typing import ArrowTable, DictRow, ModelDTOT, RowT
+from sqlspec.utils.logging import get_logger

 if TYPE_CHECKING:
-    from collections.abc import Generator, Mapping, Sequence
+    from sqlglot.dialects.dialect import DialectType

-    from sqlspec.typing import ArrowTable, ModelDTOT, StatementParameterType, T
+    from sqlspec.typing import ArrowTable

 __all__ = ("DuckDBConnection", "DuckDBDriver")

-logger = logging.getLogger("sqlspec")
-
 DuckDBConnection = DuckDBPyConnection

+logger = get_logger("adapters.duckdb")
+

 class DuckDBDriver(
-    SyncArrowBulkOperationsMixin["DuckDBConnection"],
-    SQLTranslatorMixin["DuckDBConnection"],
-    SyncDriverAdapterProtocol["DuckDBConnection"],
-    ResultConverter,
+    SyncDriverAdapterProtocol["DuckDBConnection", RowT],
+    SQLTranslatorMixin,
+    TypeCoercionMixin,
+    SyncStorageMixin,
+    SyncPipelinedExecutionMixin,
+    ToSchemaMixin,
 ):
-    """DuckDB Sync Driver Adapter."""
-
-    connection: "DuckDBConnection"
-    use_cursor: bool = True
-    dialect: str = "duckdb"
-
-    def __init__(self, connection: "DuckDBConnection", use_cursor: bool = True) -> None:
-        self.connection = connection
-        self.use_cursor = use_cursor
-
-    def _cursor(self, connection: "DuckDBConnection") -> "DuckDBConnection":
-        if self.use_cursor:
-            return connection.cursor()
-        return connection
+    """DuckDB Sync Driver Adapter with modern architecture.
+
+    DuckDB is a fast, in-process analytical database built for modern data analysis.
+    This driver provides:
+
+    - High-performance columnar query execution
+    - Excellent Arrow integration for analytics workloads
+    - Direct file querying (CSV, Parquet, JSON) without imports
+    - Extension ecosystem for cloud storage and formats
+    - Zero-copy operations where possible
+    """
+
+    dialect: "DialectType" = "duckdb"
+    supported_parameter_styles: "tuple[ParameterStyle, ...]" = (ParameterStyle.QMARK, ParameterStyle.NUMERIC)
+    default_parameter_style: ParameterStyle = ParameterStyle.QMARK
+    supports_native_arrow_export: ClassVar[bool] = True
+    supports_native_arrow_import: ClassVar[bool] = True
+    supports_native_parquet_export: ClassVar[bool] = True
+    supports_native_parquet_import: ClassVar[bool] = True
+    __slots__ = ()
+
+    def __init__(
+        self,
+        connection: "DuckDBConnection",
+        config: "Optional[SQLConfig]" = None,
+        default_row_type: "type[DictRow]" = DictRow,
+    ) -> None:
+        super().__init__(connection=connection, config=config, default_row_type=default_row_type)

+    @staticmethod
     @contextmanager
-    def _with_cursor(self, connection: "DuckDBConnection") -> "Generator[DuckDBConnection, None, None]":
-        if self.use_cursor:
-            cursor = self._cursor(connection)
+    def _get_cursor(connection: "DuckDBConnection") -> Generator["DuckDBConnection", None, None]:
+        cursor = connection.cursor()
+        try:
+            yield cursor
+        finally:
+            cursor.close()
+
+    def _execute_statement(
+        self, statement: SQL, connection: Optional["DuckDBConnection"] = None, **kwargs: Any
+    ) -> "Union[SelectResultDict, DMLResultDict, ScriptResultDict]":
+        if statement.is_script:
+            sql, _ = statement.compile(placeholder_style=ParameterStyle.STATIC)
+            return self._execute_script(sql, connection=connection, **kwargs)
+
+        if statement.is_many:
+            sql, params = statement.compile(placeholder_style=self.default_parameter_style)
+            params = self._process_parameters(params)
+            return self._execute_many(sql, params, connection=connection, **kwargs)
+
+        sql, params = statement.compile(placeholder_style=self.default_parameter_style)
+        params = self._process_parameters(params)
+        return self._execute(sql, params, statement, connection=connection, **kwargs)
+
+    def _execute(
+        self, sql: str, parameters: Any, statement: SQL, connection: Optional["DuckDBConnection"] = None, **kwargs: Any
+    ) -> "Union[SelectResultDict, DMLResultDict]":
+        conn = self._connection(connection)
+
+        if self.returns_rows(statement.expression):
+            result = conn.execute(sql, parameters or [])
+            fetched_data = result.fetchall()
+            column_names = [col[0] for col in result.description or []]
+            return {"data": fetched_data, "column_names": column_names, "rows_affected": len(fetched_data)}
+
+        with self._get_cursor(conn) as cursor:
+            cursor.execute(sql, parameters or [])
+            # DuckDB returns -1 for rowcount on DML operations
+            # However, fetchone() returns the actual affected row count as (count,)
+            rows_affected = cursor.rowcount
+            if rows_affected < 0:
+                try:
+                    # Get actual affected row count from fetchone()
+                    fetch_result = cursor.fetchone()
+                    if fetch_result and isinstance(fetch_result, (tuple, list)) and len(fetch_result) > 0:
+                        rows_affected = fetch_result[0]
+                    else:
+                        rows_affected = 0
+                except Exception:
+                    # Fallback to 1 if fetchone fails
+                    rows_affected = 1
+            return {"rows_affected": rows_affected}
+
+    def _execute_many(
+        self, sql: str, param_list: Any, connection: Optional["DuckDBConnection"] = None, **kwargs: Any
+    ) -> "DMLResultDict":
+        conn = self._connection(connection)
+        param_list = param_list or []
+
+        # DuckDB throws an error if executemany is called with empty parameter list
+        if not param_list:
+            return {"rows_affected": 0}
+        with self._get_cursor(conn) as cursor:
+            cursor.executemany(sql, param_list)
+            # DuckDB returns -1 for rowcount on DML operations
+            # For executemany, fetchone() only returns the count from the last operation,
+            # so use parameter list length as the most accurate estimate
+            rows_affected = cursor.rowcount if cursor.rowcount >= 0 else len(param_list)
+            return {"rows_affected": rows_affected}
+
+    def _execute_script(
+        self, script: str, connection: Optional["DuckDBConnection"] = None, **kwargs: Any
+    ) -> "ScriptResultDict":
+        conn = self._connection(connection)
+        with self._get_cursor(conn) as cursor:
+            cursor.execute(script)
+
+        return {
+            "statements_executed": -1,
+            "status_message": "Script executed successfully.",
+            "description": "The script was sent to the database.",
+        }
+
+    def _wrap_select_result(
+        self, statement: SQL, result: "SelectResultDict", schema_type: Optional[type[ModelDTOT]] = None, **kwargs: Any
+    ) -> Union[SQLResult[ModelDTOT], SQLResult[RowT]]:
+        fetched_tuples = result["data"]
+        column_names = result["column_names"]
+        rows_affected = result["rows_affected"]
+
+        rows_as_dicts: list[dict[str, Any]] = [dict(zip(column_names, row)) for row in fetched_tuples]
+
+        logger.debug("Query returned %d rows", len(rows_as_dicts))
+
+        if schema_type:
+            converted_data = self.to_schema(data=rows_as_dicts, schema_type=schema_type)
+            return SQLResult[ModelDTOT](
+                statement=statement,
+                data=list(converted_data),
+                column_names=column_names,
+                rows_affected=rows_affected,
+                operation_type="SELECT",
+            )
+
+        return SQLResult[RowT](
+            statement=statement,
+            data=rows_as_dicts,
+            column_names=column_names,
+            rows_affected=rows_affected,
+            operation_type="SELECT",
+        )
+
+    def _wrap_execute_result(
+        self, statement: SQL, result: "Union[DMLResultDict, ScriptResultDict]", **kwargs: Any
+    ) -> SQLResult[RowT]:
+        operation_type = "UNKNOWN"
+        if statement.expression:
+            operation_type = str(statement.expression.key).upper()
+
+        if "statements_executed" in result:
+            script_result = cast("ScriptResultDict", result)
+            return SQLResult[RowT](
+                statement=statement,
+                data=[],
+                rows_affected=0,
+                operation_type=operation_type or "SCRIPT",
+                metadata={"status_message": script_result.get("status_message", "")},
+            )
+
+        dml_result = cast("DMLResultDict", result)
+        rows_affected = dml_result.get("rows_affected", -1)
+        status_message = dml_result.get("status_message", "")
+        return SQLResult[RowT](
+            statement=statement,
+            data=[],
+            rows_affected=rows_affected,
+            operation_type=operation_type,
+            metadata={"status_message": status_message},
+        )
+
+    # ============================================================================
+    # DuckDB Native Arrow Support
+    # ============================================================================
+
+    def _fetch_arrow_table(self, sql: SQL, connection: "Optional[Any]" = None, **kwargs: Any) -> "ArrowResult":
+        """Enhanced DuckDB native Arrow table fetching with streaming support."""
+        conn = self._connection(connection)
+        sql_string, parameters = sql.compile(placeholder_style=self.default_parameter_style)
+        parameters = self._process_parameters(parameters)
+        result = conn.execute(sql_string, parameters or [])
+
+        batch_size = kwargs.get("batch_size")
+        if batch_size:
+            arrow_reader = result.fetch_record_batch(batch_size)
+            import pyarrow as pa
+
+            batches = list(arrow_reader)
+            arrow_table = pa.Table.from_batches(batches) if batches else pa.table({})
+            logger.debug("Fetched Arrow table (streaming) with %d rows", arrow_table.num_rows)
+        else:
+            arrow_table = result.arrow()
+            logger.debug("Fetched Arrow table (zero-copy) with %d rows", arrow_table.num_rows)
+
+        return ArrowResult(statement=sql, data=arrow_table)
+
+    # ============================================================================
+    # DuckDB Native Storage Operations (Override base implementations)
+    # ============================================================================
+
+    def _has_native_capability(self, operation: str, uri: str = "", format: str = "") -> bool:
+        if format:
+            format_lower = format.lower()
+            if operation == "export" and format_lower in {"parquet", "csv", "json"}:
+                return True
+            if operation == "import" and format_lower in {"parquet", "csv", "json"}:
+                return True
+            if operation == "read" and format_lower == "parquet":
+                return True
+        return False
+
+    def _export_native(self, query: str, destination_uri: str, format: str, **options: Any) -> int:
+        conn = self._connection(None)
+        copy_options: list[str] = []
+
+        if format.lower() == "parquet":
+            copy_options.append("FORMAT PARQUET")
+            if "compression" in options:
+                copy_options.append(f"COMPRESSION '{options['compression'].upper()}'")
+            if "row_group_size" in options:
+                copy_options.append(f"ROW_GROUP_SIZE {options['row_group_size']}")
+            if "partition_by" in options:
+                partition_cols = (
+                    [options["partition_by"]] if isinstance(options["partition_by"], str) else options["partition_by"]
+                )
+                copy_options.append(f"PARTITION_BY ({', '.join(partition_cols)})")
+        elif format.lower() == "csv":
+            copy_options.extend(("FORMAT CSV", "HEADER"))
+            if "compression" in options:
+                copy_options.append(f"COMPRESSION '{options['compression'].upper()}'")
+            if "delimiter" in options:
+                copy_options.append(f"DELIMITER '{options['delimiter']}'")
+            if "quote" in options:
+                copy_options.append(f"QUOTE '{options['quote']}'")
+        elif format.lower() == "json":
+            copy_options.append("FORMAT JSON")
+            if "compression" in options:
+                copy_options.append(f"COMPRESSION '{options['compression'].upper()}'")
+        else:
+            msg = f"Unsupported format for DuckDB native export: {format}"
+            raise ValueError(msg)
+
+        options_str = f"({', '.join(copy_options)})" if copy_options else ""
+        copy_sql = f"COPY ({query}) TO '{destination_uri}' {options_str}"
+        result_rel = conn.execute(copy_sql)
+        result = result_rel.fetchone() if result_rel else None
+        return result[0] if result else 0
+
+    def _import_native(self, source_uri: str, table_name: str, format: str, mode: str, **options: Any) -> int:
+        conn = self._connection(None)
+        if format == "parquet":
+            read_func = f"read_parquet('{source_uri}')"
+        elif format == "csv":
+            read_func = f"read_csv_auto('{source_uri}')"
+        elif format == "json":
+            read_func = f"read_json_auto('{source_uri}')"
+        else:
+            msg = f"Unsupported format for DuckDB native import: {format}"
+            raise ValueError(msg)
+
+        if mode == "create":
+            sql = f"CREATE TABLE {table_name} AS SELECT * FROM {read_func}"
+        elif mode == "replace":
+            sql = f"CREATE OR REPLACE TABLE {table_name} AS SELECT * FROM {read_func}"
+        elif mode == "append":
+            sql = f"INSERT INTO {table_name} SELECT * FROM {read_func}"
+        else:
+            msg = f"Unsupported import mode: {mode}"
+            raise ValueError(msg)
+
+        result_rel = conn.execute(sql)
+        result = result_rel.fetchone() if result_rel else None
+        if result:
+            return int(result[0])
+
+        count_result_rel = conn.execute(f"SELECT COUNT(*) FROM {table_name}")
+        count_result = count_result_rel.fetchone() if count_result_rel else None
+        return int(count_result[0]) if count_result else 0
+
+    def _read_parquet_native(
+        self, source_uri: str, columns: Optional[list[str]] = None, **options: Any
+    ) -> "SQLResult[dict[str, Any]]":
+        conn = self._connection(None)
+        if isinstance(source_uri, list):
+            file_list = "[" + ", ".join(f"'{f}'" for f in source_uri) + "]"
+            read_func = f"read_parquet({file_list})"
+        elif "*" in source_uri or "?" in source_uri:
+            read_func = f"read_parquet('{source_uri}')"
+        else:
+            read_func = f"read_parquet('{source_uri}')"
+
+        column_list = ", ".join(columns) if columns else "*"
+        query = f"SELECT {column_list} FROM {read_func}"
+
+        filters = options.get("filters")
+        if filters:
+            where_clauses = []
+            for col, op, val in filters:
+                where_clauses.append(f"'{col}' {op} '{val}'" if isinstance(val, str) else f"'{col}' {op} {val}")
+            if where_clauses:
+                query += " WHERE " + " AND ".join(where_clauses)
+
+        arrow_table = conn.execute(query).arrow()
+        arrow_dict = arrow_table.to_pydict()
+        column_names = arrow_table.column_names
+        num_rows = arrow_table.num_rows
+
+        rows = [{col: arrow_dict[col][i] for col in column_names} for i in range(num_rows)]
+
+        return SQLResult[dict[str, Any]](
+            statement=SQL(query), data=rows, column_names=column_names, rows_affected=num_rows, operation_type="SELECT"
+        )
+
+    def _write_parquet_native(self, data: Union[str, "ArrowTable"], destination_uri: str, **options: Any) -> None:
+        conn = self._connection(None)
+        copy_options: list[str] = ["FORMAT PARQUET"]
+        if "compression" in options:
+            copy_options.append(f"COMPRESSION '{options['compression'].upper()}'")
+        if "row_group_size" in options:
+            copy_options.append(f"ROW_GROUP_SIZE {options['row_group_size']}")
+
+        options_str = f"({', '.join(copy_options)})"
+
+        if isinstance(data, str):
+            copy_sql = f"COPY ({data}) TO '{destination_uri}' {options_str}"
+            conn.execute(copy_sql)
+        else:
+            temp_name = f"_arrow_data_{uuid.uuid4().hex[:8]}"
+            conn.register(temp_name, data)
             try:
-                yield cursor
+                copy_sql = f"COPY {temp_name} TO '{destination_uri}' {options_str}"
+                conn.execute(copy_sql)
             finally:
-                cursor.close()
-        else:
-            yield connection
-
-    def _process_sql_params(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        **kwargs: Any,
-    ) -> "tuple[str, Optional[Union[tuple[Any, ...], list[Any], dict[str, Any]]]]":
-        """Process SQL and parameters for DuckDB using SQLStatement.
-
-        DuckDB supports both named (:name, $name) and positional (?) parameters.
-        This method processes the SQL with dialect-aware parsing and handles
-        parameters appropriately for DuckDB.
-
-        Args:
-            sql: SQL statement.
-            parameters: Query parameters.
-            *filters: Statement filters to apply.
-            **kwargs: Additional keyword arguments.
-
-        Returns:
-            Tuple of processed SQL and parameters.
-        """
-        data_params_for_statement: Optional[Union[Mapping[str, Any], Sequence[Any]]] = None
-        combined_filters_list: list[StatementFilter] = list(filters)
-
-        if parameters is not None:
-            if isinstance(parameters, StatementFilter):
-                combined_filters_list.insert(0, parameters)
+                with contextlib.suppress(Exception):
+                    conn.unregister(temp_name)
+
+    def _ingest_arrow_table(self, table: "ArrowTable", table_name: str, mode: str = "create", **options: Any) -> int:
+        """DuckDB-optimized Arrow table ingestion using native registration."""
+        self._ensure_pyarrow_installed()
+        conn = self._connection(None)
+        temp_name = f"_arrow_temp_{uuid.uuid4().hex[:8]}"
+
+        try:
+            conn.register(temp_name, table)
+
+            if mode == "create":
+                sql_expr = exp.Create(
+                    this=exp.to_table(table_name), expression=exp.Select().from_(temp_name).select("*"), kind="TABLE"
+                )
+            elif mode == "append":
+                sql_expr = exp.Insert(  # type: ignore[assignment]
+                    this=exp.to_table(table_name), expression=exp.Select().from_(temp_name).select("*")
+                )
+            elif mode == "replace":
+                sql_expr = exp.Create(
+                    this=exp.to_table(table_name),
+                    expression=exp.Select().from_(temp_name).select("*"),
+                    kind="TABLE",
+                    replace=True,
+                )
             else:
-                data_params_for_statement = parameters
-        if data_params_for_statement is not None and not isinstance(data_params_for_statement, (list, tuple, dict)):
-            data_params_for_statement = (data_params_for_statement,)
-        statement = SQLStatement(sql, data_params_for_statement, kwargs=kwargs, dialect=self.dialect)
-        for filter_obj in combined_filters_list:
-            statement = statement.apply_filter(filter_obj)
-
-        processed_sql, processed_params, _ = statement.process()
-        if processed_params is None:
-            return processed_sql, None
-        if isinstance(processed_params, dict):
-            return processed_sql, processed_params
-        if isinstance(processed_params, (list, tuple)):
-            return processed_sql, tuple(processed_params)
-        return processed_sql, (processed_params,)  # type: ignore[unreachable]
-
-    # --- Public API Methods --- #
-    @overload
-    def select(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: None = None,
-        **kwargs: Any,
-    ) -> "Sequence[dict[str, Any]]": ...
-    @overload
-    def select(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: "type[ModelDTOT]",
-        **kwargs: Any,
-    ) -> "Sequence[ModelDTOT]": ...
-    def select(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: "Optional[type[ModelDTOT]]" = None,
-        **kwargs: Any,
-    ) -> "Sequence[Union[dict[str, Any], ModelDTOT]]":
-        """Fetch data from the database.
-
-        Returns:
-            List of row data as either model instances or dictionaries.
-        """
-        connection = self._connection(connection)
-        sql, parameters = self._process_sql_params(sql, parameters, *filters, **kwargs)
-        with self._with_cursor(connection) as cursor:
-            cursor.execute(sql, [] if parameters is None else parameters)
-            results = cursor.fetchall()
-            if not results:
-                return []
-            column_names = [column[0] for column in cursor.description or []]
-            return self.to_schema([dict(zip(column_names, row)) for row in results], schema_type=schema_type)
-
-    @overload
-    def select_one(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: None = None,
-        **kwargs: Any,
-    ) -> "dict[str, Any]": ...
-    @overload
-    def select_one(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: "type[ModelDTOT]",
-        **kwargs: Any,
-    ) -> "ModelDTOT": ...
-    def select_one(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: "Optional[type[ModelDTOT]]" = None,
-        **kwargs: Any,
-    ) -> "Union[dict[str, Any], ModelDTOT]":
-        """Fetch one row from the database.
-
-        Returns:
-            The first row of the query results.
-        """
-        connection = self._connection(connection)
-        sql, parameters = self._process_sql_params(sql, parameters, *filters, **kwargs)
-        with self._with_cursor(connection) as cursor:
-            cursor.execute(sql, [] if parameters is None else parameters)
-            result = cursor.fetchone()
-            result = self.check_not_found(result)
-            column_names = [column[0] for column in cursor.description or []]
-            return self.to_schema(dict(zip(column_names, result)), schema_type=schema_type)
-
-    @overload
-    def select_one_or_none(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: None = None,
-        **kwargs: Any,
-    ) -> "Optional[dict[str, Any]]": ...
-    @overload
-    def select_one_or_none(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: "type[ModelDTOT]",
-        **kwargs: Any,
-    ) -> "Optional[ModelDTOT]": ...
-    def select_one_or_none(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: "Optional[type[ModelDTOT]]" = None,
-        **kwargs: Any,
-    ) -> "Optional[Union[dict[str, Any], ModelDTOT]]":
-        """Fetch one row from the database.
-
-        Returns:
-            The first row of the query results, or None if no results.
-        """
-        connection = self._connection(connection)
-        sql, parameters = self._process_sql_params(sql, parameters, *filters, **kwargs)
-        with self._with_cursor(connection) as cursor:
-            cursor.execute(sql, [] if parameters is None else parameters)
-            result = cursor.fetchone()
-            if result is None:
-                return None
-            column_names = [column[0] for column in cursor.description or []]
-            return self.to_schema(dict(zip(column_names, result)), schema_type=schema_type)
-
-    @overload
-    def select_value(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: None = None,
-        **kwargs: Any,
-    ) -> "Any": ...
-    @overload
-    def select_value(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: "type[T]",
-        **kwargs: Any,
-    ) -> "T": ...
-    def select_value(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: "Optional[type[T]]" = None,
-        **kwargs: Any,
-    ) -> "Union[T, Any]":
-        """Fetch a single value from the database.
-
-        Returns:
-            The first value from the first row of results.
-        """
-        connection = self._connection(connection)
-        sql, parameters = self._process_sql_params(sql, parameters, *filters, **kwargs)
-        with self._with_cursor(connection) as cursor:
-            cursor.execute(sql, [] if parameters is None else parameters)
-            result = cursor.fetchone()
-            result = self.check_not_found(result)
-            result_value = result[0]
-            if schema_type is None:
-                return result_value
-            return schema_type(result_value)  # type: ignore[call-arg]
-
-    @overload
-    def select_value_or_none(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: None = None,
-        **kwargs: Any,
-    ) -> "Optional[Any]": ...
-    @overload
-    def select_value_or_none(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: "type[T]",
-        **kwargs: Any,
-    ) -> "Optional[T]": ...
-    def select_value_or_none(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: "Optional[type[T]]" = None,
-        **kwargs: Any,
-    ) -> "Optional[Union[T, Any]]":
-        connection = self._connection(connection)
-        sql, parameters = self._process_sql_params(sql, parameters, *filters, **kwargs)
-        with self._with_cursor(connection) as cursor:
-            cursor.execute(sql, [] if parameters is None else parameters)
-            result = cursor.fetchone()
-            if result is None:
-                return None
-            if schema_type is None:
-                return result[0]
-            return schema_type(result[0])  # type: ignore[call-arg]
-
-    def insert_update_delete(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        **kwargs: Any,
-    ) -> int:
-        connection = self._connection(connection)
-        sql, parameters = self._process_sql_params(sql, parameters, *filters, **kwargs)
-        with self._with_cursor(connection) as cursor:
-            params = [] if parameters is None else parameters
-            cursor.execute(sql, params)
-            return getattr(cursor, "rowcount", -1)
-
-    @overload
-    def insert_update_delete_returning(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: None = None,
-        **kwargs: Any,
-    ) -> "dict[str, Any]": ...
-    @overload
-    def insert_update_delete_returning(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: "type[ModelDTOT]",
-        **kwargs: Any,
-    ) -> "ModelDTOT": ...
-    def insert_update_delete_returning(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        schema_type: "Optional[type[ModelDTOT]]" = None,
-        **kwargs: Any,
-    ) -> "Union[ModelDTOT, dict[str, Any]]":
-        connection = self._connection(connection)
-        sql, parameters = self._process_sql_params(sql, parameters, *filters, **kwargs)
-        with self._with_cursor(connection) as cursor:
-            params = [] if parameters is None else parameters
-            cursor.execute(sql, params)
-            result = cursor.fetchall()
-            result = self.check_not_found(result)
-            column_names = [col[0] for col in cursor.description or []]
-            return self.to_schema(dict(zip(column_names, result[0])), schema_type=schema_type)
-
-    def execute_script(
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        connection: "Optional[DuckDBConnection]" = None,
-        **kwargs: Any,
-    ) -> str:
-        connection = self._connection(connection)
-        sql, parameters = self._process_sql_params(sql, parameters, **kwargs)
-        with self._with_cursor(connection) as cursor:
-            params = [] if parameters is None else parameters
-            cursor.execute(sql, params)
-            return cast("str", getattr(cursor, "statusmessage", "DONE"))
-
-    # --- Arrow Bulk Operations ---
-
-    def select_arrow(  # pyright: ignore[reportUnknownParameterType]
-        self,
-        sql: str,
-        parameters: "Optional[StatementParameterType]" = None,
-        *filters: "StatementFilter",
-        connection: "Optional[DuckDBConnection]" = None,
-        **kwargs: Any,
-    ) -> "ArrowTable":
-        """Execute a SQL query and return results as an Apache Arrow Table.
-
-        Args:
-            sql: The SQL query string.
-            parameters: Parameters for the query.
-            *filters: Optional filters to apply to the SQL statement.
-            connection: Optional connection override.
-            **kwargs: Additional keyword arguments to merge with parameters if parameters is a dict.
-
-        Returns:
-            An Apache Arrow Table containing the query results.
-        """
-        connection = self._connection(connection)
-        sql, parameters = self._process_sql_params(sql, parameters, *filters, **kwargs)
-        with self._with_cursor(connection) as cursor:
-            params = [] if parameters is None else parameters
-            cursor.execute(sql, params)
-            return cast("ArrowTable", cursor.fetch_arrow_table())
-
-    def _connection(self, connection: "Optional[DuckDBConnection]" = None) -> "DuckDBConnection":
-        """Get the connection to use for the operation.
-
-        Args:
-            connection: Optional connection to use.
-
-        Returns:
-            The connection to use.
-        """
-        return connection or self.connection
+                msg = f"Unsupported mode: {mode}"
+                raise ValueError(msg)
+
+            result = self.execute(SQL(sql_expr.sql(dialect=self.dialect)))
+            return result.rows_affected or table.num_rows
+        finally:
+            with contextlib.suppress(Exception):
+                conn.unregister(temp_name)
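
For orientation, here is a minimal usage sketch of the rewritten driver surface. It assumes only what the hunk above shows: DuckDBDriver wraps an existing duckdb connection, execute() accepts a SQL object (the driver itself calls self.execute(SQL(...)) inside _ingest_arrow_table), and SQLResult exposes data, column_names, and rows_affected. The table t is hypothetical, and _fetch_arrow_table is called directly only for illustration; the public storage wrapper comes from SyncStorageMixin and is not shown in this hunk.

import duckdb

from sqlspec.adapters.duckdb.driver import DuckDBDriver
from sqlspec.statement.sql import SQL

# The driver wraps an existing in-process DuckDB connection rather than owning one.
conn = duckdb.connect(":memory:")
driver = DuckDBDriver(connection=conn)

driver.execute(SQL("CREATE TABLE t (a INTEGER)"))

# DuckDB reports rowcount as -1 for DML, so _execute recovers the affected-row
# count from the (count,) tuple that DuckDB's fetchone() returns after DML.
dml = driver.execute(SQL("INSERT INTO t VALUES (0), (1), (2)"))
print(dml.rows_affected)  # 3

# Row-returning statements are detected via returns_rows() and wrapped by
# _wrap_select_result into an SQLResult whose rows are dicts keyed by column name.
rows = driver.execute(SQL("SELECT a FROM t ORDER BY a"))
print(rows.column_names, rows.data)  # ['a'] [{'a': 0}, {'a': 1}, {'a': 2}]

# Arrow path: _fetch_arrow_table returns an ArrowResult; passing batch_size
# streams record batches via fetch_record_batch() instead of a single arrow() call.
arrow_result = driver._fetch_arrow_table(SQL("SELECT a FROM t"))
print(arrow_result.data.num_rows)  # 3

On the storage side, _export_native assembles plain COPY statements, so a Parquet export with compression="zstd" issues COPY (<query>) TO '<destination_uri>' (FORMAT PARQUET, COMPRESSION 'ZSTD'), while _ingest_arrow_table registers the Arrow table under a temporary name and materializes it with a sqlglot-built CREATE TABLE ... AS SELECT before unregistering it.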