sqlspec 0.13.1__py3-none-any.whl → 0.16.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlspec might be problematic. Click here for more details.

Files changed (185) hide show
  1. sqlspec/__init__.py +71 -8
  2. sqlspec/__main__.py +12 -0
  3. sqlspec/__metadata__.py +1 -3
  4. sqlspec/_serialization.py +1 -2
  5. sqlspec/_sql.py +930 -136
  6. sqlspec/_typing.py +278 -142
  7. sqlspec/adapters/adbc/__init__.py +4 -3
  8. sqlspec/adapters/adbc/_types.py +12 -0
  9. sqlspec/adapters/adbc/config.py +116 -285
  10. sqlspec/adapters/adbc/driver.py +462 -340
  11. sqlspec/adapters/aiosqlite/__init__.py +18 -3
  12. sqlspec/adapters/aiosqlite/_types.py +13 -0
  13. sqlspec/adapters/aiosqlite/config.py +202 -150
  14. sqlspec/adapters/aiosqlite/driver.py +226 -247
  15. sqlspec/adapters/asyncmy/__init__.py +18 -3
  16. sqlspec/adapters/asyncmy/_types.py +12 -0
  17. sqlspec/adapters/asyncmy/config.py +80 -199
  18. sqlspec/adapters/asyncmy/driver.py +257 -215
  19. sqlspec/adapters/asyncpg/__init__.py +19 -4
  20. sqlspec/adapters/asyncpg/_types.py +17 -0
  21. sqlspec/adapters/asyncpg/config.py +81 -214
  22. sqlspec/adapters/asyncpg/driver.py +284 -359
  23. sqlspec/adapters/bigquery/__init__.py +17 -3
  24. sqlspec/adapters/bigquery/_types.py +12 -0
  25. sqlspec/adapters/bigquery/config.py +191 -299
  26. sqlspec/adapters/bigquery/driver.py +474 -634
  27. sqlspec/adapters/duckdb/__init__.py +14 -3
  28. sqlspec/adapters/duckdb/_types.py +12 -0
  29. sqlspec/adapters/duckdb/config.py +414 -397
  30. sqlspec/adapters/duckdb/driver.py +342 -393
  31. sqlspec/adapters/oracledb/__init__.py +19 -5
  32. sqlspec/adapters/oracledb/_types.py +14 -0
  33. sqlspec/adapters/oracledb/config.py +123 -458
  34. sqlspec/adapters/oracledb/driver.py +505 -531
  35. sqlspec/adapters/psqlpy/__init__.py +13 -3
  36. sqlspec/adapters/psqlpy/_types.py +11 -0
  37. sqlspec/adapters/psqlpy/config.py +93 -307
  38. sqlspec/adapters/psqlpy/driver.py +504 -213
  39. sqlspec/adapters/psycopg/__init__.py +19 -5
  40. sqlspec/adapters/psycopg/_types.py +17 -0
  41. sqlspec/adapters/psycopg/config.py +143 -472
  42. sqlspec/adapters/psycopg/driver.py +704 -825
  43. sqlspec/adapters/sqlite/__init__.py +14 -3
  44. sqlspec/adapters/sqlite/_types.py +11 -0
  45. sqlspec/adapters/sqlite/config.py +208 -142
  46. sqlspec/adapters/sqlite/driver.py +263 -278
  47. sqlspec/base.py +105 -9
  48. sqlspec/{statement/builder → builder}/__init__.py +12 -14
  49. sqlspec/{statement/builder/base.py → builder/_base.py} +184 -86
  50. sqlspec/{statement/builder/column.py → builder/_column.py} +97 -60
  51. sqlspec/{statement/builder/ddl.py → builder/_ddl.py} +61 -131
  52. sqlspec/{statement/builder → builder}/_ddl_utils.py +4 -10
  53. sqlspec/{statement/builder/delete.py → builder/_delete.py} +10 -30
  54. sqlspec/builder/_insert.py +421 -0
  55. sqlspec/builder/_merge.py +71 -0
  56. sqlspec/{statement/builder → builder}/_parsing_utils.py +49 -26
  57. sqlspec/builder/_select.py +170 -0
  58. sqlspec/{statement/builder/update.py → builder/_update.py} +16 -20
  59. sqlspec/builder/mixins/__init__.py +55 -0
  60. sqlspec/builder/mixins/_cte_and_set_ops.py +222 -0
  61. sqlspec/{statement/builder/mixins/_delete_from.py → builder/mixins/_delete_operations.py} +8 -1
  62. sqlspec/builder/mixins/_insert_operations.py +244 -0
  63. sqlspec/{statement/builder/mixins/_join.py → builder/mixins/_join_operations.py} +45 -13
  64. sqlspec/{statement/builder/mixins/_merge_clauses.py → builder/mixins/_merge_operations.py} +188 -30
  65. sqlspec/builder/mixins/_order_limit_operations.py +135 -0
  66. sqlspec/builder/mixins/_pivot_operations.py +153 -0
  67. sqlspec/builder/mixins/_select_operations.py +604 -0
  68. sqlspec/builder/mixins/_update_operations.py +202 -0
  69. sqlspec/builder/mixins/_where_clause.py +644 -0
  70. sqlspec/cli.py +247 -0
  71. sqlspec/config.py +183 -138
  72. sqlspec/core/__init__.py +63 -0
  73. sqlspec/core/cache.py +871 -0
  74. sqlspec/core/compiler.py +417 -0
  75. sqlspec/core/filters.py +830 -0
  76. sqlspec/core/hashing.py +310 -0
  77. sqlspec/core/parameters.py +1237 -0
  78. sqlspec/core/result.py +677 -0
  79. sqlspec/{statement → core}/splitter.py +321 -191
  80. sqlspec/core/statement.py +676 -0
  81. sqlspec/driver/__init__.py +7 -10
  82. sqlspec/driver/_async.py +422 -163
  83. sqlspec/driver/_common.py +545 -287
  84. sqlspec/driver/_sync.py +426 -160
  85. sqlspec/driver/mixins/__init__.py +2 -13
  86. sqlspec/driver/mixins/_result_tools.py +193 -0
  87. sqlspec/driver/mixins/_sql_translator.py +65 -14
  88. sqlspec/exceptions.py +5 -252
  89. sqlspec/extensions/aiosql/adapter.py +93 -96
  90. sqlspec/extensions/litestar/__init__.py +2 -1
  91. sqlspec/extensions/litestar/cli.py +48 -0
  92. sqlspec/extensions/litestar/config.py +0 -1
  93. sqlspec/extensions/litestar/handlers.py +15 -26
  94. sqlspec/extensions/litestar/plugin.py +21 -16
  95. sqlspec/extensions/litestar/providers.py +17 -52
  96. sqlspec/loader.py +423 -104
  97. sqlspec/migrations/__init__.py +35 -0
  98. sqlspec/migrations/base.py +414 -0
  99. sqlspec/migrations/commands.py +443 -0
  100. sqlspec/migrations/loaders.py +402 -0
  101. sqlspec/migrations/runner.py +213 -0
  102. sqlspec/migrations/tracker.py +140 -0
  103. sqlspec/migrations/utils.py +129 -0
  104. sqlspec/protocols.py +51 -186
  105. sqlspec/storage/__init__.py +1 -1
  106. sqlspec/storage/backends/base.py +37 -40
  107. sqlspec/storage/backends/fsspec.py +136 -112
  108. sqlspec/storage/backends/obstore.py +138 -160
  109. sqlspec/storage/capabilities.py +5 -4
  110. sqlspec/storage/registry.py +57 -106
  111. sqlspec/typing.py +136 -115
  112. sqlspec/utils/__init__.py +2 -2
  113. sqlspec/utils/correlation.py +0 -3
  114. sqlspec/utils/deprecation.py +6 -6
  115. sqlspec/utils/fixtures.py +6 -6
  116. sqlspec/utils/logging.py +0 -2
  117. sqlspec/utils/module_loader.py +7 -12
  118. sqlspec/utils/singleton.py +0 -1
  119. sqlspec/utils/sync_tools.py +17 -38
  120. sqlspec/utils/text.py +12 -51
  121. sqlspec/utils/type_guards.py +482 -235
  122. {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/METADATA +7 -2
  123. sqlspec-0.16.2.dist-info/RECORD +134 -0
  124. sqlspec-0.16.2.dist-info/entry_points.txt +2 -0
  125. sqlspec/driver/connection.py +0 -207
  126. sqlspec/driver/mixins/_csv_writer.py +0 -91
  127. sqlspec/driver/mixins/_pipeline.py +0 -512
  128. sqlspec/driver/mixins/_result_utils.py +0 -140
  129. sqlspec/driver/mixins/_storage.py +0 -926
  130. sqlspec/driver/mixins/_type_coercion.py +0 -130
  131. sqlspec/driver/parameters.py +0 -138
  132. sqlspec/service/__init__.py +0 -4
  133. sqlspec/service/_util.py +0 -147
  134. sqlspec/service/base.py +0 -1131
  135. sqlspec/service/pagination.py +0 -26
  136. sqlspec/statement/__init__.py +0 -21
  137. sqlspec/statement/builder/insert.py +0 -288
  138. sqlspec/statement/builder/merge.py +0 -95
  139. sqlspec/statement/builder/mixins/__init__.py +0 -65
  140. sqlspec/statement/builder/mixins/_aggregate_functions.py +0 -250
  141. sqlspec/statement/builder/mixins/_case_builder.py +0 -91
  142. sqlspec/statement/builder/mixins/_common_table_expr.py +0 -90
  143. sqlspec/statement/builder/mixins/_from.py +0 -63
  144. sqlspec/statement/builder/mixins/_group_by.py +0 -118
  145. sqlspec/statement/builder/mixins/_having.py +0 -35
  146. sqlspec/statement/builder/mixins/_insert_from_select.py +0 -47
  147. sqlspec/statement/builder/mixins/_insert_into.py +0 -36
  148. sqlspec/statement/builder/mixins/_insert_values.py +0 -67
  149. sqlspec/statement/builder/mixins/_limit_offset.py +0 -53
  150. sqlspec/statement/builder/mixins/_order_by.py +0 -46
  151. sqlspec/statement/builder/mixins/_pivot.py +0 -79
  152. sqlspec/statement/builder/mixins/_returning.py +0 -37
  153. sqlspec/statement/builder/mixins/_select_columns.py +0 -61
  154. sqlspec/statement/builder/mixins/_set_ops.py +0 -122
  155. sqlspec/statement/builder/mixins/_unpivot.py +0 -77
  156. sqlspec/statement/builder/mixins/_update_from.py +0 -55
  157. sqlspec/statement/builder/mixins/_update_set.py +0 -94
  158. sqlspec/statement/builder/mixins/_update_table.py +0 -29
  159. sqlspec/statement/builder/mixins/_where.py +0 -401
  160. sqlspec/statement/builder/mixins/_window_functions.py +0 -86
  161. sqlspec/statement/builder/select.py +0 -221
  162. sqlspec/statement/filters.py +0 -596
  163. sqlspec/statement/parameter_manager.py +0 -220
  164. sqlspec/statement/parameters.py +0 -867
  165. sqlspec/statement/pipelines/__init__.py +0 -210
  166. sqlspec/statement/pipelines/analyzers/__init__.py +0 -9
  167. sqlspec/statement/pipelines/analyzers/_analyzer.py +0 -646
  168. sqlspec/statement/pipelines/context.py +0 -115
  169. sqlspec/statement/pipelines/transformers/__init__.py +0 -7
  170. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +0 -88
  171. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +0 -1247
  172. sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +0 -76
  173. sqlspec/statement/pipelines/validators/__init__.py +0 -23
  174. sqlspec/statement/pipelines/validators/_dml_safety.py +0 -290
  175. sqlspec/statement/pipelines/validators/_parameter_style.py +0 -370
  176. sqlspec/statement/pipelines/validators/_performance.py +0 -718
  177. sqlspec/statement/pipelines/validators/_security.py +0 -967
  178. sqlspec/statement/result.py +0 -435
  179. sqlspec/statement/sql.py +0 -1704
  180. sqlspec/statement/sql_compiler.py +0 -140
  181. sqlspec/utils/cached_property.py +0 -25
  182. sqlspec-0.13.1.dist-info/RECORD +0 -150
  183. {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/WHEEL +0 -0
  184. {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/licenses/LICENSE +0 -0
  185. {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/licenses/NOTICE +0 -0
@@ -1,68 +1,301 @@
1
- """DuckDB database configuration with direct field-based configuration."""
1
+ """DuckDB database configuration with connection pooling."""
2
+ # ruff: noqa: D107 W293 RUF100 S110 PLR0913 FA100 BLE001 UP037 COM812 ARG002
2
3
 
3
4
  import logging
4
- from contextlib import contextmanager
5
- from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, TypedDict
5
+ import threading
6
+ import time
7
+ from collections.abc import Sequence
8
+ from contextlib import contextmanager, suppress
9
+ from typing import TYPE_CHECKING, Any, Final, Optional, TypedDict, cast
6
10
 
7
11
  import duckdb
8
12
  from typing_extensions import NotRequired
9
13
 
10
- from sqlspec.adapters.duckdb.driver import DuckDBConnection, DuckDBDriver
11
- from sqlspec.config import NoPoolSyncConfig
12
- from sqlspec.statement.sql import SQLConfig
13
- from sqlspec.typing import DictRow, Empty
14
+ from sqlspec.adapters.duckdb._types import DuckDBConnection
15
+ from sqlspec.adapters.duckdb.driver import DuckDBCursor, DuckDBDriver, duckdb_statement_config
16
+ from sqlspec.config import SyncDatabaseConfig
14
17
 
15
18
  if TYPE_CHECKING:
16
- from collections.abc import Generator, Sequence
17
- from contextlib import AbstractContextManager
19
+ from collections.abc import Generator
20
+ from typing import Callable, ClassVar, Union
18
21
 
19
- from sqlglot.dialects.dialect import DialectType
22
+ from sqlspec.core.statement import StatementConfig
20
23
 
21
24
 
22
25
  logger = logging.getLogger(__name__)
23
26
 
24
- __all__ = ("CONNECTION_FIELDS", "DuckDBConfig", "DuckDBExtensionConfig", "DuckDBSecretConfig")
25
-
26
-
27
- CONNECTION_FIELDS = frozenset(
28
- {
29
- "database",
30
- "read_only",
31
- "config",
32
- "memory_limit",
33
- "threads",
34
- "temp_directory",
35
- "max_temp_directory_size",
36
- "autoload_known_extensions",
37
- "autoinstall_known_extensions",
38
- "allow_community_extensions",
39
- "allow_unsigned_extensions",
40
- "extension_directory",
41
- "custom_extension_repository",
42
- "autoinstall_extension_repository",
43
- "allow_persistent_secrets",
44
- "enable_external_access",
45
- "secret_directory",
46
- "enable_object_cache",
47
- "parquet_metadata_cache",
48
- "enable_external_file_cache",
49
- "checkpoint_threshold",
50
- "enable_progress_bar",
51
- "progress_bar_time",
52
- "enable_logging",
53
- "log_query_path",
54
- "logging_level",
55
- "preserve_insertion_order",
56
- "default_null_order",
57
- "default_order",
58
- "ieee_floating_point_ops",
59
- "binary_as_string",
60
- "arrow_large_buffer_size",
61
- "errors_as_json",
62
- }
27
+ DEFAULT_MIN_POOL: Final[int] = 1
28
+ DEFAULT_MAX_POOL: Final[int] = 4
29
+ POOL_TIMEOUT: Final[float] = 30.0
30
+ POOL_RECYCLE: Final[int] = 86400
31
+
32
+ __all__ = (
33
+ "DuckDBConfig",
34
+ "DuckDBConnectionParams",
35
+ "DuckDBConnectionPool",
36
+ "DuckDBDriverFeatures",
37
+ "DuckDBExtensionConfig",
38
+ "DuckDBPoolParams",
39
+ "DuckDBSecretConfig",
63
40
  )
64
41
 
65
42
 
43
+ class DuckDBConnectionPool:
44
+ """Thread-local connection manager for DuckDB with performance optimizations.
45
+
46
+ Uses thread-local storage to ensure each thread gets its own DuckDB connection,
47
+ preventing the thread-safety issues that cause segmentation faults when
48
+ multiple cursors share the same connection concurrently.
49
+
50
+ This design trades traditional pooling for thread safety, which is essential
51
+ for DuckDB since connections and cursors are not thread-safe.
52
+ """
53
+
54
+ __slots__ = (
55
+ "_connection_config",
56
+ "_connection_times",
57
+ "_created_connections",
58
+ "_extensions",
59
+ "_lock",
60
+ "_on_connection_create",
61
+ "_recycle",
62
+ "_secrets",
63
+ "_thread_local",
64
+ )
65
+
66
+ def __init__( # noqa: PLR0913
67
+ self,
68
+ connection_config: "dict[str, Any]", # noqa: UP037
69
+ pool_min_size: int = DEFAULT_MIN_POOL,
70
+ pool_max_size: int = DEFAULT_MAX_POOL,
71
+ pool_timeout: float = POOL_TIMEOUT,
72
+ pool_recycle_seconds: int = POOL_RECYCLE,
73
+ extensions: "Optional[list[dict[str, Any]]]" = None, # noqa: FA100, UP037
74
+ secrets: "Optional[list[dict[str, Any]]]" = None, # noqa: FA100, UP037
75
+ on_connection_create: "Optional[Callable[[DuckDBConnection], None]]" = None, # noqa: FA100
76
+ ) -> None:
77
+ """Initialize the thread-local connection manager."""
78
+ self._connection_config = connection_config
79
+ self._recycle = pool_recycle_seconds
80
+ self._extensions = extensions or []
81
+ self._secrets = secrets or []
82
+ self._on_connection_create = on_connection_create
83
+ self._thread_local = threading.local()
84
+ self._lock = threading.RLock()
85
+ self._created_connections = 0
86
+ self._connection_times: "dict[int, float]" = {}
87
+
88
+ def _create_connection(self) -> DuckDBConnection:
89
+ """Create a new DuckDB connection with extensions and secrets."""
90
+ connect_parameters = {}
91
+ config_dict = {}
92
+
93
+ for key, value in self._connection_config.items():
94
+ if key in {"database", "read_only"}:
95
+ connect_parameters[key] = value
96
+ else:
97
+ config_dict[key] = value
98
+
99
+ if config_dict:
100
+ connect_parameters["config"] = config_dict
101
+
102
+ connection = duckdb.connect(**connect_parameters)
103
+
104
+ for ext_config in self._extensions:
105
+ ext_name = ext_config.get("name")
106
+ if not ext_name:
107
+ continue
108
+
109
+ install_kwargs = {}
110
+ if "version" in ext_config:
111
+ install_kwargs["version"] = ext_config["version"]
112
+ if "repository" in ext_config:
113
+ install_kwargs["repository"] = ext_config["repository"]
114
+ if ext_config.get("force_install", False):
115
+ install_kwargs["force_install"] = True
116
+
117
+ try:
118
+ if install_kwargs:
119
+ connection.install_extension(ext_name, **install_kwargs)
120
+ connection.load_extension(ext_name)
121
+ except Exception: # noqa: BLE001, S110
122
+ pass
123
+
124
+ for secret_config in self._secrets:
125
+ secret_type = secret_config.get("secret_type")
126
+ secret_name = secret_config.get("name")
127
+ secret_value = secret_config.get("value")
128
+
129
+ if not (secret_type and secret_name and secret_value):
130
+ continue
131
+
132
+ value_pairs = []
133
+ for key, value in secret_value.items():
134
+ escaped_value = str(value).replace("'", "''")
135
+ value_pairs.append(f"'{key}' = '{escaped_value}'")
136
+ value_string = ", ".join(value_pairs)
137
+ scope_clause = ""
138
+ if "scope" in secret_config:
139
+ scope_clause = f" SCOPE '{secret_config['scope']}'"
140
+
141
+ sql = f""" # noqa: S608
142
+ CREATE SECRET {secret_name} (
143
+ TYPE {secret_type},
144
+ {value_string}
145
+ ){scope_clause}
146
+ """
147
+ with suppress(Exception):
148
+ connection.execute(sql)
149
+
150
+ if self._on_connection_create:
151
+ with suppress(Exception):
152
+ self._on_connection_create(connection)
153
+
154
+ conn_id = id(connection)
155
+ with self._lock:
156
+ self._created_connections += 1
157
+ self._connection_times[conn_id] = time.time()
158
+
159
+ return connection
160
+
161
+ def _get_thread_connection(self) -> DuckDBConnection:
162
+ """Get or create a connection for the current thread.
163
+
164
+ Each thread gets its own dedicated DuckDB connection to prevent
165
+ thread-safety issues with concurrent cursor operations.
166
+ """
167
+ if not hasattr(self._thread_local, "connection"):
168
+ self._thread_local.connection = self._create_connection()
169
+ self._thread_local.created_at = time.time()
170
+
171
+ # Check if connection needs recycling
172
+ if self._recycle > 0 and time.time() - self._thread_local.created_at > self._recycle:
173
+ with suppress(Exception):
174
+ self._thread_local.connection.close()
175
+ self._thread_local.connection = self._create_connection()
176
+ self._thread_local.created_at = time.time()
177
+
178
+ return cast("DuckDBConnection", self._thread_local.connection)
179
+
180
+ def _close_thread_connection(self) -> None:
181
+ """Close the connection for the current thread."""
182
+ if hasattr(self._thread_local, "connection"):
183
+ with suppress(Exception):
184
+ self._thread_local.connection.close()
185
+ del self._thread_local.connection
186
+ if hasattr(self._thread_local, "created_at"):
187
+ del self._thread_local.created_at
188
+
189
+ def _is_connection_alive(self, connection: DuckDBConnection) -> bool:
190
+ """Check if a connection is still alive and usable.
191
+
192
+ Args:
193
+ connection: Connection to check
194
+
195
+ Returns:
196
+ True if connection is alive, False otherwise
197
+ """
198
+ try:
199
+ cursor = connection.cursor()
200
+ cursor.close()
201
+ except Exception:
202
+ return False
203
+ return True
204
+
205
+ @contextmanager
206
+ def get_connection(self) -> "Generator[DuckDBConnection, None, None]":
207
+ """Get a thread-local connection.
208
+
209
+ Each thread gets its own dedicated DuckDB connection to prevent
210
+ thread-safety issues with concurrent cursor operations.
211
+
212
+ Yields:
213
+ DuckDBConnection: A thread-local connection.
214
+ """
215
+ connection = self._get_thread_connection()
216
+ try:
217
+ yield connection
218
+ except Exception:
219
+ # On error, close and recreate connection for this thread
220
+ self._close_thread_connection()
221
+ raise
222
+
223
+ def close(self) -> None:
224
+ """Close the thread-local connection if it exists."""
225
+ self._close_thread_connection()
226
+
227
+ def size(self) -> int:
228
+ """Get current pool size (always 1 for thread-local)."""
229
+ return 1 if hasattr(self._thread_local, "connection") else 0
230
+
231
+ def checked_out(self) -> int:
232
+ """Get number of checked out connections (always 0 for thread-local)."""
233
+ return 0
234
+
235
+ def acquire(self) -> DuckDBConnection:
236
+ """Acquire a thread-local connection.
237
+
238
+ Each thread gets its own dedicated DuckDB connection to prevent
239
+ thread-safety issues with concurrent cursor operations.
240
+
241
+ Returns:
242
+ DuckDBConnection: A thread-local connection
243
+ """
244
+ return self._get_thread_connection()
245
+
246
+
247
+ class DuckDBConnectionParams(TypedDict, total=False):
248
+ """DuckDB connection parameters."""
249
+
250
+ database: NotRequired[str]
251
+ read_only: NotRequired[bool]
252
+ config: NotRequired[dict[str, Any]]
253
+ memory_limit: NotRequired[str]
254
+ threads: NotRequired[int]
255
+ temp_directory: NotRequired[str]
256
+ max_temp_directory_size: NotRequired[str]
257
+ autoload_known_extensions: NotRequired[bool]
258
+ autoinstall_known_extensions: NotRequired[bool]
259
+ allow_community_extensions: NotRequired[bool]
260
+ allow_unsigned_extensions: NotRequired[bool]
261
+ extension_directory: NotRequired[str]
262
+ custom_extension_repository: NotRequired[str]
263
+ autoinstall_extension_repository: NotRequired[str]
264
+ allow_persistent_secrets: NotRequired[bool]
265
+ enable_external_access: NotRequired[bool]
266
+ secret_directory: NotRequired[str]
267
+ enable_object_cache: NotRequired[bool]
268
+ parquet_metadata_cache: NotRequired[str]
269
+ enable_external_file_cache: NotRequired[bool]
270
+ checkpoint_threshold: NotRequired[str]
271
+ enable_progress_bar: NotRequired[bool]
272
+ progress_bar_time: NotRequired[float]
273
+ enable_logging: NotRequired[bool]
274
+ log_query_path: NotRequired[str]
275
+ logging_level: NotRequired[str]
276
+ preserve_insertion_order: NotRequired[bool]
277
+ default_null_order: NotRequired[str]
278
+ default_order: NotRequired[str]
279
+ ieee_floating_point_ops: NotRequired[bool]
280
+ binary_as_string: NotRequired[bool]
281
+ arrow_large_buffer_size: NotRequired[bool]
282
+ errors_as_json: NotRequired[bool]
283
+ extra: NotRequired[dict[str, Any]]
284
+
285
+
286
+ class DuckDBPoolParams(DuckDBConnectionParams, total=False):
287
+ """Complete pool configuration for DuckDB adapter.
288
+
289
+ Combines standardized pool parameters with DuckDB-specific connection parameters.
290
+ """
291
+
292
+ # Standardized pool parameters (consistent across ALL adapters)
293
+ pool_min_size: NotRequired[int]
294
+ pool_max_size: NotRequired[int]
295
+ pool_timeout: NotRequired[float]
296
+ pool_recycle_seconds: NotRequired[int]
297
+
298
+
66
299
  class DuckDBExtensionConfig(TypedDict, total=False):
67
300
  """DuckDB extension configuration for auto-management."""
68
301
 
@@ -95,354 +328,137 @@ class DuckDBSecretConfig(TypedDict, total=False):
95
328
  """Scope of the secret (LOCAL or PERSISTENT)."""
96
329
 
97
330
 
98
- class DuckDBConfig(NoPoolSyncConfig[DuckDBConnection, DuckDBDriver]):
99
- """Enhanced DuckDB configuration with intelligent features and modern architecture.
331
+ class DuckDBDriverFeatures(TypedDict, total=False):
332
+ """TypedDict for DuckDB driver features configuration."""
333
+
334
+ extensions: NotRequired[Sequence[DuckDBExtensionConfig]]
335
+ """List of extensions to install/load on connection creation."""
336
+ secrets: NotRequired[Sequence[DuckDBSecretConfig]]
337
+ """List of secrets to create for AI/API integrations."""
338
+ on_connection_create: NotRequired["Callable[[DuckDBConnection], Optional[DuckDBConnection]]"]
339
+ """Callback executed when connection is created."""
340
+
341
+
342
+ class DuckDBConfig(SyncDatabaseConfig[DuckDBConnection, DuckDBConnectionPool, DuckDBDriver]):
343
+ """Enhanced DuckDB configuration with connection pooling and intelligent features.
100
344
 
101
- DuckDB is an embedded analytical database that doesn't require connection pooling.
102
345
  This configuration supports all of DuckDB's unique features including:
103
346
 
347
+ - Connection pooling optimized for DuckDB's architecture
104
348
  - Extension auto-management and installation
105
349
  - Secret management for API integrations
106
350
  - Intelligent auto configuration settings
107
351
  - High-performance Arrow integration
108
352
  - Direct file querying capabilities
109
353
  - Performance optimizations for analytics workloads
110
- """
111
354
 
112
- __slots__ = (
113
- "_dialect",
114
- "allow_community_extensions",
115
- "allow_persistent_secrets",
116
- "allow_unsigned_extensions",
117
- "arrow_large_buffer_size",
118
- "autoinstall_extension_repository",
119
- "autoinstall_known_extensions",
120
- "autoload_known_extensions",
121
- "binary_as_string",
122
- "checkpoint_threshold",
123
- "config",
124
- "custom_extension_repository",
125
- "database",
126
- "default_null_order",
127
- "default_order",
128
- "default_row_type",
129
- "enable_external_access",
130
- "enable_external_file_cache",
131
- "enable_logging",
132
- "enable_object_cache",
133
- "enable_progress_bar",
134
- "errors_as_json",
135
- "extension_directory",
136
- "extensions",
137
- "extras",
138
- "ieee_floating_point_ops",
139
- "log_query_path",
140
- "logging_level",
141
- "max_temp_directory_size",
142
- "memory_limit",
143
- "on_connection_create",
144
- "parquet_metadata_cache",
145
- "pool_instance",
146
- "preserve_insertion_order",
147
- "progress_bar_time",
148
- "read_only",
149
- "secret_directory",
150
- "secrets",
151
- "statement_config",
152
- "temp_directory",
153
- "threads",
154
- )
155
-
156
- is_async: ClassVar[bool] = False
157
- supports_connection_pooling: ClassVar[bool] = False
158
-
159
- driver_type: type[DuckDBDriver] = DuckDBDriver
160
- connection_type: type[DuckDBConnection] = DuckDBConnection
161
-
162
- supported_parameter_styles: ClassVar[tuple[str, ...]] = ("qmark", "numeric")
163
- """DuckDB supports ? (qmark) and $1, $2 (numeric) parameter styles."""
355
+ DuckDB Connection Pool Best Practices:
356
+ - DuckDB performs best with long-lived connections that maintain cache
357
+ - Default pool size is 1-4 connections (DuckDB is optimized for single connection)
358
+ - Connection recycling is set to 24 hours by default (set to 0 to disable)
359
+ - Shared memory databases use `:memory:shared_db` for proper concurrency
360
+ - Health checks are minimized to reduce overhead
361
+ """
164
362
 
165
- preferred_parameter_style: ClassVar[str] = "qmark"
166
- """DuckDB's native parameter style is ? (qmark)."""
363
+ driver_type: "ClassVar[type[DuckDBDriver]]" = DuckDBDriver
364
+ connection_type: "ClassVar[type[DuckDBConnection]]" = DuckDBConnection
167
365
 
168
366
  def __init__(
169
367
  self,
170
- statement_config: "Optional[SQLConfig]" = None,
171
- default_row_type: type[DictRow] = DictRow,
172
- # Core connection parameters
173
- database: Optional[str] = None,
174
- read_only: Optional[bool] = None,
175
- config: Optional[dict[str, Any]] = None,
176
- # Resource management
177
- memory_limit: Optional[str] = None,
178
- threads: Optional[int] = None,
179
- temp_directory: Optional[str] = None,
180
- max_temp_directory_size: Optional[str] = None,
181
- # Extension configuration
182
- autoload_known_extensions: Optional[bool] = None,
183
- autoinstall_known_extensions: Optional[bool] = None,
184
- allow_community_extensions: Optional[bool] = None,
185
- allow_unsigned_extensions: Optional[bool] = None,
186
- extension_directory: Optional[str] = None,
187
- custom_extension_repository: Optional[str] = None,
188
- autoinstall_extension_repository: Optional[str] = None,
189
- # Security and access
190
- allow_persistent_secrets: Optional[bool] = None,
191
- enable_external_access: Optional[bool] = None,
192
- secret_directory: Optional[str] = None,
193
- # Performance optimizations
194
- enable_object_cache: Optional[bool] = None,
195
- parquet_metadata_cache: Optional[bool] = None,
196
- enable_external_file_cache: Optional[bool] = None,
197
- checkpoint_threshold: Optional[str] = None,
198
- # User experience
199
- enable_progress_bar: Optional[bool] = None,
200
- progress_bar_time: Optional[int] = None,
201
- # Logging and debugging
202
- enable_logging: Optional[bool] = None,
203
- log_query_path: Optional[str] = None,
204
- logging_level: Optional[str] = None,
205
- # Data processing settings
206
- preserve_insertion_order: Optional[bool] = None,
207
- default_null_order: Optional[str] = None,
208
- default_order: Optional[str] = None,
209
- ieee_floating_point_ops: Optional[bool] = None,
210
- # File format settings
211
- binary_as_string: Optional[bool] = None,
212
- arrow_large_buffer_size: Optional[bool] = None,
213
- # Error handling
214
- errors_as_json: Optional[bool] = None,
215
- # DuckDB intelligent features
216
- extensions: "Optional[Sequence[DuckDBExtensionConfig]]" = None,
217
- secrets: "Optional[Sequence[DuckDBSecretConfig]]" = None,
218
- on_connection_create: "Optional[Callable[[DuckDBConnection], Optional[DuckDBConnection]]]" = None,
219
- **kwargs: Any,
368
+ *,
369
+ pool_config: "Optional[Union[DuckDBPoolParams, dict[str, Any]]]" = None,
370
+ migration_config: Optional[dict[str, Any]] = None,
371
+ pool_instance: "Optional[DuckDBConnectionPool]" = None,
372
+ statement_config: "Optional[StatementConfig]" = None,
373
+ driver_features: "Optional[Union[DuckDBDriverFeatures, dict[str, Any]]]" = None,
220
374
  ) -> None:
221
- """Initialize DuckDB configuration with intelligent features.
222
-
223
- Args:
224
- statement_config: Default SQL statement configuration
225
- default_row_type: Default row type for results
226
- database: Path to the DuckDB database file. Use ':memory:' for in-memory database
227
- read_only: Whether to open the database in read-only mode
228
- config: DuckDB configuration options passed directly to the connection
229
- memory_limit: Maximum memory usage (e.g., '1GB', '80% of RAM')
230
- threads: Number of threads to use for parallel query execution
231
- temp_directory: Directory for temporary files during spilling
232
- max_temp_directory_size: Maximum size of temp directory (e.g., '1GB')
233
- autoload_known_extensions: Automatically load known extensions when needed
234
- autoinstall_known_extensions: Automatically install known extensions when needed
235
- allow_community_extensions: Allow community-built extensions
236
- allow_unsigned_extensions: Allow unsigned extensions (development only)
237
- extension_directory: Directory to store extensions
238
- custom_extension_repository: Custom endpoint for extension installation
239
- autoinstall_extension_repository: Override endpoint for autoloading extensions
240
- allow_persistent_secrets: Enable persistent secret storage
241
- enable_external_access: Allow external file system access
242
- secret_directory: Directory for persistent secrets
243
- enable_object_cache: Enable caching of objects (e.g., Parquet metadata)
244
- parquet_metadata_cache: Cache Parquet metadata for repeated access
245
- enable_external_file_cache: Cache external files in memory
246
- checkpoint_threshold: WAL size threshold for automatic checkpoints
247
- enable_progress_bar: Show progress bar for long queries
248
- progress_bar_time: Time in milliseconds before showing progress bar
249
- enable_logging: Enable DuckDB logging
250
- log_query_path: Path to log queries for debugging
251
- logging_level: Log level (DEBUG, INFO, WARNING, ERROR)
252
- preserve_insertion_order: Whether to preserve insertion order in results
253
- default_null_order: Default NULL ordering (NULLS_FIRST, NULLS_LAST)
254
- default_order: Default sort order (ASC, DESC)
255
- ieee_floating_point_ops: Use IEEE 754 compliant floating point operations
256
- binary_as_string: Interpret binary data as string in Parquet files
257
- arrow_large_buffer_size: Use large Arrow buffers for strings, blobs, etc.
258
- errors_as_json: Return errors in JSON format
259
- extensions: List of extension dicts to auto-install/load with keys: name, version, repository, force_install
260
- secrets: List of secret dicts for AI/API integrations with keys: secret_type, name, value, scope
261
- on_connection_create: Callback executed when connection is created
262
- **kwargs: Additional parameters (stored in extras)
263
-
264
- Example:
265
- >>> config = DuckDBConfig(
266
- ... database=":memory:",
267
- ... memory_limit="1GB",
268
- ... threads=4,
269
- ... autoload_known_extensions=True,
270
- ... extensions=[
271
- ... {"name": "spatial", "repository": "core"},
272
- ... {"name": "aws", "repository": "core"},
273
- ... ],
274
- ... secrets=[
275
- ... {
276
- ... "secret_type": "openai",
277
- ... "name": "my_openai_secret",
278
- ... "value": {"api_key": "sk-..."},
279
- ... }
280
- ... ],
281
- ... )
282
- """
283
- # Store connection parameters as instance attributes
284
- self.database = database or ":memory:"
285
- self.read_only = read_only
286
- self.config = config
287
- self.memory_limit = memory_limit
288
- self.threads = threads
289
- self.temp_directory = temp_directory
290
- self.max_temp_directory_size = max_temp_directory_size
291
- self.autoload_known_extensions = autoload_known_extensions
292
- self.autoinstall_known_extensions = autoinstall_known_extensions
293
- self.allow_community_extensions = allow_community_extensions
294
- self.allow_unsigned_extensions = allow_unsigned_extensions
295
- self.extension_directory = extension_directory
296
- self.custom_extension_repository = custom_extension_repository
297
- self.autoinstall_extension_repository = autoinstall_extension_repository
298
- self.allow_persistent_secrets = allow_persistent_secrets
299
- self.enable_external_access = enable_external_access
300
- self.secret_directory = secret_directory
301
- self.enable_object_cache = enable_object_cache
302
- self.parquet_metadata_cache = parquet_metadata_cache
303
- self.enable_external_file_cache = enable_external_file_cache
304
- self.checkpoint_threshold = checkpoint_threshold
305
- self.enable_progress_bar = enable_progress_bar
306
- self.progress_bar_time = progress_bar_time
307
- self.enable_logging = enable_logging
308
- self.log_query_path = log_query_path
309
- self.logging_level = logging_level
310
- self.preserve_insertion_order = preserve_insertion_order
311
- self.default_null_order = default_null_order
312
- self.default_order = default_order
313
- self.ieee_floating_point_ops = ieee_floating_point_ops
314
- self.binary_as_string = binary_as_string
315
- self.arrow_large_buffer_size = arrow_large_buffer_size
316
- self.errors_as_json = errors_as_json
317
-
318
- self.extras = kwargs or {}
319
-
320
- # Store other config
321
- self.statement_config = statement_config or SQLConfig()
322
- self.default_row_type = default_row_type
323
-
324
- # DuckDB intelligent features
325
- self.extensions = extensions or []
326
- self.secrets = secrets or []
327
- self.on_connection_create = on_connection_create
328
- self._dialect: DialectType = None
329
-
330
- super().__init__()
331
-
332
- @property
333
- def connection_config_dict(self) -> dict[str, Any]:
334
- """Return the connection configuration as a dict for duckdb.connect()."""
335
- # DuckDB connect() only accepts database, read_only, and config parameters
336
- connect_params: dict[str, Any] = {}
337
-
338
- if hasattr(self, "database") and self.database is not None:
339
- connect_params["database"] = self.database
340
-
341
- if hasattr(self, "read_only") and self.read_only is not None:
342
- connect_params["read_only"] = self.read_only
343
-
344
- # All other parameters go into the config dict
345
- config_dict = {}
346
- for field in CONNECTION_FIELDS:
347
- if field not in {"database", "read_only", "config"}:
348
- value = getattr(self, field, None)
349
- if value is not None and value is not Empty:
350
- config_dict[field] = value
351
-
352
- config_dict.update(self.extras)
375
+ """Initialize DuckDB configuration with intelligent features."""
376
+ if pool_config is None:
377
+ pool_config = {}
378
+ if "database" not in pool_config:
379
+ pool_config["database"] = ":memory:shared_db"
380
+
381
+ if pool_config.get("database") in {":memory:", ""}:
382
+ pool_config["database"] = ":memory:shared_db"
383
+
384
+ super().__init__(
385
+ pool_config=dict(pool_config),
386
+ pool_instance=pool_instance,
387
+ migration_config=migration_config,
388
+ statement_config=statement_config or duckdb_statement_config,
389
+ driver_features=cast("dict[str, Any]", driver_features),
390
+ )
391
+
392
+ def _get_connection_config_dict(self) -> "dict[str, Any]":
393
+ """Get connection configuration as plain dict for pool creation."""
394
+ return {
395
+ k: v
396
+ for k, v in self.pool_config.items()
397
+ if v is not None
398
+ and k not in {"pool_min_size", "pool_max_size", "pool_timeout", "pool_recycle_seconds", "extra"}
399
+ }
400
+
401
+ def _get_pool_config_dict(self) -> "dict[str, Any]":
402
+ """Get pool configuration as plain dict for pool creation."""
403
+ return {
404
+ k: v
405
+ for k, v in self.pool_config.items()
406
+ if v is not None and k in {"pool_min_size", "pool_max_size", "pool_timeout", "pool_recycle_seconds"}
407
+ }
408
+
409
+ def _create_pool(self) -> DuckDBConnectionPool:
410
+ """Create the DuckDB connection pool."""
411
+
412
+ extensions = self.driver_features.get("extensions", None)
413
+ secrets = self.driver_features.get("secrets", None)
414
+ on_connection_create = self.driver_features.get("on_connection_create", None)
415
+
416
+ extensions_dicts = [dict(ext) for ext in extensions] if extensions else None
417
+ secrets_dicts = [dict(secret) for secret in secrets] if secrets else None
418
+
419
+ pool_callback = None
420
+ if on_connection_create:
421
+
422
+ def wrapped_callback(conn: DuckDBConnection) -> None:
423
+ on_connection_create(conn)
424
+
425
+ pool_callback = wrapped_callback
426
+ conf = {"extensions": extensions_dicts, "secrets": secrets_dicts, "on_connection_create": pool_callback}
427
+
428
+ return DuckDBConnectionPool(
429
+ connection_config=self._get_connection_config_dict(),
430
+ **conf, # type: ignore[arg-type]
431
+ **self._get_pool_config_dict(),
432
+ )
433
+
434
+ def _close_pool(self) -> None:
435
+ """Close the connection pool."""
436
+ if self.pool_instance:
437
+ self.pool_instance.close()
353
438
 
354
- # If we have config parameters, add them
355
- if config_dict:
356
- connect_params["config"] = config_dict
439
+ def create_connection(self) -> DuckDBConnection:
440
+ """Get a DuckDB connection from the pool.
357
441
 
358
- return connect_params
442
+ This method ensures the pool is created and returns a connection
443
+ from the pool. The connection is checked out from the pool and must
444
+ be properly managed by the caller.
359
445
 
360
- def create_connection(self) -> DuckDBConnection:
361
- """Create and return a DuckDB connection with intelligent configuration applied."""
446
+ Returns:
447
+ DuckDBConnection: A connection from the pool
362
448
 
363
- logger.info("Creating DuckDB connection", extra={"adapter": "duckdb"})
449
+ Note:
450
+ For automatic connection management, prefer using provide_connection()
451
+ or provide_session() which handle returning connections to the pool.
452
+ The caller is responsible for returning the connection to the pool
453
+ using pool.release(connection) when done.
454
+ """
455
+ pool = self.provide_pool()
364
456
 
365
- try:
366
- config_dict = self.connection_config_dict
367
- connection = duckdb.connect(**config_dict)
368
- logger.info("DuckDB connection created successfully", extra={"adapter": "duckdb"})
369
-
370
- # Install and load extensions
371
- for ext_config in self.extensions:
372
- ext_name = None
373
- try:
374
- ext_name = ext_config.get("name")
375
- if not ext_name:
376
- continue
377
- install_kwargs: dict[str, Any] = {}
378
- if "version" in ext_config:
379
- install_kwargs["version"] = ext_config["version"]
380
- if "repository" in ext_config:
381
- install_kwargs["repository"] = ext_config["repository"]
382
- if ext_config.get("force_install", False):
383
- install_kwargs["force_install"] = True
384
-
385
- if install_kwargs or self.autoinstall_known_extensions:
386
- connection.install_extension(ext_name, **install_kwargs)
387
- connection.load_extension(ext_name)
388
- logger.debug("Loaded DuckDB extension: %s", ext_name, extra={"adapter": "duckdb"})
389
-
390
- except Exception as e:
391
- if ext_name:
392
- logger.warning(
393
- "Failed to load DuckDB extension: %s",
394
- ext_name,
395
- extra={"adapter": "duckdb", "error": str(e)},
396
- )
397
-
398
- for secret_config in self.secrets:
399
- secret_name = None
400
- try:
401
- secret_type = secret_config.get("secret_type")
402
- secret_name = secret_config.get("name")
403
- secret_value = secret_config.get("value")
404
-
405
- if secret_type and secret_name and secret_value:
406
- value_pairs = []
407
- for key, value in secret_value.items():
408
- escaped_value = str(value).replace("'", "''")
409
- value_pairs.append(f"'{key}' = '{escaped_value}'")
410
- value_string = ", ".join(value_pairs)
411
- scope_clause = ""
412
- if "scope" in secret_config:
413
- scope_clause = f" SCOPE '{secret_config['scope']}'"
414
-
415
- sql = f"""
416
- CREATE SECRET {secret_name} (
417
- TYPE {secret_type},
418
- {value_string}
419
- ){scope_clause}
420
- """
421
- connection.execute(sql)
422
- logger.debug("Created DuckDB secret: %s", secret_name, extra={"adapter": "duckdb"})
423
-
424
- except Exception as e:
425
- if secret_name:
426
- logger.warning(
427
- "Failed to create DuckDB secret: %s",
428
- secret_name,
429
- extra={"adapter": "duckdb", "error": str(e)},
430
- )
431
- if self.on_connection_create:
432
- try:
433
- self.on_connection_create(connection)
434
- logger.debug("Executed connection creation hook", extra={"adapter": "duckdb"})
435
- except Exception as e:
436
- logger.warning("Connection creation hook failed", extra={"adapter": "duckdb", "error": str(e)})
437
-
438
- except Exception as e:
439
- logger.exception("Failed to create DuckDB connection", extra={"adapter": "duckdb", "error": str(e)})
440
- raise
441
- return connection
457
+ return pool.acquire()
442
458
 
443
459
  @contextmanager
444
460
  def provide_connection(self, *args: Any, **kwargs: Any) -> "Generator[DuckDBConnection, None, None]":
445
- """Provide a DuckDB connection context manager.
461
+ """Provide a pooled DuckDB connection context manager.
446
462
 
447
463
  Args:
448
464
  *args: Additional arguments.
@@ -451,37 +467,38 @@ class DuckDBConfig(NoPoolSyncConfig[DuckDBConnection, DuckDBDriver]):
451
467
  Yields:
452
468
  A DuckDB connection instance.
453
469
  """
454
- connection = self.create_connection()
455
- try:
470
+ pool = self.provide_pool()
471
+ with pool.get_connection() as connection:
456
472
  yield connection
457
- finally:
458
- connection.close()
459
473
 
460
- def provide_session(self, *args: Any, **kwargs: Any) -> "AbstractContextManager[DuckDBDriver]":
474
+ @contextmanager
475
+ def provide_session(
476
+ self, *args: Any, statement_config: "Optional[StatementConfig]" = None, **kwargs: Any
477
+ ) -> "Generator[DuckDBDriver, None, None]":
461
478
  """Provide a DuckDB driver session context manager.
462
479
 
463
480
  Args:
464
481
  *args: Additional arguments.
482
+ statement_config: Optional statement configuration override.
465
483
  **kwargs: Additional keyword arguments.
466
484
 
467
- Returns:
485
+ Yields:
468
486
  A context manager that yields a DuckDBDriver instance.
469
487
  """
488
+ with self.provide_connection(*args, **kwargs) as connection:
489
+ driver = self.driver_type(connection=connection, statement_config=statement_config or self.statement_config)
490
+ yield driver
491
+
492
+ def get_signature_namespace(self) -> "dict[str, type[Any]]":
493
+ """Get the signature namespace for DuckDB types.
494
+
495
+ This provides all DuckDB-specific types that Litestar needs to recognize
496
+ to avoid serialization attempts.
497
+
498
+ Returns:
499
+ Dictionary mapping type names to types.
500
+ """
470
501
 
471
- @contextmanager
472
- def session_manager() -> "Generator[DuckDBDriver, None, None]":
473
- with self.provide_connection(*args, **kwargs) as connection:
474
- statement_config = self.statement_config
475
- # Inject parameter style info if not already set
476
- if statement_config.allowed_parameter_styles is None:
477
- from dataclasses import replace
478
-
479
- statement_config = replace(
480
- statement_config,
481
- allowed_parameter_styles=self.supported_parameter_styles,
482
- target_parameter_style=self.preferred_parameter_style,
483
- )
484
- driver = self.driver_type(connection=connection, config=statement_config)
485
- yield driver
486
-
487
- return session_manager()
502
+ namespace = super().get_signature_namespace()
503
+ namespace.update({"DuckDBConnection": DuckDBConnection, "DuckDBCursor": DuckDBCursor})
504
+ return namespace