sqlspec 0.14.1__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlspec might be problematic. Click here for more details.

Files changed (159)
  1. sqlspec/__init__.py +50 -25
  2. sqlspec/__main__.py +1 -1
  3. sqlspec/__metadata__.py +1 -3
  4. sqlspec/_serialization.py +1 -2
  5. sqlspec/_sql.py +480 -121
  6. sqlspec/_typing.py +278 -142
  7. sqlspec/adapters/adbc/__init__.py +4 -3
  8. sqlspec/adapters/adbc/_types.py +12 -0
  9. sqlspec/adapters/adbc/config.py +115 -260
  10. sqlspec/adapters/adbc/driver.py +462 -367
  11. sqlspec/adapters/aiosqlite/__init__.py +18 -3
  12. sqlspec/adapters/aiosqlite/_types.py +13 -0
  13. sqlspec/adapters/aiosqlite/config.py +199 -129
  14. sqlspec/adapters/aiosqlite/driver.py +230 -269
  15. sqlspec/adapters/asyncmy/__init__.py +18 -3
  16. sqlspec/adapters/asyncmy/_types.py +12 -0
  17. sqlspec/adapters/asyncmy/config.py +80 -168
  18. sqlspec/adapters/asyncmy/driver.py +260 -225
  19. sqlspec/adapters/asyncpg/__init__.py +19 -4
  20. sqlspec/adapters/asyncpg/_types.py +17 -0
  21. sqlspec/adapters/asyncpg/config.py +82 -181
  22. sqlspec/adapters/asyncpg/driver.py +285 -383
  23. sqlspec/adapters/bigquery/__init__.py +17 -3
  24. sqlspec/adapters/bigquery/_types.py +12 -0
  25. sqlspec/adapters/bigquery/config.py +191 -258
  26. sqlspec/adapters/bigquery/driver.py +474 -646
  27. sqlspec/adapters/duckdb/__init__.py +14 -3
  28. sqlspec/adapters/duckdb/_types.py +12 -0
  29. sqlspec/adapters/duckdb/config.py +415 -351
  30. sqlspec/adapters/duckdb/driver.py +343 -413
  31. sqlspec/adapters/oracledb/__init__.py +19 -5
  32. sqlspec/adapters/oracledb/_types.py +14 -0
  33. sqlspec/adapters/oracledb/config.py +123 -379
  34. sqlspec/adapters/oracledb/driver.py +507 -560
  35. sqlspec/adapters/psqlpy/__init__.py +13 -3
  36. sqlspec/adapters/psqlpy/_types.py +11 -0
  37. sqlspec/adapters/psqlpy/config.py +93 -254
  38. sqlspec/adapters/psqlpy/driver.py +505 -234
  39. sqlspec/adapters/psycopg/__init__.py +19 -5
  40. sqlspec/adapters/psycopg/_types.py +17 -0
  41. sqlspec/adapters/psycopg/config.py +143 -403
  42. sqlspec/adapters/psycopg/driver.py +706 -872
  43. sqlspec/adapters/sqlite/__init__.py +14 -3
  44. sqlspec/adapters/sqlite/_types.py +11 -0
  45. sqlspec/adapters/sqlite/config.py +202 -118
  46. sqlspec/adapters/sqlite/driver.py +264 -303
  47. sqlspec/base.py +105 -9
  48. sqlspec/{statement/builder → builder}/__init__.py +12 -14
  49. sqlspec/{statement/builder → builder}/_base.py +120 -55
  50. sqlspec/{statement/builder → builder}/_column.py +17 -6
  51. sqlspec/{statement/builder → builder}/_ddl.py +46 -79
  52. sqlspec/{statement/builder → builder}/_ddl_utils.py +5 -10
  53. sqlspec/{statement/builder → builder}/_delete.py +6 -25
  54. sqlspec/{statement/builder → builder}/_insert.py +18 -65
  55. sqlspec/builder/_merge.py +56 -0
  56. sqlspec/{statement/builder → builder}/_parsing_utils.py +8 -11
  57. sqlspec/{statement/builder → builder}/_select.py +11 -56
  58. sqlspec/{statement/builder → builder}/_update.py +12 -18
  59. sqlspec/{statement/builder → builder}/mixins/__init__.py +10 -14
  60. sqlspec/{statement/builder → builder}/mixins/_cte_and_set_ops.py +48 -59
  61. sqlspec/{statement/builder → builder}/mixins/_insert_operations.py +34 -18
  62. sqlspec/{statement/builder → builder}/mixins/_join_operations.py +1 -3
  63. sqlspec/{statement/builder → builder}/mixins/_merge_operations.py +19 -9
  64. sqlspec/{statement/builder → builder}/mixins/_order_limit_operations.py +3 -3
  65. sqlspec/{statement/builder → builder}/mixins/_pivot_operations.py +4 -8
  66. sqlspec/{statement/builder → builder}/mixins/_select_operations.py +25 -38
  67. sqlspec/{statement/builder → builder}/mixins/_update_operations.py +15 -16
  68. sqlspec/{statement/builder → builder}/mixins/_where_clause.py +210 -137
  69. sqlspec/cli.py +4 -5
  70. sqlspec/config.py +180 -133
  71. sqlspec/core/__init__.py +63 -0
  72. sqlspec/core/cache.py +873 -0
  73. sqlspec/core/compiler.py +396 -0
  74. sqlspec/core/filters.py +830 -0
  75. sqlspec/core/hashing.py +310 -0
  76. sqlspec/core/parameters.py +1209 -0
  77. sqlspec/core/result.py +664 -0
  78. sqlspec/{statement → core}/splitter.py +321 -191
  79. sqlspec/core/statement.py +666 -0
  80. sqlspec/driver/__init__.py +7 -10
  81. sqlspec/driver/_async.py +387 -176
  82. sqlspec/driver/_common.py +527 -289
  83. sqlspec/driver/_sync.py +390 -172
  84. sqlspec/driver/mixins/__init__.py +2 -19
  85. sqlspec/driver/mixins/_result_tools.py +164 -0
  86. sqlspec/driver/mixins/_sql_translator.py +6 -3
  87. sqlspec/exceptions.py +5 -252
  88. sqlspec/extensions/aiosql/adapter.py +93 -96
  89. sqlspec/extensions/litestar/cli.py +1 -1
  90. sqlspec/extensions/litestar/config.py +0 -1
  91. sqlspec/extensions/litestar/handlers.py +15 -26
  92. sqlspec/extensions/litestar/plugin.py +18 -16
  93. sqlspec/extensions/litestar/providers.py +17 -52
  94. sqlspec/loader.py +424 -105
  95. sqlspec/migrations/__init__.py +12 -0
  96. sqlspec/migrations/base.py +92 -68
  97. sqlspec/migrations/commands.py +24 -106
  98. sqlspec/migrations/loaders.py +402 -0
  99. sqlspec/migrations/runner.py +49 -51
  100. sqlspec/migrations/tracker.py +31 -44
  101. sqlspec/migrations/utils.py +64 -24
  102. sqlspec/protocols.py +7 -183
  103. sqlspec/storage/__init__.py +1 -1
  104. sqlspec/storage/backends/base.py +37 -40
  105. sqlspec/storage/backends/fsspec.py +136 -112
  106. sqlspec/storage/backends/obstore.py +138 -160
  107. sqlspec/storage/capabilities.py +5 -4
  108. sqlspec/storage/registry.py +57 -106
  109. sqlspec/typing.py +136 -115
  110. sqlspec/utils/__init__.py +2 -3
  111. sqlspec/utils/correlation.py +0 -3
  112. sqlspec/utils/deprecation.py +6 -6
  113. sqlspec/utils/fixtures.py +6 -6
  114. sqlspec/utils/logging.py +0 -2
  115. sqlspec/utils/module_loader.py +7 -12
  116. sqlspec/utils/singleton.py +0 -1
  117. sqlspec/utils/sync_tools.py +17 -38
  118. sqlspec/utils/text.py +12 -51
  119. sqlspec/utils/type_guards.py +443 -232
  120. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/METADATA +7 -2
  121. sqlspec-0.16.0.dist-info/RECORD +134 -0
  122. sqlspec/adapters/adbc/transformers.py +0 -108
  123. sqlspec/driver/connection.py +0 -207
  124. sqlspec/driver/mixins/_cache.py +0 -114
  125. sqlspec/driver/mixins/_csv_writer.py +0 -91
  126. sqlspec/driver/mixins/_pipeline.py +0 -508
  127. sqlspec/driver/mixins/_query_tools.py +0 -796
  128. sqlspec/driver/mixins/_result_utils.py +0 -138
  129. sqlspec/driver/mixins/_storage.py +0 -912
  130. sqlspec/driver/mixins/_type_coercion.py +0 -128
  131. sqlspec/driver/parameters.py +0 -138
  132. sqlspec/statement/__init__.py +0 -21
  133. sqlspec/statement/builder/_merge.py +0 -95
  134. sqlspec/statement/cache.py +0 -50
  135. sqlspec/statement/filters.py +0 -625
  136. sqlspec/statement/parameters.py +0 -956
  137. sqlspec/statement/pipelines/__init__.py +0 -210
  138. sqlspec/statement/pipelines/analyzers/__init__.py +0 -9
  139. sqlspec/statement/pipelines/analyzers/_analyzer.py +0 -646
  140. sqlspec/statement/pipelines/context.py +0 -109
  141. sqlspec/statement/pipelines/transformers/__init__.py +0 -7
  142. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +0 -88
  143. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +0 -1247
  144. sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +0 -76
  145. sqlspec/statement/pipelines/validators/__init__.py +0 -23
  146. sqlspec/statement/pipelines/validators/_dml_safety.py +0 -290
  147. sqlspec/statement/pipelines/validators/_parameter_style.py +0 -370
  148. sqlspec/statement/pipelines/validators/_performance.py +0 -714
  149. sqlspec/statement/pipelines/validators/_security.py +0 -967
  150. sqlspec/statement/result.py +0 -435
  151. sqlspec/statement/sql.py +0 -1774
  152. sqlspec/utils/cached_property.py +0 -25
  153. sqlspec/utils/statement_hashing.py +0 -203
  154. sqlspec-0.14.1.dist-info/RECORD +0 -145
  155. sqlspec/{statement/builder → builder}/mixins/_delete_operations.py +0 -0
  156. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/WHEEL +0 -0
  157. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/entry_points.txt +0 -0
  158. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/licenses/LICENSE +0 -0
  159. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/licenses/NOTICE +0 -0
@@ -1,66 +1,301 @@
1
- """DuckDB database configuration with direct field-based configuration."""
1
+ """DuckDB database configuration with connection pooling."""
2
+ # ruff: noqa: D107 W293 RUF100 S110 PLR0913 FA100 BLE001 UP037 COM812 ARG002
2
3
 
3
4
  import logging
4
- from contextlib import contextmanager
5
- from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, TypedDict
5
+ import threading
6
+ import time
7
+ from collections.abc import Sequence
8
+ from contextlib import contextmanager, suppress
9
+ from typing import TYPE_CHECKING, Any, Final, Optional, TypedDict, cast
6
10
 
7
11
  import duckdb
8
12
  from typing_extensions import NotRequired
9
13
 
10
- from sqlspec.adapters.duckdb.driver import DuckDBConnection, DuckDBDriver
11
- from sqlspec.config import NoPoolSyncConfig
12
- from sqlspec.statement.sql import SQLConfig
13
- from sqlspec.typing import DictRow, Empty
14
+ from sqlspec.adapters.duckdb._types import DuckDBConnection
15
+ from sqlspec.adapters.duckdb.driver import DuckDBCursor, DuckDBDriver, duckdb_statement_config
16
+ from sqlspec.config import SyncDatabaseConfig
14
17
 
15
18
  if TYPE_CHECKING:
16
- from collections.abc import Generator, Sequence
17
- from contextlib import AbstractContextManager
19
+ from collections.abc import Generator
20
+ from typing import Callable, ClassVar, Union
21
+
22
+ from sqlspec.core.statement import StatementConfig
18
23
 
19
24
 
20
25
  logger = logging.getLogger(__name__)
21
26
 
22
- __all__ = ("CONNECTION_FIELDS", "DuckDBConfig", "DuckDBExtensionConfig", "DuckDBSecretConfig")
23
-
24
-
25
- CONNECTION_FIELDS = frozenset(
26
- {
27
- "database",
28
- "read_only",
29
- "config",
30
- "memory_limit",
31
- "threads",
32
- "temp_directory",
33
- "max_temp_directory_size",
34
- "autoload_known_extensions",
35
- "autoinstall_known_extensions",
36
- "allow_community_extensions",
37
- "allow_unsigned_extensions",
38
- "extension_directory",
39
- "custom_extension_repository",
40
- "autoinstall_extension_repository",
41
- "allow_persistent_secrets",
42
- "enable_external_access",
43
- "secret_directory",
44
- "enable_object_cache",
45
- "parquet_metadata_cache",
46
- "enable_external_file_cache",
47
- "checkpoint_threshold",
48
- "enable_progress_bar",
49
- "progress_bar_time",
50
- "enable_logging",
51
- "log_query_path",
52
- "logging_level",
53
- "preserve_insertion_order",
54
- "default_null_order",
55
- "default_order",
56
- "ieee_floating_point_ops",
57
- "binary_as_string",
58
- "arrow_large_buffer_size",
59
- "errors_as_json",
60
- }
27
+ DEFAULT_MIN_POOL: Final[int] = 1
28
+ DEFAULT_MAX_POOL: Final[int] = 4
29
+ POOL_TIMEOUT: Final[float] = 30.0
30
+ POOL_RECYCLE: Final[int] = 86400
31
+
32
+ __all__ = (
33
+ "DuckDBConfig",
34
+ "DuckDBConnectionParams",
35
+ "DuckDBConnectionPool",
36
+ "DuckDBDriverFeatures",
37
+ "DuckDBExtensionConfig",
38
+ "DuckDBPoolParams",
39
+ "DuckDBSecretConfig",
61
40
  )
62
41
 
63
42
 
43
+ class DuckDBConnectionPool:
44
+ """Thread-local connection manager for DuckDB with performance optimizations.
45
+
46
+ Uses thread-local storage to ensure each thread gets its own DuckDB connection,
47
+ preventing the thread-safety issues that cause segmentation faults when
48
+ multiple cursors share the same connection concurrently.
49
+
50
+ This design trades traditional pooling for thread safety, which is essential
51
+ for DuckDB since connections and cursors are not thread-safe.
52
+ """
53
+
54
+ __slots__ = (
55
+ "_connection_config",
56
+ "_connection_times",
57
+ "_created_connections",
58
+ "_extensions",
59
+ "_lock",
60
+ "_on_connection_create",
61
+ "_recycle",
62
+ "_secrets",
63
+ "_thread_local",
64
+ )
65
+
66
+ def __init__( # noqa: PLR0913
67
+ self,
68
+ connection_config: "dict[str, Any]", # noqa: UP037
69
+ pool_min_size: int = DEFAULT_MIN_POOL,
70
+ pool_max_size: int = DEFAULT_MAX_POOL,
71
+ pool_timeout: float = POOL_TIMEOUT,
72
+ pool_recycle_seconds: int = POOL_RECYCLE,
73
+ extensions: "Optional[list[dict[str, Any]]]" = None, # noqa: FA100, UP037
74
+ secrets: "Optional[list[dict[str, Any]]]" = None, # noqa: FA100, UP037
75
+ on_connection_create: "Optional[Callable[[DuckDBConnection], None]]" = None, # noqa: FA100
76
+ ) -> None:
77
+ """Initialize the thread-local connection manager."""
78
+ self._connection_config = connection_config
79
+ self._recycle = pool_recycle_seconds
80
+ self._extensions = extensions or []
81
+ self._secrets = secrets or []
82
+ self._on_connection_create = on_connection_create
83
+ self._thread_local = threading.local()
84
+ self._lock = threading.RLock()
85
+ self._created_connections = 0
86
+ self._connection_times: "dict[int, float]" = {}
87
+
88
+ def _create_connection(self) -> DuckDBConnection:
89
+ """Create a new DuckDB connection with extensions and secrets."""
90
+ connect_parameters = {}
91
+ config_dict = {}
92
+
93
+ for key, value in self._connection_config.items():
94
+ if key in {"database", "read_only"}:
95
+ connect_parameters[key] = value
96
+ else:
97
+ config_dict[key] = value
98
+
99
+ if config_dict:
100
+ connect_parameters["config"] = config_dict
101
+
102
+ connection = duckdb.connect(**connect_parameters)
103
+
104
+ for ext_config in self._extensions:
105
+ ext_name = ext_config.get("name")
106
+ if not ext_name:
107
+ continue
108
+
109
+ install_kwargs = {}
110
+ if "version" in ext_config:
111
+ install_kwargs["version"] = ext_config["version"]
112
+ if "repository" in ext_config:
113
+ install_kwargs["repository"] = ext_config["repository"]
114
+ if ext_config.get("force_install", False):
115
+ install_kwargs["force_install"] = True
116
+
117
+ try:
118
+ if install_kwargs:
119
+ connection.install_extension(ext_name, **install_kwargs)
120
+ connection.load_extension(ext_name)
121
+ except Exception: # noqa: BLE001, S110
122
+ pass
123
+
124
+ for secret_config in self._secrets:
125
+ secret_type = secret_config.get("secret_type")
126
+ secret_name = secret_config.get("name")
127
+ secret_value = secret_config.get("value")
128
+
129
+ if not (secret_type and secret_name and secret_value):
130
+ continue
131
+
132
+ value_pairs = []
133
+ for key, value in secret_value.items():
134
+ escaped_value = str(value).replace("'", "''")
135
+ value_pairs.append(f"'{key}' = '{escaped_value}'")
136
+ value_string = ", ".join(value_pairs)
137
+ scope_clause = ""
138
+ if "scope" in secret_config:
139
+ scope_clause = f" SCOPE '{secret_config['scope']}'"
140
+
141
+ sql = f""" # noqa: S608
142
+ CREATE SECRET {secret_name} (
143
+ TYPE {secret_type},
144
+ {value_string}
145
+ ){scope_clause}
146
+ """
147
+ with suppress(Exception):
148
+ connection.execute(sql)
149
+
150
+ if self._on_connection_create:
151
+ with suppress(Exception):
152
+ self._on_connection_create(connection)
153
+
154
+ conn_id = id(connection)
155
+ with self._lock:
156
+ self._created_connections += 1
157
+ self._connection_times[conn_id] = time.time()
158
+
159
+ return connection
160
+
161
+ def _get_thread_connection(self) -> DuckDBConnection:
162
+ """Get or create a connection for the current thread.
163
+
164
+ Each thread gets its own dedicated DuckDB connection to prevent
165
+ thread-safety issues with concurrent cursor operations.
166
+ """
167
+ if not hasattr(self._thread_local, "connection"):
168
+ self._thread_local.connection = self._create_connection()
169
+ self._thread_local.created_at = time.time()
170
+
171
+ # Check if connection needs recycling
172
+ if self._recycle > 0 and time.time() - self._thread_local.created_at > self._recycle:
173
+ with suppress(Exception):
174
+ self._thread_local.connection.close()
175
+ self._thread_local.connection = self._create_connection()
176
+ self._thread_local.created_at = time.time()
177
+
178
+ return cast("DuckDBConnection", self._thread_local.connection)
179
+
180
+ def _close_thread_connection(self) -> None:
181
+ """Close the connection for the current thread."""
182
+ if hasattr(self._thread_local, "connection"):
183
+ with suppress(Exception):
184
+ self._thread_local.connection.close()
185
+ del self._thread_local.connection
186
+ if hasattr(self._thread_local, "created_at"):
187
+ del self._thread_local.created_at
188
+
189
+ def _is_connection_alive(self, connection: DuckDBConnection) -> bool:
190
+ """Check if a connection is still alive and usable.
191
+
192
+ Args:
193
+ connection: Connection to check
194
+
195
+ Returns:
196
+ True if connection is alive, False otherwise
197
+ """
198
+ try:
199
+ cursor = connection.cursor()
200
+ cursor.close()
201
+ except Exception:
202
+ return False
203
+ return True
204
+
205
+ @contextmanager
206
+ def get_connection(self) -> "Generator[DuckDBConnection, None, None]":
207
+ """Get a thread-local connection.
208
+
209
+ Each thread gets its own dedicated DuckDB connection to prevent
210
+ thread-safety issues with concurrent cursor operations.
211
+
212
+ Yields:
213
+ DuckDBConnection: A thread-local connection.
214
+ """
215
+ connection = self._get_thread_connection()
216
+ try:
217
+ yield connection
218
+ except Exception:
219
+ # On error, close and recreate connection for this thread
220
+ self._close_thread_connection()
221
+ raise
222
+
223
+ def close(self) -> None:
224
+ """Close the thread-local connection if it exists."""
225
+ self._close_thread_connection()
226
+
227
+ def size(self) -> int:
228
+ """Get current pool size (always 1 for thread-local)."""
229
+ return 1 if hasattr(self._thread_local, "connection") else 0
230
+
231
+ def checked_out(self) -> int:
232
+ """Get number of checked out connections (always 0 for thread-local)."""
233
+ return 0
234
+
235
+ def acquire(self) -> DuckDBConnection:
236
+ """Acquire a thread-local connection.
237
+
238
+ Each thread gets its own dedicated DuckDB connection to prevent
239
+ thread-safety issues with concurrent cursor operations.
240
+
241
+ Returns:
242
+ DuckDBConnection: A thread-local connection
243
+ """
244
+ return self._get_thread_connection()
245
+
246
+
247
+ class DuckDBConnectionParams(TypedDict, total=False):
248
+ """DuckDB connection parameters."""
249
+
250
+ database: NotRequired[str]
251
+ read_only: NotRequired[bool]
252
+ config: NotRequired[dict[str, Any]]
253
+ memory_limit: NotRequired[str]
254
+ threads: NotRequired[int]
255
+ temp_directory: NotRequired[str]
256
+ max_temp_directory_size: NotRequired[str]
257
+ autoload_known_extensions: NotRequired[bool]
258
+ autoinstall_known_extensions: NotRequired[bool]
259
+ allow_community_extensions: NotRequired[bool]
260
+ allow_unsigned_extensions: NotRequired[bool]
261
+ extension_directory: NotRequired[str]
262
+ custom_extension_repository: NotRequired[str]
263
+ autoinstall_extension_repository: NotRequired[str]
264
+ allow_persistent_secrets: NotRequired[bool]
265
+ enable_external_access: NotRequired[bool]
266
+ secret_directory: NotRequired[str]
267
+ enable_object_cache: NotRequired[bool]
268
+ parquet_metadata_cache: NotRequired[str]
269
+ enable_external_file_cache: NotRequired[bool]
270
+ checkpoint_threshold: NotRequired[str]
271
+ enable_progress_bar: NotRequired[bool]
272
+ progress_bar_time: NotRequired[float]
273
+ enable_logging: NotRequired[bool]
274
+ log_query_path: NotRequired[str]
275
+ logging_level: NotRequired[str]
276
+ preserve_insertion_order: NotRequired[bool]
277
+ default_null_order: NotRequired[str]
278
+ default_order: NotRequired[str]
279
+ ieee_floating_point_ops: NotRequired[bool]
280
+ binary_as_string: NotRequired[bool]
281
+ arrow_large_buffer_size: NotRequired[bool]
282
+ errors_as_json: NotRequired[bool]
283
+ extra: NotRequired[dict[str, Any]]
284
+
285
+
286
+ class DuckDBPoolParams(DuckDBConnectionParams, total=False):
287
+ """Complete pool configuration for DuckDB adapter.
288
+
289
+ Combines standardized pool parameters with DuckDB-specific connection parameters.
290
+ """
291
+
292
+ # Standardized pool parameters (consistent across ALL adapters)
293
+ pool_min_size: NotRequired[int]
294
+ pool_max_size: NotRequired[int]
295
+ pool_timeout: NotRequired[float]
296
+ pool_recycle_seconds: NotRequired[int]
297
+
298
+
64
299
  class DuckDBExtensionConfig(TypedDict, total=False):
65
300
  """DuckDB extension configuration for auto-management."""
66
301
 
@@ -93,309 +328,137 @@ class DuckDBSecretConfig(TypedDict, total=False):
93
328
  """Scope of the secret (LOCAL or PERSISTENT)."""
94
329
 
95
330
 
96
- class DuckDBConfig(NoPoolSyncConfig[DuckDBConnection, DuckDBDriver]):
97
- """Enhanced DuckDB configuration with intelligent features and modern architecture.
331
+ class DuckDBDriverFeatures(TypedDict, total=False):
332
+ """TypedDict for DuckDB driver features configuration."""
333
+
334
+ extensions: NotRequired[Sequence[DuckDBExtensionConfig]]
335
+ """List of extensions to install/load on connection creation."""
336
+ secrets: NotRequired[Sequence[DuckDBSecretConfig]]
337
+ """List of secrets to create for AI/API integrations."""
338
+ on_connection_create: NotRequired["Callable[[DuckDBConnection], Optional[DuckDBConnection]]"]
339
+ """Callback executed when connection is created."""
340
+
341
+
342
+ class DuckDBConfig(SyncDatabaseConfig[DuckDBConnection, DuckDBConnectionPool, DuckDBDriver]):
343
+ """Enhanced DuckDB configuration with connection pooling and intelligent features.
98
344
 
99
- DuckDB is an embedded analytical database that doesn't require connection pooling.
100
345
  This configuration supports all of DuckDB's unique features including:
101
346
 
347
+ - Connection pooling optimized for DuckDB's architecture
102
348
  - Extension auto-management and installation
103
349
  - Secret management for API integrations
104
350
  - Intelligent auto configuration settings
105
351
  - High-performance Arrow integration
106
352
  - Direct file querying capabilities
107
353
  - Performance optimizations for analytics workloads
108
- """
109
-
110
- is_async: ClassVar[bool] = False
111
- supports_connection_pooling: ClassVar[bool] = False
112
354
 
113
- driver_type: type[DuckDBDriver] = DuckDBDriver
114
- connection_type: type[DuckDBConnection] = DuckDBConnection
115
-
116
- supported_parameter_styles: ClassVar[tuple[str, ...]] = ("qmark", "numeric")
117
- """DuckDB supports ? (qmark) and $1, $2 (numeric) parameter styles."""
355
+ DuckDB Connection Pool Best Practices:
356
+ - DuckDB performs best with long-lived connections that maintain cache
357
+ - Default pool size is 1-4 connections (DuckDB is optimized for single connection)
358
+ - Connection recycling is set to 24 hours by default (set to 0 to disable)
359
+ - Shared memory databases use `:memory:shared_db` for proper concurrency
360
+ - Health checks are minimized to reduce overhead
361
+ """
118
362
 
119
- default_parameter_style: ClassVar[str] = "qmark"
120
- """DuckDB's native parameter style is ? (qmark)."""
363
+ driver_type: "ClassVar[type[DuckDBDriver]]" = DuckDBDriver
364
+ connection_type: "ClassVar[type[DuckDBConnection]]" = DuckDBConnection
121
365
 
122
366
  def __init__(
123
367
  self,
124
- statement_config: "Optional[SQLConfig]" = None,
125
- default_row_type: type[DictRow] = DictRow,
126
- # Core connection parameters
127
- database: Optional[str] = None,
128
- read_only: Optional[bool] = None,
129
- config: Optional[dict[str, Any]] = None,
130
- # Resource management
131
- memory_limit: Optional[str] = None,
132
- threads: Optional[int] = None,
133
- temp_directory: Optional[str] = None,
134
- max_temp_directory_size: Optional[str] = None,
135
- # Extension configuration
136
- autoload_known_extensions: Optional[bool] = None,
137
- autoinstall_known_extensions: Optional[bool] = None,
138
- allow_community_extensions: Optional[bool] = None,
139
- allow_unsigned_extensions: Optional[bool] = None,
140
- extension_directory: Optional[str] = None,
141
- custom_extension_repository: Optional[str] = None,
142
- autoinstall_extension_repository: Optional[str] = None,
143
- # Security and access
144
- allow_persistent_secrets: Optional[bool] = None,
145
- enable_external_access: Optional[bool] = None,
146
- secret_directory: Optional[str] = None,
147
- # Performance optimizations
148
- enable_object_cache: Optional[bool] = None,
149
- parquet_metadata_cache: Optional[bool] = None,
150
- enable_external_file_cache: Optional[bool] = None,
151
- checkpoint_threshold: Optional[str] = None,
152
- # User experience
153
- enable_progress_bar: Optional[bool] = None,
154
- progress_bar_time: Optional[int] = None,
155
- # Logging and debugging
156
- enable_logging: Optional[bool] = None,
157
- log_query_path: Optional[str] = None,
158
- logging_level: Optional[str] = None,
159
- # Data processing settings
160
- preserve_insertion_order: Optional[bool] = None,
161
- default_null_order: Optional[str] = None,
162
- default_order: Optional[str] = None,
163
- ieee_floating_point_ops: Optional[bool] = None,
164
- # File format settings
165
- binary_as_string: Optional[bool] = None,
166
- arrow_large_buffer_size: Optional[bool] = None,
167
- # Error handling
168
- errors_as_json: Optional[bool] = None,
169
- # DuckDB intelligent features
170
- extensions: "Optional[Sequence[DuckDBExtensionConfig]]" = None,
171
- secrets: "Optional[Sequence[DuckDBSecretConfig]]" = None,
172
- on_connection_create: "Optional[Callable[[DuckDBConnection], Optional[DuckDBConnection]]]" = None,
173
- **kwargs: Any,
368
+ *,
369
+ pool_config: "Optional[Union[DuckDBPoolParams, dict[str, Any]]]" = None,
370
+ migration_config: Optional[dict[str, Any]] = None,
371
+ pool_instance: "Optional[DuckDBConnectionPool]" = None,
372
+ statement_config: "Optional[StatementConfig]" = None,
373
+ driver_features: "Optional[Union[DuckDBDriverFeatures, dict[str, Any]]]" = None,
174
374
  ) -> None:
175
- """Initialize DuckDB configuration with intelligent features.
176
-
177
- Args:
178
- statement_config: Default SQL statement configuration
179
- default_row_type: Default row type for results
180
- database: Path to the DuckDB database file. Use ':memory:' for in-memory database
181
- read_only: Whether to open the database in read-only mode
182
- config: DuckDB configuration options passed directly to the connection
183
- memory_limit: Maximum memory usage (e.g., '1GB', '80% of RAM')
184
- threads: Number of threads to use for parallel query execution
185
- temp_directory: Directory for temporary files during spilling
186
- max_temp_directory_size: Maximum size of temp directory (e.g., '1GB')
187
- autoload_known_extensions: Automatically load known extensions when needed
188
- autoinstall_known_extensions: Automatically install known extensions when needed
189
- allow_community_extensions: Allow community-built extensions
190
- allow_unsigned_extensions: Allow unsigned extensions (development only)
191
- extension_directory: Directory to store extensions
192
- custom_extension_repository: Custom endpoint for extension installation
193
- autoinstall_extension_repository: Override endpoint for autoloading extensions
194
- allow_persistent_secrets: Enable persistent secret storage
195
- enable_external_access: Allow external file system access
196
- secret_directory: Directory for persistent secrets
197
- enable_object_cache: Enable caching of objects (e.g., Parquet metadata)
198
- parquet_metadata_cache: Cache Parquet metadata for repeated access
199
- enable_external_file_cache: Cache external files in memory
200
- checkpoint_threshold: WAL size threshold for automatic checkpoints
201
- enable_progress_bar: Show progress bar for long queries
202
- progress_bar_time: Time in milliseconds before showing progress bar
203
- enable_logging: Enable DuckDB logging
204
- log_query_path: Path to log queries for debugging
205
- logging_level: Log level (DEBUG, INFO, WARNING, ERROR)
206
- preserve_insertion_order: Whether to preserve insertion order in results
207
- default_null_order: Default NULL ordering (NULLS_FIRST, NULLS_LAST)
208
- default_order: Default sort order (ASC, DESC)
209
- ieee_floating_point_ops: Use IEEE 754 compliant floating point operations
210
- binary_as_string: Interpret binary data as string in Parquet files
211
- arrow_large_buffer_size: Use large Arrow buffers for strings, blobs, etc.
212
- errors_as_json: Return errors in JSON format
213
- extensions: List of extension dicts to auto-install/load with keys: name, version, repository, force_install
214
- secrets: List of secret dicts for AI/API integrations with keys: secret_type, name, value, scope
215
- on_connection_create: Callback executed when connection is created
216
- **kwargs: Additional parameters (stored in extras)
217
-
218
- Example:
219
- >>> config = DuckDBConfig(
220
- ... database=":memory:",
221
- ... memory_limit="1GB",
222
- ... threads=4,
223
- ... autoload_known_extensions=True,
224
- ... extensions=[
225
- ... {"name": "spatial", "repository": "core"},
226
- ... {"name": "aws", "repository": "core"},
227
- ... ],
228
- ... secrets=[
229
- ... {
230
- ... "secret_type": "openai",
231
- ... "name": "my_openai_secret",
232
- ... "value": {"api_key": "sk-..."},
233
- ... }
234
- ... ],
235
- ... )
236
- """
237
- # Store connection parameters as instance attributes
238
- self.database = database or ":memory:"
239
- self.read_only = read_only
240
- self.config = config
241
- self.memory_limit = memory_limit
242
- self.threads = threads
243
- self.temp_directory = temp_directory
244
- self.max_temp_directory_size = max_temp_directory_size
245
- self.autoload_known_extensions = autoload_known_extensions
246
- self.autoinstall_known_extensions = autoinstall_known_extensions
247
- self.allow_community_extensions = allow_community_extensions
248
- self.allow_unsigned_extensions = allow_unsigned_extensions
249
- self.extension_directory = extension_directory
250
- self.custom_extension_repository = custom_extension_repository
251
- self.autoinstall_extension_repository = autoinstall_extension_repository
252
- self.allow_persistent_secrets = allow_persistent_secrets
253
- self.enable_external_access = enable_external_access
254
- self.secret_directory = secret_directory
255
- self.enable_object_cache = enable_object_cache
256
- self.parquet_metadata_cache = parquet_metadata_cache
257
- self.enable_external_file_cache = enable_external_file_cache
258
- self.checkpoint_threshold = checkpoint_threshold
259
- self.enable_progress_bar = enable_progress_bar
260
- self.progress_bar_time = progress_bar_time
261
- self.enable_logging = enable_logging
262
- self.log_query_path = log_query_path
263
- self.logging_level = logging_level
264
- self.preserve_insertion_order = preserve_insertion_order
265
- self.default_null_order = default_null_order
266
- self.default_order = default_order
267
- self.ieee_floating_point_ops = ieee_floating_point_ops
268
- self.binary_as_string = binary_as_string
269
- self.arrow_large_buffer_size = arrow_large_buffer_size
270
- self.errors_as_json = errors_as_json
271
-
272
- self.extras = kwargs or {}
273
-
274
- # Store other config
275
- self.statement_config = statement_config or SQLConfig()
276
- self.default_row_type = default_row_type
277
-
278
- # DuckDB intelligent features
279
- self.extensions = extensions or []
280
- self.secrets = secrets or []
281
- self.on_connection_create = on_connection_create
282
-
283
- super().__init__()
284
-
285
- @property
286
- def connection_config_dict(self) -> dict[str, Any]:
287
- """Return the connection configuration as a dict for duckdb.connect()."""
288
- # DuckDB connect() only accepts database, read_only, and config parameters
289
- connect_params: dict[str, Any] = {}
290
-
291
- if hasattr(self, "database") and self.database is not None:
292
- connect_params["database"] = self.database
293
-
294
- if hasattr(self, "read_only") and self.read_only is not None:
295
- connect_params["read_only"] = self.read_only
296
-
297
- # All other parameters go into the config dict
298
- config_dict = {}
299
- for field in CONNECTION_FIELDS:
300
- if field not in {"database", "read_only", "config"}:
301
- value = getattr(self, field, None)
302
- if value is not None and value is not Empty:
303
- config_dict[field] = value
375
+ """Initialize DuckDB configuration with intelligent features."""
376
+ if pool_config is None:
377
+ pool_config = {}
378
+ if "database" not in pool_config:
379
+ pool_config["database"] = ":memory:shared_db"
380
+
381
+ if pool_config.get("database") in {":memory:", ""}:
382
+ pool_config["database"] = ":memory:shared_db"
383
+
384
+ super().__init__(
385
+ pool_config=dict(pool_config),
386
+ pool_instance=pool_instance,
387
+ migration_config=migration_config,
388
+ statement_config=statement_config or duckdb_statement_config,
389
+ driver_features=cast("dict[str, Any]", driver_features),
390
+ )
391
+
392
+ def _get_connection_config_dict(self) -> "dict[str, Any]":
393
+ """Get connection configuration as plain dict for pool creation."""
394
+ return {
395
+ k: v
396
+ for k, v in self.pool_config.items()
397
+ if v is not None
398
+ and k not in {"pool_min_size", "pool_max_size", "pool_timeout", "pool_recycle_seconds", "extra"}
399
+ }
400
+
401
+ def _get_pool_config_dict(self) -> "dict[str, Any]":
402
+ """Get pool configuration as plain dict for pool creation."""
403
+ return {
404
+ k: v
405
+ for k, v in self.pool_config.items()
406
+ if v is not None and k in {"pool_min_size", "pool_max_size", "pool_timeout", "pool_recycle_seconds"}
407
+ }
408
+
409
+ def _create_pool(self) -> DuckDBConnectionPool:
410
+ """Create the DuckDB connection pool."""
411
+
412
+ extensions = self.driver_features.get("extensions", None)
413
+ secrets = self.driver_features.get("secrets", None)
414
+ on_connection_create = self.driver_features.get("on_connection_create", None)
415
+
416
+ extensions_dicts = [dict(ext) for ext in extensions] if extensions else None
417
+ secrets_dicts = [dict(secret) for secret in secrets] if secrets else None
418
+
419
+ pool_callback = None
420
+ if on_connection_create:
421
+
422
+ def wrapped_callback(conn: DuckDBConnection) -> None:
423
+ on_connection_create(conn)
424
+
425
+ pool_callback = wrapped_callback
426
+ conf = {"extensions": extensions_dicts, "secrets": secrets_dicts, "on_connection_create": pool_callback}
427
+
428
+ return DuckDBConnectionPool(
429
+ connection_config=self._get_connection_config_dict(),
430
+ **conf, # type: ignore[arg-type]
431
+ **self._get_pool_config_dict(),
432
+ )
433
+
434
+ def _close_pool(self) -> None:
435
+ """Close the connection pool."""
436
+ if self.pool_instance:
437
+ self.pool_instance.close()
304
438
 
305
- config_dict.update(self.extras)
306
-
307
- # If we have config parameters, add them
308
- if config_dict:
309
- connect_params["config"] = config_dict
439
+ def create_connection(self) -> DuckDBConnection:
440
+ """Get a DuckDB connection from the pool.
310
441
 
311
- return connect_params
442
+ This method ensures the pool is created and returns a connection
443
+ from the pool. The connection is checked out from the pool and must
444
+ be properly managed by the caller.
312
445
 
313
- def create_connection(self) -> DuckDBConnection:
314
- """Create and return a DuckDB connection with intelligent configuration applied."""
446
+ Returns:
447
+ DuckDBConnection: A connection from the pool
315
448
 
316
- logger.info("Creating DuckDB connection", extra={"adapter": "duckdb"})
449
+ Note:
450
+ For automatic connection management, prefer using provide_connection()
451
+ or provide_session() which handle returning connections to the pool.
452
+ The caller is responsible for returning the connection to the pool
453
+ using pool.release(connection) when done.
454
+ """
455
+ pool = self.provide_pool()
317
456
 
318
- try:
319
- config_dict = self.connection_config_dict
320
- connection = duckdb.connect(**config_dict)
321
- logger.info("DuckDB connection created successfully", extra={"adapter": "duckdb"})
322
-
323
- # Install and load extensions
324
- for ext_config in self.extensions:
325
- ext_name = None
326
- try:
327
- ext_name = ext_config.get("name")
328
- if not ext_name:
329
- continue
330
- install_kwargs: dict[str, Any] = {}
331
- if "version" in ext_config:
332
- install_kwargs["version"] = ext_config["version"]
333
- if "repository" in ext_config:
334
- install_kwargs["repository"] = ext_config["repository"]
335
- if ext_config.get("force_install", False):
336
- install_kwargs["force_install"] = True
337
-
338
- if install_kwargs or self.autoinstall_known_extensions:
339
- connection.install_extension(ext_name, **install_kwargs)
340
- connection.load_extension(ext_name)
341
- logger.debug("Loaded DuckDB extension: %s", ext_name, extra={"adapter": "duckdb"})
342
-
343
- except Exception as e:
344
- if ext_name:
345
- logger.warning(
346
- "Failed to load DuckDB extension: %s",
347
- ext_name,
348
- extra={"adapter": "duckdb", "error": str(e)},
349
- )
350
-
351
- for secret_config in self.secrets:
352
- secret_name = None
353
- try:
354
- secret_type = secret_config.get("secret_type")
355
- secret_name = secret_config.get("name")
356
- secret_value = secret_config.get("value")
357
-
358
- if secret_type and secret_name and secret_value:
359
- value_pairs = []
360
- for key, value in secret_value.items():
361
- escaped_value = str(value).replace("'", "''")
362
- value_pairs.append(f"'{key}' = '{escaped_value}'")
363
- value_string = ", ".join(value_pairs)
364
- scope_clause = ""
365
- if "scope" in secret_config:
366
- scope_clause = f" SCOPE '{secret_config['scope']}'"
367
-
368
- sql = f"""
369
- CREATE SECRET {secret_name} (
370
- TYPE {secret_type},
371
- {value_string}
372
- ){scope_clause}
373
- """
374
- connection.execute(sql)
375
- logger.debug("Created DuckDB secret: %s", secret_name, extra={"adapter": "duckdb"})
376
-
377
- except Exception as e:
378
- if secret_name:
379
- logger.warning(
380
- "Failed to create DuckDB secret: %s",
381
- secret_name,
382
- extra={"adapter": "duckdb", "error": str(e)},
383
- )
384
- if self.on_connection_create:
385
- try:
386
- self.on_connection_create(connection)
387
- logger.debug("Executed connection creation hook", extra={"adapter": "duckdb"})
388
- except Exception as e:
389
- logger.warning("Connection creation hook failed", extra={"adapter": "duckdb", "error": str(e)})
390
-
391
- except Exception as e:
392
- logger.exception("Failed to create DuckDB connection", extra={"adapter": "duckdb", "error": str(e)})
393
- raise
394
- return connection
457
+ return pool.acquire()
395
458
 
396
459
  @contextmanager
397
460
  def provide_connection(self, *args: Any, **kwargs: Any) -> "Generator[DuckDBConnection, None, None]":
398
- """Provide a DuckDB connection context manager.
461
+ """Provide a pooled DuckDB connection context manager.
399
462
 
400
463
  Args:
401
464
  *args: Additional arguments.
@@ -404,37 +467,38 @@ class DuckDBConfig(NoPoolSyncConfig[DuckDBConnection, DuckDBDriver]):
404
467
  Yields:
405
468
  A DuckDB connection instance.
406
469
  """
407
- connection = self.create_connection()
408
- try:
470
+ pool = self.provide_pool()
471
+ with pool.get_connection() as connection:
409
472
  yield connection
410
- finally:
411
- connection.close()
412
473
 
413
- def provide_session(self, *args: Any, **kwargs: Any) -> "AbstractContextManager[DuckDBDriver]":
474
+ @contextmanager
475
+ def provide_session(
476
+ self, *args: Any, statement_config: "Optional[StatementConfig]" = None, **kwargs: Any
477
+ ) -> "Generator[DuckDBDriver, None, None]":
414
478
  """Provide a DuckDB driver session context manager.
415
479
 
416
480
  Args:
417
481
  *args: Additional arguments.
482
+ statement_config: Optional statement configuration override.
418
483
  **kwargs: Additional keyword arguments.
419
484
 
420
- Returns:
485
+ Yields:
421
486
  A context manager that yields a DuckDBDriver instance.
422
487
  """
488
+ with self.provide_connection(*args, **kwargs) as connection:
489
+ driver = self.driver_type(connection=connection, statement_config=statement_config or self.statement_config)
490
+ yield driver
491
+
492
+ def get_signature_namespace(self) -> "dict[str, type[Any]]":
493
+ """Get the signature namespace for DuckDB types.
494
+
495
+ This provides all DuckDB-specific types that Litestar needs to recognize
496
+ to avoid serialization attempts.
497
+
498
+ Returns:
499
+ Dictionary mapping type names to types.
500
+ """
423
501
 
424
- @contextmanager
425
- def session_manager() -> "Generator[DuckDBDriver, None, None]":
426
- with self.provide_connection(*args, **kwargs) as connection:
427
- statement_config = self.statement_config
428
- # Inject parameter style info if not already set
429
- if statement_config.allowed_parameter_styles is None:
430
- from dataclasses import replace
431
-
432
- statement_config = replace(
433
- statement_config,
434
- allowed_parameter_styles=self.supported_parameter_styles,
435
- default_parameter_style=self.default_parameter_style,
436
- )
437
- driver = self.driver_type(connection=connection, config=statement_config)
438
- yield driver
439
-
440
- return session_manager()
502
+ namespace = super().get_signature_namespace()
503
+ namespace.update({"DuckDBConnection": DuckDBConnection, "DuckDBCursor": DuckDBCursor})
504
+ return namespace