sqlspec-0.11.0-py3-none-any.whl → sqlspec-0.12.0-py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlspec might be problematic. More details are available via the link on the original page.

Files changed (155)
  1. sqlspec/__init__.py +16 -3
  2. sqlspec/_serialization.py +3 -10
  3. sqlspec/_sql.py +1147 -0
  4. sqlspec/_typing.py +343 -41
  5. sqlspec/adapters/adbc/__init__.py +2 -6
  6. sqlspec/adapters/adbc/config.py +474 -149
  7. sqlspec/adapters/adbc/driver.py +330 -644
  8. sqlspec/adapters/aiosqlite/__init__.py +2 -6
  9. sqlspec/adapters/aiosqlite/config.py +143 -57
  10. sqlspec/adapters/aiosqlite/driver.py +269 -462
  11. sqlspec/adapters/asyncmy/__init__.py +3 -8
  12. sqlspec/adapters/asyncmy/config.py +247 -202
  13. sqlspec/adapters/asyncmy/driver.py +217 -451
  14. sqlspec/adapters/asyncpg/__init__.py +4 -7
  15. sqlspec/adapters/asyncpg/config.py +329 -176
  16. sqlspec/adapters/asyncpg/driver.py +418 -498
  17. sqlspec/adapters/bigquery/__init__.py +2 -2
  18. sqlspec/adapters/bigquery/config.py +407 -0
  19. sqlspec/adapters/bigquery/driver.py +592 -634
  20. sqlspec/adapters/duckdb/__init__.py +4 -1
  21. sqlspec/adapters/duckdb/config.py +432 -321
  22. sqlspec/adapters/duckdb/driver.py +393 -436
  23. sqlspec/adapters/oracledb/__init__.py +3 -8
  24. sqlspec/adapters/oracledb/config.py +625 -0
  25. sqlspec/adapters/oracledb/driver.py +549 -942
  26. sqlspec/adapters/psqlpy/__init__.py +4 -7
  27. sqlspec/adapters/psqlpy/config.py +372 -203
  28. sqlspec/adapters/psqlpy/driver.py +197 -550
  29. sqlspec/adapters/psycopg/__init__.py +3 -8
  30. sqlspec/adapters/psycopg/config.py +741 -0
  31. sqlspec/adapters/psycopg/driver.py +732 -733
  32. sqlspec/adapters/sqlite/__init__.py +2 -6
  33. sqlspec/adapters/sqlite/config.py +146 -81
  34. sqlspec/adapters/sqlite/driver.py +243 -426
  35. sqlspec/base.py +220 -825
  36. sqlspec/config.py +354 -0
  37. sqlspec/driver/__init__.py +22 -0
  38. sqlspec/driver/_async.py +252 -0
  39. sqlspec/driver/_common.py +338 -0
  40. sqlspec/driver/_sync.py +261 -0
  41. sqlspec/driver/mixins/__init__.py +17 -0
  42. sqlspec/driver/mixins/_pipeline.py +523 -0
  43. sqlspec/driver/mixins/_result_utils.py +122 -0
  44. sqlspec/driver/mixins/_sql_translator.py +35 -0
  45. sqlspec/driver/mixins/_storage.py +993 -0
  46. sqlspec/driver/mixins/_type_coercion.py +131 -0
  47. sqlspec/exceptions.py +299 -7
  48. sqlspec/extensions/aiosql/__init__.py +10 -0
  49. sqlspec/extensions/aiosql/adapter.py +474 -0
  50. sqlspec/extensions/litestar/__init__.py +1 -6
  51. sqlspec/extensions/litestar/_utils.py +1 -5
  52. sqlspec/extensions/litestar/config.py +5 -6
  53. sqlspec/extensions/litestar/handlers.py +13 -12
  54. sqlspec/extensions/litestar/plugin.py +22 -24
  55. sqlspec/extensions/litestar/providers.py +37 -55
  56. sqlspec/loader.py +528 -0
  57. sqlspec/service/__init__.py +3 -0
  58. sqlspec/service/base.py +24 -0
  59. sqlspec/service/pagination.py +26 -0
  60. sqlspec/statement/__init__.py +21 -0
  61. sqlspec/statement/builder/__init__.py +54 -0
  62. sqlspec/statement/builder/_ddl_utils.py +119 -0
  63. sqlspec/statement/builder/_parsing_utils.py +135 -0
  64. sqlspec/statement/builder/base.py +328 -0
  65. sqlspec/statement/builder/ddl.py +1379 -0
  66. sqlspec/statement/builder/delete.py +80 -0
  67. sqlspec/statement/builder/insert.py +274 -0
  68. sqlspec/statement/builder/merge.py +95 -0
  69. sqlspec/statement/builder/mixins/__init__.py +65 -0
  70. sqlspec/statement/builder/mixins/_aggregate_functions.py +151 -0
  71. sqlspec/statement/builder/mixins/_case_builder.py +91 -0
  72. sqlspec/statement/builder/mixins/_common_table_expr.py +91 -0
  73. sqlspec/statement/builder/mixins/_delete_from.py +34 -0
  74. sqlspec/statement/builder/mixins/_from.py +61 -0
  75. sqlspec/statement/builder/mixins/_group_by.py +119 -0
  76. sqlspec/statement/builder/mixins/_having.py +35 -0
  77. sqlspec/statement/builder/mixins/_insert_from_select.py +48 -0
  78. sqlspec/statement/builder/mixins/_insert_into.py +36 -0
  79. sqlspec/statement/builder/mixins/_insert_values.py +69 -0
  80. sqlspec/statement/builder/mixins/_join.py +110 -0
  81. sqlspec/statement/builder/mixins/_limit_offset.py +53 -0
  82. sqlspec/statement/builder/mixins/_merge_clauses.py +405 -0
  83. sqlspec/statement/builder/mixins/_order_by.py +46 -0
  84. sqlspec/statement/builder/mixins/_pivot.py +82 -0
  85. sqlspec/statement/builder/mixins/_returning.py +37 -0
  86. sqlspec/statement/builder/mixins/_select_columns.py +60 -0
  87. sqlspec/statement/builder/mixins/_set_ops.py +122 -0
  88. sqlspec/statement/builder/mixins/_unpivot.py +80 -0
  89. sqlspec/statement/builder/mixins/_update_from.py +54 -0
  90. sqlspec/statement/builder/mixins/_update_set.py +91 -0
  91. sqlspec/statement/builder/mixins/_update_table.py +29 -0
  92. sqlspec/statement/builder/mixins/_where.py +374 -0
  93. sqlspec/statement/builder/mixins/_window_functions.py +86 -0
  94. sqlspec/statement/builder/protocols.py +20 -0
  95. sqlspec/statement/builder/select.py +206 -0
  96. sqlspec/statement/builder/update.py +178 -0
  97. sqlspec/statement/filters.py +571 -0
  98. sqlspec/statement/parameters.py +736 -0
  99. sqlspec/statement/pipelines/__init__.py +67 -0
  100. sqlspec/statement/pipelines/analyzers/__init__.py +9 -0
  101. sqlspec/statement/pipelines/analyzers/_analyzer.py +649 -0
  102. sqlspec/statement/pipelines/base.py +315 -0
  103. sqlspec/statement/pipelines/context.py +119 -0
  104. sqlspec/statement/pipelines/result_types.py +41 -0
  105. sqlspec/statement/pipelines/transformers/__init__.py +8 -0
  106. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +256 -0
  107. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +623 -0
  108. sqlspec/statement/pipelines/transformers/_remove_comments.py +66 -0
  109. sqlspec/statement/pipelines/transformers/_remove_hints.py +81 -0
  110. sqlspec/statement/pipelines/validators/__init__.py +23 -0
  111. sqlspec/statement/pipelines/validators/_dml_safety.py +275 -0
  112. sqlspec/statement/pipelines/validators/_parameter_style.py +297 -0
  113. sqlspec/statement/pipelines/validators/_performance.py +703 -0
  114. sqlspec/statement/pipelines/validators/_security.py +990 -0
  115. sqlspec/statement/pipelines/validators/base.py +67 -0
  116. sqlspec/statement/result.py +527 -0
  117. sqlspec/statement/splitter.py +701 -0
  118. sqlspec/statement/sql.py +1198 -0
  119. sqlspec/storage/__init__.py +15 -0
  120. sqlspec/storage/backends/__init__.py +0 -0
  121. sqlspec/storage/backends/base.py +166 -0
  122. sqlspec/storage/backends/fsspec.py +315 -0
  123. sqlspec/storage/backends/obstore.py +464 -0
  124. sqlspec/storage/protocol.py +170 -0
  125. sqlspec/storage/registry.py +315 -0
  126. sqlspec/typing.py +157 -36
  127. sqlspec/utils/correlation.py +155 -0
  128. sqlspec/utils/deprecation.py +3 -6
  129. sqlspec/utils/fixtures.py +6 -11
  130. sqlspec/utils/logging.py +135 -0
  131. sqlspec/utils/module_loader.py +45 -43
  132. sqlspec/utils/serializers.py +4 -0
  133. sqlspec/utils/singleton.py +6 -8
  134. sqlspec/utils/sync_tools.py +15 -27
  135. sqlspec/utils/text.py +58 -26
  136. {sqlspec-0.11.0.dist-info → sqlspec-0.12.0.dist-info}/METADATA +100 -26
  137. sqlspec-0.12.0.dist-info/RECORD +145 -0
  138. sqlspec/adapters/bigquery/config/__init__.py +0 -3
  139. sqlspec/adapters/bigquery/config/_common.py +0 -40
  140. sqlspec/adapters/bigquery/config/_sync.py +0 -87
  141. sqlspec/adapters/oracledb/config/__init__.py +0 -9
  142. sqlspec/adapters/oracledb/config/_asyncio.py +0 -186
  143. sqlspec/adapters/oracledb/config/_common.py +0 -131
  144. sqlspec/adapters/oracledb/config/_sync.py +0 -186
  145. sqlspec/adapters/psycopg/config/__init__.py +0 -19
  146. sqlspec/adapters/psycopg/config/_async.py +0 -169
  147. sqlspec/adapters/psycopg/config/_common.py +0 -56
  148. sqlspec/adapters/psycopg/config/_sync.py +0 -168
  149. sqlspec/filters.py +0 -330
  150. sqlspec/mixins.py +0 -306
  151. sqlspec/statement.py +0 -378
  152. sqlspec-0.11.0.dist-info/RECORD +0 -69
  153. {sqlspec-0.11.0.dist-info → sqlspec-0.12.0.dist-info}/WHEEL +0 -0
  154. {sqlspec-0.11.0.dist-info → sqlspec-0.12.0.dist-info}/licenses/LICENSE +0 -0
  155. {sqlspec-0.11.0.dist-info → sqlspec-0.12.0.dist-info}/licenses/NOTICE +0 -0
@@ -1,364 +1,459 @@
1
+ """DuckDB database configuration with direct field-based configuration."""
2
+
3
+ import logging
1
4
  from contextlib import contextmanager
2
- from dataclasses import dataclass, field
3
- from typing import TYPE_CHECKING, Any, Callable, Optional, Union, cast
5
+ from dataclasses import replace
6
+ from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, TypedDict
4
7
 
5
- from typing_extensions import Literal, NotRequired, TypedDict
8
+ import duckdb
9
+ from typing_extensions import NotRequired
6
10
 
7
11
  from sqlspec.adapters.duckdb.driver import DuckDBConnection, DuckDBDriver
8
- from sqlspec.base import NoPoolSyncConfig
9
- from sqlspec.exceptions import ImproperConfigurationError
10
- from sqlspec.typing import Empty, EmptyType, dataclass_to_dict
12
+ from sqlspec.config import NoPoolSyncConfig
13
+ from sqlspec.statement.sql import SQLConfig
14
+ from sqlspec.typing import DictRow, Empty
11
15
 
12
16
  if TYPE_CHECKING:
13
17
  from collections.abc import Generator, Sequence
18
+ from contextlib import AbstractContextManager
19
+
20
+ from sqlglot.dialects.dialect import DialectType
21
+
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+ __all__ = ("CONNECTION_FIELDS", "DuckDBConfig", "DuckDBExtensionConfig", "DuckDBSecretConfig")
26
+
27
+
28
+ CONNECTION_FIELDS = frozenset(
29
+ {
30
+ "database",
31
+ "read_only",
32
+ "config",
33
+ "memory_limit",
34
+ "threads",
35
+ "temp_directory",
36
+ "max_temp_directory_size",
37
+ "autoload_known_extensions",
38
+ "autoinstall_known_extensions",
39
+ "allow_community_extensions",
40
+ "allow_unsigned_extensions",
41
+ "extension_directory",
42
+ "custom_extension_repository",
43
+ "autoinstall_extension_repository",
44
+ "allow_persistent_secrets",
45
+ "enable_external_access",
46
+ "secret_directory",
47
+ "enable_object_cache",
48
+ "parquet_metadata_cache",
49
+ "enable_external_file_cache",
50
+ "checkpoint_threshold",
51
+ "enable_progress_bar",
52
+ "progress_bar_time",
53
+ "enable_logging",
54
+ "log_query_path",
55
+ "logging_level",
56
+ "preserve_insertion_order",
57
+ "default_null_order",
58
+ "default_order",
59
+ "ieee_floating_point_ops",
60
+ "binary_as_string",
61
+ "arrow_large_buffer_size",
62
+ "errors_as_json",
63
+ }
64
+ )
65
+
66
+
67
+ class DuckDBExtensionConfig(TypedDict, total=False):
68
+ """DuckDB extension configuration for auto-management."""
14
69
 
70
+ name: str
71
+ """Name of the extension to install/load."""
15
72
 
16
- __all__ = ("DuckDBConfig", "ExtensionConfig")
73
+ version: NotRequired[str]
74
+ """Specific version of the extension."""
17
75
 
76
+ repository: NotRequired[str]
77
+ """Repository for the extension (core, community, or custom URL)."""
18
78
 
19
- class ExtensionConfig(TypedDict):
20
- """Configuration for a DuckDB extension.
79
+ force_install: NotRequired[bool]
80
+ """Force reinstallation of the extension."""
21
81
 
22
- This class provides configuration options for DuckDB extensions, including installation
23
- and post-install configuration settings.
24
82
 
25
- For details see: https://duckdb.org/docs/extensions/overview
26
- """
83
+ class DuckDBSecretConfig(TypedDict, total=False):
84
+ """DuckDB secret configuration for AI/API integrations."""
27
85
 
28
- name: str
29
- """The name of the extension to install"""
30
- config: "NotRequired[dict[str, Any]]"
31
- """Optional configuration settings to apply after installation"""
32
- install_if_missing: "NotRequired[bool]"
33
- """Whether to install if missing"""
34
- force_install: "NotRequired[bool]"
35
- """Whether to force reinstall if already present"""
36
- repository: "NotRequired[str]"
37
- """Optional repository name to install from"""
38
- repository_url: "NotRequired[str]"
39
- """Optional repository URL to install from"""
40
- version: "NotRequired[str]"
41
- """Optional version of the extension to install"""
42
-
43
-
44
- class SecretConfig(TypedDict):
45
- """Configuration for a secret to store in a connection.
46
-
47
- This class provides configuration options for storing a secret in a connection for later retrieval.
48
-
49
- For details see: https://duckdb.org/docs/stable/configuration/secrets_manager
50
- """
86
+ secret_type: str
87
+ """Type of secret (e.g., 'openai', 'aws', 'azure', 'gcp')."""
51
88
 
52
- secret_type: Union[
53
- Literal[
54
- "azure", "gcs", "s3", "r2", "huggingface", "http", "mysql", "postgres", "bigquery", "openai", "open_prompt" # noqa: PYI051
55
- ],
56
- str,
57
- ]
58
- provider: NotRequired[str]
59
- """The provider of the secret"""
60
89
  name: str
61
- """The name of the secret to store"""
62
- value: dict[str, Any]
63
- """The secret value to store"""
64
- persist: NotRequired[bool]
65
- """Whether to persist the secret"""
66
- replace_if_exists: NotRequired[bool]
67
- """Whether to replace the secret if it already exists"""
68
-
69
-
70
- @dataclass
71
- class DuckDBConfig(NoPoolSyncConfig["DuckDBConnection", "DuckDBDriver"]):
72
- """Configuration for DuckDB database connections.
90
+ """Name of the secret."""
73
91
 
74
- This class provides configuration options for DuckDB database connections, wrapping all parameters
75
- available to duckdb.connect().
92
+ value: dict[str, Any]
93
+ """Secret configuration values."""
76
94
 
77
- For details see: https://duckdb.org/docs/api/python/overview#connection-options
78
- """
95
+ scope: NotRequired[str]
96
+ """Scope of the secret (LOCAL or PERSISTENT)."""
79
97
 
80
- database: "Union[str, EmptyType]" = field(default=":memory:")
81
- """The path to the database file to be opened. Pass ":memory:" to open a connection to a database that resides in RAM instead of on disk. If not specified, an in-memory database will be created."""
82
98
 
83
- read_only: "Union[bool, EmptyType]" = Empty
84
- """If True, the database will be opened in read-only mode. This is required if multiple processes want to access the same database file at the same time."""
99
+ class DuckDBConfig(NoPoolSyncConfig[DuckDBConnection, DuckDBDriver]):
100
+ """Enhanced DuckDB configuration with intelligent features and modern architecture.
85
101
 
86
- config: "Union[dict[str, Any], EmptyType]" = Empty
87
- """A dictionary of configuration options to be passed to DuckDB. These can include settings like 'access_mode', 'max_memory', 'threads', etc.
102
+ DuckDB is an embedded analytical database that doesn't require connection pooling.
103
+ This configuration supports all of DuckDB's unique features including:
88
104
 
89
- For details see: https://duckdb.org/docs/api/python/overview#connection-options
105
+ - Extension auto-management and installation
106
+ - Secret management for API integrations
107
+ - Intelligent auto configuration settings
108
+ - High-performance Arrow integration
109
+ - Direct file querying capabilities
110
+ - Performance optimizations for analytics workloads
90
111
  """
91
112
 
92
- extensions: "Union[Sequence[ExtensionConfig], ExtensionConfig, EmptyType]" = Empty
93
- """A sequence of extension configurations to install and configure upon connection creation."""
94
- secrets: "Union[Sequence[SecretConfig], SecretConfig , EmptyType]" = Empty
95
- """A dictionary of secrets to store in the connection for later retrieval."""
96
- auto_update_extensions: "bool" = False
97
- """Whether to automatically update on connection creation"""
98
- on_connection_create: "Optional[Callable[[DuckDBConnection], Optional[DuckDBConnection]]]" = None
99
- """A callable to be called after the connection is created."""
100
- connection_type: "type[DuckDBConnection]" = field(init=False, default_factory=lambda: DuckDBConnection)
101
- """The type of connection to create. Defaults to DuckDBConnection."""
102
- driver_type: "type[DuckDBDriver]" = field(init=False, default_factory=lambda: DuckDBDriver) # type: ignore[type-abstract,unused-ignore]
103
- """The type of driver to use. Defaults to DuckDBDriver."""
104
- pool_instance: "None" = field(init=False, default=None)
105
- """The pool instance to use. Defaults to None."""
106
-
107
- def __post_init__(self) -> None:
108
- """Post-initialization validation and processing.
109
-
110
-
111
- Raises:
112
- ImproperConfigurationError: If there are duplicate extension configurations.
113
- """
114
- if self.config is Empty:
115
- self.config = {}
116
- if self.extensions is Empty:
117
- self.extensions = []
118
- if self.secrets is Empty:
119
- self.secrets = []
120
- if isinstance(self.extensions, dict):
121
- self.extensions = [self.extensions]
122
- # this is purely for mypy
123
- assert isinstance(self.config, dict) # noqa: S101
124
- assert isinstance(self.extensions, list) # noqa: S101
125
- config_exts: list[ExtensionConfig] = self.config.pop("extensions", [])
126
- if not isinstance(config_exts, list): # pyright: ignore[reportUnnecessaryIsInstance]
127
- config_exts = [config_exts] # type: ignore[unreachable]
128
-
129
- try:
130
- if (
131
- len(set({ext["name"] for ext in config_exts}).intersection({ext["name"] for ext in self.extensions}))
132
- > 0
133
- ): # pyright: ignore[ reportUnknownArgumentType]
134
- msg = "Configuring the same extension in both 'extensions' and as a key in 'config['extensions']' is not allowed. Please use only one method to configure extensions."
135
- raise ImproperConfigurationError(msg)
136
- except (KeyError, TypeError) as e:
137
- msg = "When configuring extensions in the 'config' dictionary, the value must be a dictionary or sequence of extension names"
138
- raise ImproperConfigurationError(msg) from e
139
- self.extensions.extend(config_exts)
140
-
141
- def _configure_connection(self, connection: "DuckDBConnection") -> None:
142
- """Configure the connection.
143
-
144
- Args:
145
- connection: The DuckDB connection to configure.
146
- """
147
- for key, value in cast("dict[str,Any]", self.config).items():
148
- connection.execute(f"SET {key}='{value}'")
149
-
150
- def _configure_extensions(self, connection: "DuckDBConnection") -> None:
151
- """Configure extensions for the connection.
152
-
153
- Args:
154
- connection: The DuckDB connection to configure extensions for.
155
-
156
-
157
- """
158
- if self.extensions is Empty:
159
- return
160
-
161
- for extension in cast("list[ExtensionConfig]", self.extensions):
162
- self._configure_extension(connection, extension)
163
- if self.auto_update_extensions:
164
- connection.execute("update extensions")
165
-
166
- @staticmethod
167
- def _secret_exists(connection: "DuckDBConnection", name: "str") -> bool:
168
- """Check if a secret exists in the connection.
169
-
170
- Args:
171
- connection: The DuckDB connection to check for the secret.
172
- name: The name of the secret to check for.
173
-
174
- Returns:
175
- bool: True if the secret exists, False otherwise.
176
- """
177
- results = connection.execute("select 1 from duckdb_secrets() where name=?", [name]).fetchone() # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
178
- return results is not None
179
-
180
- @classmethod
181
- def _is_community_extension(cls, connection: "DuckDBConnection", name: "str") -> bool:
182
- """Check if an extension is a community extension.
183
-
184
- Args:
185
- connection: The DuckDB connection to check for the extension.
186
- name: The name of the extension to check.
187
-
188
- Returns:
189
- bool: True if the extension is a community extension, False otherwise.
190
- """
191
- results = connection.execute( # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
192
- "select 1 from duckdb_extensions() where extension_name=?", [name]
193
- ).fetchone()
194
- return results is None
195
-
196
- @classmethod
197
- def _extension_installed(cls, connection: "DuckDBConnection", name: "str") -> bool:
198
- """Check if a extension exists in the connection.
199
-
200
- Args:
201
- connection: The DuckDB connection to check for the secret.
202
- name: The name of the secret to check for.
203
-
204
- Returns:
205
- bool: True if the extension is installed, False otherwise.
206
- """
207
- results = connection.execute( # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
208
- "select 1 from duckdb_extensions() where extension_name=? and installed=true", [name]
209
- ).fetchone()
210
- return results is not None
211
-
212
- @classmethod
213
- def _extension_loaded(cls, connection: "DuckDBConnection", name: "str") -> bool:
214
- """Check if a extension is loaded in the connection.
113
+ __slots__ = (
114
+ "_dialect",
115
+ "allow_community_extensions",
116
+ "allow_persistent_secrets",
117
+ "allow_unsigned_extensions",
118
+ "arrow_large_buffer_size",
119
+ "autoinstall_extension_repository",
120
+ "autoinstall_known_extensions",
121
+ "autoload_known_extensions",
122
+ "binary_as_string",
123
+ "checkpoint_threshold",
124
+ "config",
125
+ "custom_extension_repository",
126
+ "database",
127
+ "default_null_order",
128
+ "default_order",
129
+ "default_row_type",
130
+ "enable_external_access",
131
+ "enable_external_file_cache",
132
+ "enable_logging",
133
+ "enable_object_cache",
134
+ "enable_progress_bar",
135
+ "errors_as_json",
136
+ "extension_directory",
137
+ "extensions",
138
+ "extras",
139
+ "ieee_floating_point_ops",
140
+ "log_query_path",
141
+ "logging_level",
142
+ "max_temp_directory_size",
143
+ "memory_limit",
144
+ "on_connection_create",
145
+ "parquet_metadata_cache",
146
+ "pool_instance",
147
+ "preserve_insertion_order",
148
+ "progress_bar_time",
149
+ "read_only",
150
+ "secret_directory",
151
+ "secrets",
152
+ "statement_config",
153
+ "temp_directory",
154
+ "threads",
155
+ )
156
+
157
+ is_async: ClassVar[bool] = False
158
+ supports_connection_pooling: ClassVar[bool] = False
159
+
160
+ driver_type: type[DuckDBDriver] = DuckDBDriver
161
+ connection_type: type[DuckDBConnection] = DuckDBConnection
162
+
163
+ supported_parameter_styles: ClassVar[tuple[str, ...]] = ("qmark", "numeric")
164
+ """DuckDB supports ? (qmark) and $1, $2 (numeric) parameter styles."""
165
+
166
+ preferred_parameter_style: ClassVar[str] = "qmark"
167
+ """DuckDB's native parameter style is ? (qmark)."""
168
+
169
+ def __init__(
170
+ self,
171
+ statement_config: "Optional[SQLConfig]" = None,
172
+ default_row_type: type[DictRow] = DictRow,
173
+ # Core connection parameters
174
+ database: Optional[str] = None,
175
+ read_only: Optional[bool] = None,
176
+ config: Optional[dict[str, Any]] = None,
177
+ # Resource management
178
+ memory_limit: Optional[str] = None,
179
+ threads: Optional[int] = None,
180
+ temp_directory: Optional[str] = None,
181
+ max_temp_directory_size: Optional[str] = None,
182
+ # Extension configuration
183
+ autoload_known_extensions: Optional[bool] = None,
184
+ autoinstall_known_extensions: Optional[bool] = None,
185
+ allow_community_extensions: Optional[bool] = None,
186
+ allow_unsigned_extensions: Optional[bool] = None,
187
+ extension_directory: Optional[str] = None,
188
+ custom_extension_repository: Optional[str] = None,
189
+ autoinstall_extension_repository: Optional[str] = None,
190
+ # Security and access
191
+ allow_persistent_secrets: Optional[bool] = None,
192
+ enable_external_access: Optional[bool] = None,
193
+ secret_directory: Optional[str] = None,
194
+ # Performance optimizations
195
+ enable_object_cache: Optional[bool] = None,
196
+ parquet_metadata_cache: Optional[bool] = None,
197
+ enable_external_file_cache: Optional[bool] = None,
198
+ checkpoint_threshold: Optional[str] = None,
199
+ # User experience
200
+ enable_progress_bar: Optional[bool] = None,
201
+ progress_bar_time: Optional[int] = None,
202
+ # Logging and debugging
203
+ enable_logging: Optional[bool] = None,
204
+ log_query_path: Optional[str] = None,
205
+ logging_level: Optional[str] = None,
206
+ # Data processing settings
207
+ preserve_insertion_order: Optional[bool] = None,
208
+ default_null_order: Optional[str] = None,
209
+ default_order: Optional[str] = None,
210
+ ieee_floating_point_ops: Optional[bool] = None,
211
+ # File format settings
212
+ binary_as_string: Optional[bool] = None,
213
+ arrow_large_buffer_size: Optional[bool] = None,
214
+ # Error handling
215
+ errors_as_json: Optional[bool] = None,
216
+ # DuckDB intelligent features
217
+ extensions: "Optional[Sequence[DuckDBExtensionConfig]]" = None,
218
+ secrets: "Optional[Sequence[DuckDBSecretConfig]]" = None,
219
+ on_connection_create: "Optional[Callable[[DuckDBConnection], Optional[DuckDBConnection]]]" = None,
220
+ **kwargs: Any,
221
+ ) -> None:
222
+ """Initialize DuckDB configuration with intelligent features.
215
223
 
216
224
  Args:
217
- connection: The DuckDB connection to check for the extension.
218
- name: The name of the extension to check for.
219
-
220
- Returns:
221
- bool: True if the extension is loaded, False otherwise.
225
+ statement_config: Default SQL statement configuration
226
+ default_row_type: Default row type for results
227
+ database: Path to the DuckDB database file. Use ':memory:' for in-memory database
228
+ read_only: Whether to open the database in read-only mode
229
+ config: DuckDB configuration options passed directly to the connection
230
+ memory_limit: Maximum memory usage (e.g., '1GB', '80% of RAM')
231
+ threads: Number of threads to use for parallel query execution
232
+ temp_directory: Directory for temporary files during spilling
233
+ max_temp_directory_size: Maximum size of temp directory (e.g., '1GB')
234
+ autoload_known_extensions: Automatically load known extensions when needed
235
+ autoinstall_known_extensions: Automatically install known extensions when needed
236
+ allow_community_extensions: Allow community-built extensions
237
+ allow_unsigned_extensions: Allow unsigned extensions (development only)
238
+ extension_directory: Directory to store extensions
239
+ custom_extension_repository: Custom endpoint for extension installation
240
+ autoinstall_extension_repository: Override endpoint for autoloading extensions
241
+ allow_persistent_secrets: Enable persistent secret storage
242
+ enable_external_access: Allow external file system access
243
+ secret_directory: Directory for persistent secrets
244
+ enable_object_cache: Enable caching of objects (e.g., Parquet metadata)
245
+ parquet_metadata_cache: Cache Parquet metadata for repeated access
246
+ enable_external_file_cache: Cache external files in memory
247
+ checkpoint_threshold: WAL size threshold for automatic checkpoints
248
+ enable_progress_bar: Show progress bar for long queries
249
+ progress_bar_time: Time in milliseconds before showing progress bar
250
+ enable_logging: Enable DuckDB logging
251
+ log_query_path: Path to log queries for debugging
252
+ logging_level: Log level (DEBUG, INFO, WARNING, ERROR)
253
+ preserve_insertion_order: Whether to preserve insertion order in results
254
+ default_null_order: Default NULL ordering (NULLS_FIRST, NULLS_LAST)
255
+ default_order: Default sort order (ASC, DESC)
256
+ ieee_floating_point_ops: Use IEEE 754 compliant floating point operations
257
+ binary_as_string: Interpret binary data as string in Parquet files
258
+ arrow_large_buffer_size: Use large Arrow buffers for strings, blobs, etc.
259
+ errors_as_json: Return errors in JSON format
260
+ extensions: List of extension dicts to auto-install/load with keys: name, version, repository, force_install
261
+ secrets: List of secret dicts for AI/API integrations with keys: secret_type, name, value, scope
262
+ on_connection_create: Callback executed when connection is created
263
+ **kwargs: Additional parameters (stored in extras)
264
+
265
+ Example:
266
+ >>> config = DuckDBConfig(
267
+ ... database=":memory:",
268
+ ... memory_limit="1GB",
269
+ ... threads=4,
270
+ ... autoload_known_extensions=True,
271
+ ... extensions=[
272
+ ... {"name": "spatial", "repository": "core"},
273
+ ... {"name": "aws", "repository": "core"},
274
+ ... ],
275
+ ... secrets=[
276
+ ... {
277
+ ... "secret_type": "openai",
278
+ ... "name": "my_openai_secret",
279
+ ... "value": {"api_key": "sk-..."},
280
+ ... }
281
+ ... ],
282
+ ... )
222
283
  """
223
- results = connection.execute( # pyright: ignore[reportUnknownMemberType,reportUnknownVariableType]
224
- "select 1 from duckdb_extensions() where extension_name=? and loaded=true", [name]
225
- ).fetchone()
226
- return results is not None
227
-
228
- @classmethod
229
- def _configure_secrets(
230
- cls,
231
- connection: "DuckDBConnection",
232
- secrets: "Sequence[SecretConfig]",
233
- ) -> None:
234
- """Configure persistent secrets for the connection.
284
+ # Store connection parameters as instance attributes
285
+ self.database = database or ":memory:"
286
+ self.read_only = read_only
287
+ self.config = config
288
+ self.memory_limit = memory_limit
289
+ self.threads = threads
290
+ self.temp_directory = temp_directory
291
+ self.max_temp_directory_size = max_temp_directory_size
292
+ self.autoload_known_extensions = autoload_known_extensions
293
+ self.autoinstall_known_extensions = autoinstall_known_extensions
294
+ self.allow_community_extensions = allow_community_extensions
295
+ self.allow_unsigned_extensions = allow_unsigned_extensions
296
+ self.extension_directory = extension_directory
297
+ self.custom_extension_repository = custom_extension_repository
298
+ self.autoinstall_extension_repository = autoinstall_extension_repository
299
+ self.allow_persistent_secrets = allow_persistent_secrets
300
+ self.enable_external_access = enable_external_access
301
+ self.secret_directory = secret_directory
302
+ self.enable_object_cache = enable_object_cache
303
+ self.parquet_metadata_cache = parquet_metadata_cache
304
+ self.enable_external_file_cache = enable_external_file_cache
305
+ self.checkpoint_threshold = checkpoint_threshold
306
+ self.enable_progress_bar = enable_progress_bar
307
+ self.progress_bar_time = progress_bar_time
308
+ self.enable_logging = enable_logging
309
+ self.log_query_path = log_query_path
310
+ self.logging_level = logging_level
311
+ self.preserve_insertion_order = preserve_insertion_order
312
+ self.default_null_order = default_null_order
313
+ self.default_order = default_order
314
+ self.ieee_floating_point_ops = ieee_floating_point_ops
315
+ self.binary_as_string = binary_as_string
316
+ self.arrow_large_buffer_size = arrow_large_buffer_size
317
+ self.errors_as_json = errors_as_json
318
+
319
+ self.extras = kwargs or {}
320
+
321
+ # Store other config
322
+ self.statement_config = statement_config or SQLConfig()
323
+ self.default_row_type = default_row_type
324
+
325
+ # DuckDB intelligent features
326
+ self.extensions = extensions or []
327
+ self.secrets = secrets or []
328
+ self.on_connection_create = on_connection_create
329
+ self._dialect: DialectType = None
330
+
331
+ super().__init__()
235
332
 
236
- Args:
237
- connection: The DuckDB connection to configure secrets for.
238
- secrets: The list of secrets to store in the connection.
333
+ @property
334
+ def connection_config_dict(self) -> dict[str, Any]:
335
+ """Return the connection configuration as a dict for duckdb.connect()."""
336
+ # DuckDB connect() only accepts database, read_only, and config parameters
337
+ connect_params: dict[str, Any] = {}
239
338
 
240
- Raises:
241
- ImproperConfigurationError: If a secret could not be stored in the connection.
242
- """
243
- try:
244
- for secret in secrets:
245
- secret_exists = cls._secret_exists(connection, secret["name"])
246
- if not secret_exists or secret.get("replace_if_exists", False):
247
- provider_type = "" if not secret.get("provider") else f"provider {secret.get('provider')},"
248
- connection.execute(
249
- f"""create or replace {"persistent" if secret.get("persist", False) else ""} secret {secret["name"]} (
250
- type {secret["secret_type"]},
251
- {provider_type}
252
- {" ,".join([f"{k} '{v}'" for k, v in secret["value"].items()])}
253
- ) """
254
- )
255
- except Exception as e:
256
- msg = f"Failed to store secret. Error: {e!s}"
257
- raise ImproperConfigurationError(msg) from e
339
+ # Set database if provided
340
+ if hasattr(self, "database") and self.database is not None:
341
+ connect_params["database"] = self.database
258
342
 
259
- @classmethod
260
- def _configure_extension(cls, connection: "DuckDBConnection", extension: "ExtensionConfig") -> None:
261
- """Configure a single extension for the connection.
343
+ # Set read_only if provided
344
+ if hasattr(self, "read_only") and self.read_only is not None:
345
+ connect_params["read_only"] = self.read_only
262
346
 
263
- Args:
264
- connection: The DuckDB connection to configure extension for.
265
- extension: The extension configuration to apply.
347
+ # All other parameters go into the config dict
348
+ config_dict = {}
349
+ for field in CONNECTION_FIELDS:
350
+ if field not in {"database", "read_only", "config"}:
351
+ value = getattr(self, field, None)
352
+ if value is not None and value is not Empty:
353
+ config_dict[field] = value
266
354
 
267
- Raises:
268
- ImproperConfigurationError: If extension installation or configuration fails.
269
- """
270
- try:
271
- # Install extension if needed
272
- if (
273
- not cls._extension_installed(connection, extension["name"])
274
- and extension.get("install_if_missing", True)
275
- ) or extension.get("force_install", False):
276
- repository = extension.get("repository", None)
277
- repository_url = (
278
- "https://community-extensions.duckdb.org"
279
- if repository is None
280
- and cls._is_community_extension(connection, extension["name"])
281
- and extension.get("repository_url") is None
282
- else extension.get("repository_url", None)
283
- )
284
- connection.install_extension(
285
- extension=extension["name"],
286
- force_install=extension.get("force_install", False),
287
- repository=repository,
288
- repository_url=repository_url,
289
- version=extension.get("version"),
290
- )
291
-
292
- # Load extension if not already loaded
293
- if not cls._extension_loaded(connection, extension["name"]):
294
- connection.load_extension(extension["name"])
295
-
296
- # Apply any configuration settings
297
- if extension.get("config"):
298
- for key, value in extension.get("config", {}).items():
299
- connection.execute(f"SET {key}={value}")
300
- except Exception as e:
301
- msg = f"Failed to configure extension {extension['name']}. Error: {e!s}"
302
- raise ImproperConfigurationError(msg) from e
355
+ # Add extras to config dict
356
+ config_dict.update(self.extras)
303
357
 
304
- @property
305
- def connection_config_dict(self) -> "dict[str, Any]":
306
- """Return the connection configuration as a dict.
358
+ # If we have config parameters, add them
359
+ if config_dict:
360
+ connect_params["config"] = config_dict
307
361
 
308
- Returns:
309
- A string keyed dict of config kwargs for the duckdb.connect() function.
310
- """
311
- config = dataclass_to_dict(
312
- self,
313
- exclude_empty=True,
314
- exclude={
315
- "extensions",
316
- "pool_instance",
317
- "secrets",
318
- "on_connection_create",
319
- "auto_update_extensions",
320
- "driver_type",
321
- "connection_type",
322
- "connection_instance",
323
- },
324
- convert_nested=False,
325
- )
326
- if not config.get("database"):
327
- config["database"] = ":memory:"
328
- return config
329
-
330
- def create_connection(self) -> "DuckDBConnection":
331
- """Create and return a new database connection with configured extensions.
362
+ return connect_params
332
363
 
333
- Returns:
334
- A new DuckDB connection instance with extensions installed and configured.
364
+ def create_connection(self) -> DuckDBConnection:
365
+ """Create and return a DuckDB connection with intelligent configuration applied."""
335
366
 
336
- Raises:
337
- ImproperConfigurationError: If the connection could not be established or extensions could not be configured.
338
- """
339
- import duckdb
367
+ logger.info("Creating DuckDB connection", extra={"adapter": "duckdb"})
340
368
 
341
369
  try:
342
- connection = duckdb.connect(**self.connection_config_dict) # pyright: ignore[reportUnknownMemberType]
343
- self._configure_extensions(connection)
344
- self._configure_secrets(connection, cast("list[SecretConfig]", self.secrets))
345
- self._configure_connection(connection)
370
+ config_dict = self.connection_config_dict
371
+ connection = duckdb.connect(**config_dict)
372
+ logger.info("DuckDB connection created successfully", extra={"adapter": "duckdb"})
373
+
374
+ # Install and load extensions
375
+ for ext_config in self.extensions:
376
+ ext_name = None
377
+ try:
378
+ ext_name = ext_config.get("name")
379
+ if not ext_name:
380
+ continue
381
+ install_kwargs: dict[str, Any] = {}
382
+ if "version" in ext_config:
383
+ install_kwargs["version"] = ext_config["version"]
384
+ if "repository" in ext_config:
385
+ install_kwargs["repository"] = ext_config["repository"]
386
+ if ext_config.get("force_install", False):
387
+ install_kwargs["force_install"] = True
388
+
389
+ if install_kwargs or self.autoinstall_known_extensions:
390
+ connection.install_extension(ext_name, **install_kwargs)
391
+ connection.load_extension(ext_name)
392
+ logger.debug("Loaded DuckDB extension: %s", ext_name, extra={"adapter": "duckdb"})
393
+
394
+ except Exception as e:
395
+ if ext_name:
396
+ logger.warning(
397
+ "Failed to load DuckDB extension: %s",
398
+ ext_name,
399
+ extra={"adapter": "duckdb", "error": str(e)},
400
+ )
401
+
402
+ for secret_config in self.secrets:
403
+ secret_name = None
404
+ try:
405
+ secret_type = secret_config.get("secret_type")
406
+ secret_name = secret_config.get("name")
407
+ secret_value = secret_config.get("value")
408
+
409
+ if secret_type and secret_name and secret_value:
410
+ value_pairs = []
411
+ for key, value in secret_value.items():
412
+ escaped_value = str(value).replace("'", "''")
413
+ value_pairs.append(f"'{key}' = '{escaped_value}'")
414
+ value_string = ", ".join(value_pairs)
415
+ scope_clause = ""
416
+ if "scope" in secret_config:
417
+ scope_clause = f" SCOPE '{secret_config['scope']}'"
418
+
419
+ sql = f"""
420
+ CREATE SECRET {secret_name} (
421
+ TYPE {secret_type},
422
+ {value_string}
423
+ ){scope_clause}
424
+ """
425
+ connection.execute(sql)
426
+ logger.debug("Created DuckDB secret: %s", secret_name, extra={"adapter": "duckdb"})
427
+
428
+ except Exception as e:
429
+ if secret_name:
430
+ logger.warning(
431
+ "Failed to create DuckDB secret: %s",
432
+ secret_name,
433
+ extra={"adapter": "duckdb", "error": str(e)},
434
+ )
346
435
  if self.on_connection_create:
347
- self.on_connection_create(connection)
436
+ try:
437
+ self.on_connection_create(connection)
438
+ logger.debug("Executed connection creation hook", extra={"adapter": "duckdb"})
439
+ except Exception as e:
440
+ logger.warning("Connection creation hook failed", extra={"adapter": "duckdb", "error": str(e)})
348
441
 
349
442
  except Exception as e:
350
- msg = f"Could not configure the DuckDB connection. Error: {e!s}"
351
- raise ImproperConfigurationError(msg) from e
443
+ logger.exception("Failed to create DuckDB connection", extra={"adapter": "duckdb", "error": str(e)})
444
+ raise
352
445
  return connection
353
446
 
354
447
  @contextmanager
355
448
  def provide_connection(self, *args: Any, **kwargs: Any) -> "Generator[DuckDBConnection, None, None]":
356
- """Create and provide a database connection.
449
+ """Provide a DuckDB connection context manager.
450
+
451
+ Args:
452
+ *args: Additional arguments.
453
+ **kwargs: Additional keyword arguments.
357
454
 
358
455
  Yields:
359
456
  A DuckDB connection instance.
360
-
361
-
362
457
  """
363
458
  connection = self.create_connection()
364
459
  try:
@@ -366,14 +461,30 @@ class DuckDBConfig(NoPoolSyncConfig["DuckDBConnection", "DuckDBDriver"]):
366
461
  finally:
367
462
  connection.close()
368
463
 
369
- @contextmanager
370
- def provide_session(self, *args: Any, **kwargs: Any) -> "Generator[DuckDBDriver, None, None]":
371
- """Create and provide a database connection.
372
-
373
- Yields:
374
- A DuckDB connection instance.
464
+ def provide_session(self, *args: Any, **kwargs: Any) -> "AbstractContextManager[DuckDBDriver]":
465
+ """Provide a DuckDB driver session context manager.
375
466
 
467
+ Args:
468
+ *args: Additional arguments.
469
+ **kwargs: Additional keyword arguments.
376
470
 
471
+ Returns:
472
+ A context manager that yields a DuckDBDriver instance.
377
473
  """
378
- with self.provide_connection(*args, **kwargs) as connection:
379
- yield self.driver_type(connection, use_cursor=True)
474
+
475
+ @contextmanager
476
+ def session_manager() -> "Generator[DuckDBDriver, None, None]":
477
+ with self.provide_connection(*args, **kwargs) as connection:
478
+ # Create statement config with parameter style info if not already set
479
+ statement_config = self.statement_config
480
+ if statement_config.allowed_parameter_styles is None:
481
+ statement_config = replace(
482
+ statement_config,
483
+ allowed_parameter_styles=self.supported_parameter_styles,
484
+ target_parameter_style=self.preferred_parameter_style,
485
+ )
486
+
487
+ driver = self.driver_type(connection=connection, config=statement_config)
488
+ yield driver
489
+
490
+ return session_manager()