sqlspec 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sqlspec has been flagged as potentially problematic; see the release advisory for more details.
- sqlspec/__init__.py +16 -3
- sqlspec/_serialization.py +3 -10
- sqlspec/_sql.py +1147 -0
- sqlspec/_typing.py +343 -41
- sqlspec/adapters/adbc/__init__.py +2 -6
- sqlspec/adapters/adbc/config.py +474 -149
- sqlspec/adapters/adbc/driver.py +330 -621
- sqlspec/adapters/aiosqlite/__init__.py +2 -6
- sqlspec/adapters/aiosqlite/config.py +143 -57
- sqlspec/adapters/aiosqlite/driver.py +269 -431
- sqlspec/adapters/asyncmy/__init__.py +3 -8
- sqlspec/adapters/asyncmy/config.py +247 -202
- sqlspec/adapters/asyncmy/driver.py +218 -436
- sqlspec/adapters/asyncpg/__init__.py +4 -7
- sqlspec/adapters/asyncpg/config.py +329 -176
- sqlspec/adapters/asyncpg/driver.py +417 -487
- sqlspec/adapters/bigquery/__init__.py +2 -2
- sqlspec/adapters/bigquery/config.py +407 -0
- sqlspec/adapters/bigquery/driver.py +600 -553
- sqlspec/adapters/duckdb/__init__.py +4 -1
- sqlspec/adapters/duckdb/config.py +432 -321
- sqlspec/adapters/duckdb/driver.py +392 -406
- sqlspec/adapters/oracledb/__init__.py +3 -8
- sqlspec/adapters/oracledb/config.py +625 -0
- sqlspec/adapters/oracledb/driver.py +548 -921
- sqlspec/adapters/psqlpy/__init__.py +4 -7
- sqlspec/adapters/psqlpy/config.py +372 -203
- sqlspec/adapters/psqlpy/driver.py +197 -533
- sqlspec/adapters/psycopg/__init__.py +3 -8
- sqlspec/adapters/psycopg/config.py +741 -0
- sqlspec/adapters/psycopg/driver.py +734 -694
- sqlspec/adapters/sqlite/__init__.py +2 -6
- sqlspec/adapters/sqlite/config.py +146 -81
- sqlspec/adapters/sqlite/driver.py +242 -405
- sqlspec/base.py +220 -784
- sqlspec/config.py +354 -0
- sqlspec/driver/__init__.py +22 -0
- sqlspec/driver/_async.py +252 -0
- sqlspec/driver/_common.py +338 -0
- sqlspec/driver/_sync.py +261 -0
- sqlspec/driver/mixins/__init__.py +17 -0
- sqlspec/driver/mixins/_pipeline.py +523 -0
- sqlspec/driver/mixins/_result_utils.py +122 -0
- sqlspec/driver/mixins/_sql_translator.py +35 -0
- sqlspec/driver/mixins/_storage.py +993 -0
- sqlspec/driver/mixins/_type_coercion.py +131 -0
- sqlspec/exceptions.py +299 -7
- sqlspec/extensions/aiosql/__init__.py +10 -0
- sqlspec/extensions/aiosql/adapter.py +474 -0
- sqlspec/extensions/litestar/__init__.py +1 -6
- sqlspec/extensions/litestar/_utils.py +1 -5
- sqlspec/extensions/litestar/config.py +5 -6
- sqlspec/extensions/litestar/handlers.py +13 -12
- sqlspec/extensions/litestar/plugin.py +22 -24
- sqlspec/extensions/litestar/providers.py +37 -55
- sqlspec/loader.py +528 -0
- sqlspec/service/__init__.py +3 -0
- sqlspec/service/base.py +24 -0
- sqlspec/service/pagination.py +26 -0
- sqlspec/statement/__init__.py +21 -0
- sqlspec/statement/builder/__init__.py +54 -0
- sqlspec/statement/builder/_ddl_utils.py +119 -0
- sqlspec/statement/builder/_parsing_utils.py +135 -0
- sqlspec/statement/builder/base.py +328 -0
- sqlspec/statement/builder/ddl.py +1379 -0
- sqlspec/statement/builder/delete.py +80 -0
- sqlspec/statement/builder/insert.py +274 -0
- sqlspec/statement/builder/merge.py +95 -0
- sqlspec/statement/builder/mixins/__init__.py +65 -0
- sqlspec/statement/builder/mixins/_aggregate_functions.py +151 -0
- sqlspec/statement/builder/mixins/_case_builder.py +91 -0
- sqlspec/statement/builder/mixins/_common_table_expr.py +91 -0
- sqlspec/statement/builder/mixins/_delete_from.py +34 -0
- sqlspec/statement/builder/mixins/_from.py +61 -0
- sqlspec/statement/builder/mixins/_group_by.py +119 -0
- sqlspec/statement/builder/mixins/_having.py +35 -0
- sqlspec/statement/builder/mixins/_insert_from_select.py +48 -0
- sqlspec/statement/builder/mixins/_insert_into.py +36 -0
- sqlspec/statement/builder/mixins/_insert_values.py +69 -0
- sqlspec/statement/builder/mixins/_join.py +110 -0
- sqlspec/statement/builder/mixins/_limit_offset.py +53 -0
- sqlspec/statement/builder/mixins/_merge_clauses.py +405 -0
- sqlspec/statement/builder/mixins/_order_by.py +46 -0
- sqlspec/statement/builder/mixins/_pivot.py +82 -0
- sqlspec/statement/builder/mixins/_returning.py +37 -0
- sqlspec/statement/builder/mixins/_select_columns.py +60 -0
- sqlspec/statement/builder/mixins/_set_ops.py +122 -0
- sqlspec/statement/builder/mixins/_unpivot.py +80 -0
- sqlspec/statement/builder/mixins/_update_from.py +54 -0
- sqlspec/statement/builder/mixins/_update_set.py +91 -0
- sqlspec/statement/builder/mixins/_update_table.py +29 -0
- sqlspec/statement/builder/mixins/_where.py +374 -0
- sqlspec/statement/builder/mixins/_window_functions.py +86 -0
- sqlspec/statement/builder/protocols.py +20 -0
- sqlspec/statement/builder/select.py +206 -0
- sqlspec/statement/builder/update.py +178 -0
- sqlspec/statement/filters.py +571 -0
- sqlspec/statement/parameters.py +736 -0
- sqlspec/statement/pipelines/__init__.py +67 -0
- sqlspec/statement/pipelines/analyzers/__init__.py +9 -0
- sqlspec/statement/pipelines/analyzers/_analyzer.py +649 -0
- sqlspec/statement/pipelines/base.py +315 -0
- sqlspec/statement/pipelines/context.py +119 -0
- sqlspec/statement/pipelines/result_types.py +41 -0
- sqlspec/statement/pipelines/transformers/__init__.py +8 -0
- sqlspec/statement/pipelines/transformers/_expression_simplifier.py +256 -0
- sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +623 -0
- sqlspec/statement/pipelines/transformers/_remove_comments.py +66 -0
- sqlspec/statement/pipelines/transformers/_remove_hints.py +81 -0
- sqlspec/statement/pipelines/validators/__init__.py +23 -0
- sqlspec/statement/pipelines/validators/_dml_safety.py +275 -0
- sqlspec/statement/pipelines/validators/_parameter_style.py +297 -0
- sqlspec/statement/pipelines/validators/_performance.py +703 -0
- sqlspec/statement/pipelines/validators/_security.py +990 -0
- sqlspec/statement/pipelines/validators/base.py +67 -0
- sqlspec/statement/result.py +527 -0
- sqlspec/statement/splitter.py +701 -0
- sqlspec/statement/sql.py +1198 -0
- sqlspec/storage/__init__.py +15 -0
- sqlspec/storage/backends/__init__.py +0 -0
- sqlspec/storage/backends/base.py +166 -0
- sqlspec/storage/backends/fsspec.py +315 -0
- sqlspec/storage/backends/obstore.py +464 -0
- sqlspec/storage/protocol.py +170 -0
- sqlspec/storage/registry.py +315 -0
- sqlspec/typing.py +157 -36
- sqlspec/utils/correlation.py +155 -0
- sqlspec/utils/deprecation.py +3 -6
- sqlspec/utils/fixtures.py +6 -11
- sqlspec/utils/logging.py +135 -0
- sqlspec/utils/module_loader.py +45 -43
- sqlspec/utils/serializers.py +4 -0
- sqlspec/utils/singleton.py +6 -8
- sqlspec/utils/sync_tools.py +15 -27
- sqlspec/utils/text.py +58 -26
- {sqlspec-0.11.1.dist-info → sqlspec-0.12.0.dist-info}/METADATA +97 -26
- sqlspec-0.12.0.dist-info/RECORD +145 -0
- sqlspec/adapters/bigquery/config/__init__.py +0 -3
- sqlspec/adapters/bigquery/config/_common.py +0 -40
- sqlspec/adapters/bigquery/config/_sync.py +0 -87
- sqlspec/adapters/oracledb/config/__init__.py +0 -9
- sqlspec/adapters/oracledb/config/_asyncio.py +0 -186
- sqlspec/adapters/oracledb/config/_common.py +0 -131
- sqlspec/adapters/oracledb/config/_sync.py +0 -186
- sqlspec/adapters/psycopg/config/__init__.py +0 -19
- sqlspec/adapters/psycopg/config/_async.py +0 -169
- sqlspec/adapters/psycopg/config/_common.py +0 -56
- sqlspec/adapters/psycopg/config/_sync.py +0 -168
- sqlspec/filters.py +0 -331
- sqlspec/mixins.py +0 -305
- sqlspec/statement.py +0 -378
- sqlspec-0.11.1.dist-info/RECORD +0 -69
- {sqlspec-0.11.1.dist-info → sqlspec-0.12.0.dist-info}/WHEEL +0 -0
- {sqlspec-0.11.1.dist-info → sqlspec-0.12.0.dist-info}/licenses/LICENSE +0 -0
- {sqlspec-0.11.1.dist-info → sqlspec-0.12.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from sqlspec.adapters.bigquery.config import
|
|
1
|
+
from sqlspec.adapters.bigquery.config import CONNECTION_FIELDS, BigQueryConfig
|
|
2
2
|
from sqlspec.adapters.bigquery.driver import BigQueryConnection, BigQueryDriver
|
|
3
3
|
|
|
4
|
-
__all__ = ("
|
|
4
|
+
__all__ = ("CONNECTION_FIELDS", "BigQueryConfig", "BigQueryConnection", "BigQueryDriver")
|
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
"""BigQuery database configuration with direct field-based configuration."""
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
import logging
|
|
5
|
+
from dataclasses import replace
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional
|
|
7
|
+
|
|
8
|
+
from google.cloud.bigquery import LoadJobConfig, QueryJobConfig
|
|
9
|
+
|
|
10
|
+
from sqlspec.adapters.bigquery.driver import BigQueryConnection, BigQueryDriver
|
|
11
|
+
from sqlspec.config import NoPoolSyncConfig
|
|
12
|
+
from sqlspec.exceptions import ImproperConfigurationError
|
|
13
|
+
from sqlspec.statement.sql import SQLConfig
|
|
14
|
+
from sqlspec.typing import DictRow, Empty
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from collections.abc import Generator
|
|
18
|
+
from contextlib import AbstractContextManager
|
|
19
|
+
|
|
20
|
+
from google.api_core.client_info import ClientInfo
|
|
21
|
+
from google.api_core.client_options import ClientOptions
|
|
22
|
+
from google.auth.credentials import Credentials
|
|
23
|
+
from sqlglot.dialects.dialect import DialectType
|
|
24
|
+
|
|
25
|
+
# Module-level logger, named after the module for hierarchical log filtering.
logger = logging.getLogger(__name__)

# Registry of constructor keyword names that count as BigQuery
# connection/feature fields; anything else passed to BigQueryConfig
# lands in ``extras``. Kept alphabetized for easy scanning.
CONNECTION_FIELDS = frozenset(
    (
        "client_info",
        "client_options",
        "credentials",
        "credentials_path",
        "dataframes_backend",
        "dataset_id",
        "default_load_job_config",
        "default_query_job_config",
        "edition",
        "enable_bigquery_ml",
        "enable_bigquery_omni",
        "enable_column_level_security",
        "enable_continuous_queries",
        "enable_cross_cloud",
        "enable_dataframes",
        "enable_gemini_integration",
        "enable_row_level_security",
        "enable_vector_search",
        "job_timeout_ms",
        "location",
        "maximum_bytes_billed",
        "parquet_enable_list_inference",
        "project",
        "query_timeout_ms",
        "reservation_id",
        "use_avro_logical_types",
        "use_query_cache",
    )
)
|
|
58
|
+
|
|
59
|
+
__all__ = ("CONNECTION_FIELDS", "BigQueryConfig")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class BigQueryConfig(NoPoolSyncConfig[BigQueryConnection, BigQueryDriver]):
    """Enhanced BigQuery configuration with comprehensive feature support.

    BigQuery is Google Cloud's serverless, highly scalable data warehouse with
    advanced analytics, machine learning, and AI capabilities. This configuration
    supports all BigQuery features including:

    - Gemini in BigQuery for AI-powered analytics
    - BigQuery ML for machine learning workflows
    - BigQuery DataFrames for Python-based analytics
    - Multi-modal data analysis (text, images, video, audio)
    - Cross-cloud data access (AWS S3, Azure Blob Storage)
    - Vector search and embeddings
    - Continuous queries for real-time processing
    - Advanced security and governance features
    - Parquet and Arrow format optimization
    """

    # Explicit slots keep the many per-instance attributes compact and reject typos.
    __slots__ = (
        "_connection_instance",
        "_dialect",
        "client_info",
        "client_options",
        "credentials",
        "credentials_path",
        "dataframes_backend",
        "dataset_id",
        "default_load_job_config",
        "default_query_job_config",
        "default_row_type",
        "edition",
        "enable_bigquery_ml",
        "enable_bigquery_omni",
        "enable_column_level_security",
        "enable_continuous_queries",
        "enable_cross_cloud",
        "enable_dataframes",
        "enable_gemini_integration",
        "enable_row_level_security",
        "enable_vector_search",
        "extras",
        "job_timeout_ms",
        "location",
        "maximum_bytes_billed",
        "on_connection_create",
        "on_job_complete",
        "on_job_start",
        "parquet_enable_list_inference",
        "pool_instance",  # NOTE(review): never assigned in this class — presumably owned by the base config; confirm
        "project",
        "query_timeout_ms",
        "reservation_id",
        "statement_config",
        "use_avro_logical_types",
        "use_query_cache",
    )

    is_async: ClassVar[bool] = False
    supports_connection_pooling: ClassVar[bool] = False

    driver_type: type[BigQueryDriver] = BigQueryDriver
    connection_type: type[BigQueryConnection] = BigQueryConnection

    # Parameter style support information
    supported_parameter_styles: ClassVar[tuple[str, ...]] = ("named_at",)
    """BigQuery only supports @name (named_at) parameter style."""

    preferred_parameter_style: ClassVar[str] = "named_at"
    """BigQuery's native parameter style is @name (named_at)."""

    def __init__(
        self,
        statement_config: Optional[SQLConfig] = None,
        default_row_type: type[DictRow] = DictRow,
        # Core connection parameters
        project: Optional[str] = None,
        location: Optional[str] = None,
        credentials: Optional["Credentials"] = None,
        dataset_id: Optional[str] = None,
        credentials_path: Optional[str] = None,
        # Client configuration
        client_options: Optional["ClientOptions"] = None,
        client_info: Optional["ClientInfo"] = None,
        # Job configuration
        default_query_job_config: Optional["QueryJobConfig"] = None,
        default_load_job_config: Optional["LoadJobConfig"] = None,
        # Advanced BigQuery features
        use_query_cache: Optional[bool] = None,
        maximum_bytes_billed: Optional[int] = None,
        # BigQuery ML and AI configuration
        enable_bigquery_ml: Optional[bool] = None,
        enable_gemini_integration: Optional[bool] = None,
        # Performance and scaling options
        query_timeout_ms: Optional[int] = None,
        job_timeout_ms: Optional[int] = None,
        # BigQuery editions and reservations
        reservation_id: Optional[str] = None,
        edition: Optional[str] = None,
        # Cross-cloud and external data options
        enable_cross_cloud: Optional[bool] = None,
        enable_bigquery_omni: Optional[bool] = None,
        # Storage and format options
        use_avro_logical_types: Optional[bool] = None,
        parquet_enable_list_inference: Optional[bool] = None,
        # Security and governance
        enable_column_level_security: Optional[bool] = None,
        enable_row_level_security: Optional[bool] = None,
        # DataFrames and Python integration
        enable_dataframes: Optional[bool] = None,
        dataframes_backend: Optional[str] = None,
        # Continuous queries and real-time processing
        enable_continuous_queries: Optional[bool] = None,
        # Vector search and embeddings
        enable_vector_search: Optional[bool] = None,
        # Callback functions
        on_connection_create: Optional[Callable[[BigQueryConnection], None]] = None,
        on_job_start: Optional[Callable[[str], None]] = None,
        on_job_complete: Optional[Callable[[str, Any], None]] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize BigQuery configuration with comprehensive feature support.

        Args:
            statement_config: Default SQL statement configuration
            default_row_type: Default row type for results
            project: Google Cloud project ID
            location: Default geographic location for jobs and datasets
            credentials: Credentials to use for authentication
            dataset_id: Default dataset ID to use if not specified in queries
            credentials_path: Path to Google Cloud service account key file (JSON)
            client_options: Client options used to set user options on the client
            client_info: Client info used to send a user-agent string along with API requests
            default_query_job_config: Default QueryJobConfig settings for query operations
            default_load_job_config: Default LoadJobConfig settings for data loading operations
            use_query_cache: Whether to use query cache for faster repeated queries
            maximum_bytes_billed: Maximum bytes that can be billed for queries to prevent runaway costs
            enable_bigquery_ml: Enable BigQuery ML capabilities for machine learning workflows
            enable_gemini_integration: Enable Gemini in BigQuery for AI-powered analytics and code assistance
            query_timeout_ms: Query timeout in milliseconds
            job_timeout_ms: Job timeout in milliseconds
            reservation_id: Reservation ID for slot allocation and workload management
            edition: BigQuery edition (Standard, Enterprise, Enterprise Plus)
            enable_cross_cloud: Enable cross-cloud data access (AWS S3, Azure Blob Storage)
            enable_bigquery_omni: Enable BigQuery Omni for multi-cloud analytics
            use_avro_logical_types: Use Avro logical types for better type preservation
            parquet_enable_list_inference: Enable automatic list inference for Parquet data
            enable_column_level_security: Enable column-level access controls and data masking
            enable_row_level_security: Enable row-level security policies
            enable_dataframes: Enable BigQuery DataFrames for Python-based analytics
            dataframes_backend: Backend for BigQuery DataFrames (e.g., 'bigframes')
            enable_continuous_queries: Enable continuous queries for real-time data processing
            enable_vector_search: Enable vector search capabilities for AI/ML workloads
            on_connection_create: Callback executed when connection is created
            on_job_start: Callback executed when a BigQuery job starts
            on_job_complete: Callback executed when a BigQuery job completes
            **kwargs: Additional parameters (stored in extras)

        Example:
            >>> # Basic BigQuery connection
            >>> config = BigQueryConfig(project="my-project", location="US")

            >>> # Advanced configuration with ML and AI features
            >>> config = BigQueryConfig(
            ...     project="my-project",
            ...     location="US",
            ...     enable_bigquery_ml=True,
            ...     enable_gemini_integration=True,
            ...     enable_dataframes=True,
            ...     enable_vector_search=True,
            ...     maximum_bytes_billed=1000000000,  # 1GB limit
            ... )

            >>> # Enterprise configuration with reservations
            >>> config = BigQueryConfig(
            ...     project="my-project",
            ...     location="US",
            ...     edition="Enterprise Plus",
            ...     reservation_id="my-reservation",
            ...     enable_continuous_queries=True,
            ...     enable_cross_cloud=True,
            ... )
        """
        # Store connection parameters as instance attributes
        self.project = project
        self.location = location
        self.credentials = credentials
        self.dataset_id = dataset_id
        self.credentials_path = credentials_path
        self.client_options = client_options
        self.client_info = client_info
        self.default_query_job_config = default_query_job_config
        self.default_load_job_config = default_load_job_config
        self.use_query_cache = use_query_cache
        self.maximum_bytes_billed = maximum_bytes_billed
        self.enable_bigquery_ml = enable_bigquery_ml
        self.enable_gemini_integration = enable_gemini_integration
        self.query_timeout_ms = query_timeout_ms
        self.job_timeout_ms = job_timeout_ms
        self.reservation_id = reservation_id
        self.edition = edition
        self.enable_cross_cloud = enable_cross_cloud
        self.enable_bigquery_omni = enable_bigquery_omni
        self.use_avro_logical_types = use_avro_logical_types
        self.parquet_enable_list_inference = parquet_enable_list_inference
        self.enable_column_level_security = enable_column_level_security
        self.enable_row_level_security = enable_row_level_security
        self.enable_dataframes = enable_dataframes
        self.dataframes_backend = dataframes_backend
        self.enable_continuous_queries = enable_continuous_queries
        self.enable_vector_search = enable_vector_search

        # Unrecognized keyword arguments are preserved and later merged into
        # the client constructor kwargs (see connection_config_dict).
        self.extras = kwargs or {}

        # Store other config
        self.statement_config = statement_config or SQLConfig()
        self.default_row_type = default_row_type
        self.on_connection_create = on_connection_create
        self.on_job_start = on_job_start
        self.on_job_complete = on_job_complete

        # Set up default query job config if not provided
        if self.default_query_job_config is None:
            self._setup_default_job_config()

        # Store connection instance for reuse (BigQuery doesn't support traditional pooling)
        self._connection_instance: Optional[BigQueryConnection] = None
        self._dialect: DialectType = None

        super().__init__()

    def _setup_default_job_config(self) -> None:
        """Set up default job configuration based on connection settings."""
        job_config = QueryJobConfig()

        # Only qualify with the project when dataset_id is a bare name
        # (a "." means it is already project-qualified).
        if self.dataset_id and self.project and "." not in self.dataset_id:
            job_config.default_dataset = f"{self.project}.{self.dataset_id}"
        if self.use_query_cache is not None:
            job_config.use_query_cache = self.use_query_cache
        else:
            job_config.use_query_cache = True  # Default to True

        # Configure cost controls
        if self.maximum_bytes_billed is not None:
            job_config.maximum_bytes_billed = self.maximum_bytes_billed

        # Configure timeouts
        # NOTE(review): query_timeout_ms is applied as the *job* timeout here,
        # and self.job_timeout_ms is stored but never used in this method —
        # confirm this mapping is intended.
        if self.query_timeout_ms is not None:
            job_config.job_timeout_ms = self.query_timeout_ms

        self.default_query_job_config = job_config

    @property
    def connection_config_dict(self) -> dict[str, Any]:
        """Return the connection configuration as a dict for BigQuery Client constructor.

        Filters out BigQuery-specific enhancement flags and formats parameters
        appropriately for the google.cloud.bigquery.Client constructor.

        Returns:
            Configuration dict for BigQuery Client constructor.
        """
        # Only these fields are accepted by the Client constructor; the many
        # enhancement flags above are sqlspec-level configuration.
        client_fields = {"project", "location", "credentials", "client_options", "client_info"}
        config = {
            field: getattr(self, field)
            for field in client_fields
            if getattr(self, field, None) is not None and getattr(self, field) is not Empty
        }
        config.update(self.extras)

        return config

    def create_connection(self) -> BigQueryConnection:
        """Create and return a new BigQuery Client instance.

        Returns:
            A new BigQuery Client instance.

        Raises:
            ImproperConfigurationError: If the connection could not be established.
        """

        # Reuse the cached client if one was already created (no pooling).
        if self._connection_instance is not None:
            return self._connection_instance

        try:
            config_dict = self.connection_config_dict

            connection = self.connection_type(**config_dict)
            if self.on_connection_create:
                self.on_connection_create(connection)

            self._connection_instance = connection

        except Exception as e:
            msg = f"Could not configure BigQuery connection for project '{self.project or 'Unknown'}'. Error: {e}"
            raise ImproperConfigurationError(msg) from e
        return connection

    @contextlib.contextmanager
    def provide_connection(self, *args: Any, **kwargs: Any) -> "Generator[BigQueryConnection, None, None]":
        """Provide a BigQuery client within a context manager.

        Args:
            *args: Additional arguments.
            **kwargs: Additional keyword arguments.

        Yields:
            A BigQuery Client instance.
        """
        connection = self.create_connection()
        # The cached client is not closed on exit; it is reused across calls
        # (see create_connection / _connection_instance).
        yield connection

    def provide_session(self, *args: Any, **kwargs: Any) -> "AbstractContextManager[BigQueryDriver]":
        """Provide a BigQuery driver session context manager.

        Args:
            *args: Additional arguments.
            **kwargs: Additional keyword arguments.

        Returns:
            A context manager that yields a BigQueryDriver instance.
        """

        @contextlib.contextmanager
        def session_manager() -> "Generator[BigQueryDriver, None, None]":
            with self.provide_connection(*args, **kwargs) as connection:
                # Create statement config with parameter style info if not already set
                statement_config = self.statement_config
                if statement_config.allowed_parameter_styles is None:
                    # dataclasses.replace: statement_config is treated as immutable,
                    # so a modified copy is built instead of mutating the shared one.
                    statement_config = replace(
                        statement_config,
                        allowed_parameter_styles=self.supported_parameter_styles,
                        target_parameter_style=self.preferred_parameter_style,
                    )

                driver = self.driver_type(
                    connection=connection,
                    config=statement_config,
                    default_row_type=self.default_row_type,
                    default_query_job_config=self.default_query_job_config,
                    on_job_start=self.on_job_start,
                    on_job_complete=self.on_job_complete,
                )
                yield driver

        return session_manager()