sqlspec-0.13.1-py3-none-any.whl → sqlspec-0.16.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sqlspec might be problematic. Click here for more details.
- sqlspec/__init__.py +71 -8
- sqlspec/__main__.py +12 -0
- sqlspec/__metadata__.py +1 -3
- sqlspec/_serialization.py +1 -2
- sqlspec/_sql.py +930 -136
- sqlspec/_typing.py +278 -142
- sqlspec/adapters/adbc/__init__.py +4 -3
- sqlspec/adapters/adbc/_types.py +12 -0
- sqlspec/adapters/adbc/config.py +116 -285
- sqlspec/adapters/adbc/driver.py +462 -340
- sqlspec/adapters/aiosqlite/__init__.py +18 -3
- sqlspec/adapters/aiosqlite/_types.py +13 -0
- sqlspec/adapters/aiosqlite/config.py +202 -150
- sqlspec/adapters/aiosqlite/driver.py +226 -247
- sqlspec/adapters/asyncmy/__init__.py +18 -3
- sqlspec/adapters/asyncmy/_types.py +12 -0
- sqlspec/adapters/asyncmy/config.py +80 -199
- sqlspec/adapters/asyncmy/driver.py +257 -215
- sqlspec/adapters/asyncpg/__init__.py +19 -4
- sqlspec/adapters/asyncpg/_types.py +17 -0
- sqlspec/adapters/asyncpg/config.py +81 -214
- sqlspec/adapters/asyncpg/driver.py +284 -359
- sqlspec/adapters/bigquery/__init__.py +17 -3
- sqlspec/adapters/bigquery/_types.py +12 -0
- sqlspec/adapters/bigquery/config.py +191 -299
- sqlspec/adapters/bigquery/driver.py +474 -634
- sqlspec/adapters/duckdb/__init__.py +14 -3
- sqlspec/adapters/duckdb/_types.py +12 -0
- sqlspec/adapters/duckdb/config.py +414 -397
- sqlspec/adapters/duckdb/driver.py +342 -393
- sqlspec/adapters/oracledb/__init__.py +19 -5
- sqlspec/adapters/oracledb/_types.py +14 -0
- sqlspec/adapters/oracledb/config.py +123 -458
- sqlspec/adapters/oracledb/driver.py +505 -531
- sqlspec/adapters/psqlpy/__init__.py +13 -3
- sqlspec/adapters/psqlpy/_types.py +11 -0
- sqlspec/adapters/psqlpy/config.py +93 -307
- sqlspec/adapters/psqlpy/driver.py +504 -213
- sqlspec/adapters/psycopg/__init__.py +19 -5
- sqlspec/adapters/psycopg/_types.py +17 -0
- sqlspec/adapters/psycopg/config.py +143 -472
- sqlspec/adapters/psycopg/driver.py +704 -825
- sqlspec/adapters/sqlite/__init__.py +14 -3
- sqlspec/adapters/sqlite/_types.py +11 -0
- sqlspec/adapters/sqlite/config.py +208 -142
- sqlspec/adapters/sqlite/driver.py +263 -278
- sqlspec/base.py +105 -9
- sqlspec/{statement/builder → builder}/__init__.py +12 -14
- sqlspec/{statement/builder/base.py → builder/_base.py} +184 -86
- sqlspec/{statement/builder/column.py → builder/_column.py} +97 -60
- sqlspec/{statement/builder/ddl.py → builder/_ddl.py} +61 -131
- sqlspec/{statement/builder → builder}/_ddl_utils.py +4 -10
- sqlspec/{statement/builder/delete.py → builder/_delete.py} +10 -30
- sqlspec/builder/_insert.py +421 -0
- sqlspec/builder/_merge.py +71 -0
- sqlspec/{statement/builder → builder}/_parsing_utils.py +49 -26
- sqlspec/builder/_select.py +170 -0
- sqlspec/{statement/builder/update.py → builder/_update.py} +16 -20
- sqlspec/builder/mixins/__init__.py +55 -0
- sqlspec/builder/mixins/_cte_and_set_ops.py +222 -0
- sqlspec/{statement/builder/mixins/_delete_from.py → builder/mixins/_delete_operations.py} +8 -1
- sqlspec/builder/mixins/_insert_operations.py +244 -0
- sqlspec/{statement/builder/mixins/_join.py → builder/mixins/_join_operations.py} +45 -13
- sqlspec/{statement/builder/mixins/_merge_clauses.py → builder/mixins/_merge_operations.py} +188 -30
- sqlspec/builder/mixins/_order_limit_operations.py +135 -0
- sqlspec/builder/mixins/_pivot_operations.py +153 -0
- sqlspec/builder/mixins/_select_operations.py +604 -0
- sqlspec/builder/mixins/_update_operations.py +202 -0
- sqlspec/builder/mixins/_where_clause.py +644 -0
- sqlspec/cli.py +247 -0
- sqlspec/config.py +183 -138
- sqlspec/core/__init__.py +63 -0
- sqlspec/core/cache.py +871 -0
- sqlspec/core/compiler.py +417 -0
- sqlspec/core/filters.py +830 -0
- sqlspec/core/hashing.py +310 -0
- sqlspec/core/parameters.py +1237 -0
- sqlspec/core/result.py +677 -0
- sqlspec/{statement → core}/splitter.py +321 -191
- sqlspec/core/statement.py +676 -0
- sqlspec/driver/__init__.py +7 -10
- sqlspec/driver/_async.py +422 -163
- sqlspec/driver/_common.py +545 -287
- sqlspec/driver/_sync.py +426 -160
- sqlspec/driver/mixins/__init__.py +2 -13
- sqlspec/driver/mixins/_result_tools.py +193 -0
- sqlspec/driver/mixins/_sql_translator.py +65 -14
- sqlspec/exceptions.py +5 -252
- sqlspec/extensions/aiosql/adapter.py +93 -96
- sqlspec/extensions/litestar/__init__.py +2 -1
- sqlspec/extensions/litestar/cli.py +48 -0
- sqlspec/extensions/litestar/config.py +0 -1
- sqlspec/extensions/litestar/handlers.py +15 -26
- sqlspec/extensions/litestar/plugin.py +21 -16
- sqlspec/extensions/litestar/providers.py +17 -52
- sqlspec/loader.py +423 -104
- sqlspec/migrations/__init__.py +35 -0
- sqlspec/migrations/base.py +414 -0
- sqlspec/migrations/commands.py +443 -0
- sqlspec/migrations/loaders.py +402 -0
- sqlspec/migrations/runner.py +213 -0
- sqlspec/migrations/tracker.py +140 -0
- sqlspec/migrations/utils.py +129 -0
- sqlspec/protocols.py +51 -186
- sqlspec/storage/__init__.py +1 -1
- sqlspec/storage/backends/base.py +37 -40
- sqlspec/storage/backends/fsspec.py +136 -112
- sqlspec/storage/backends/obstore.py +138 -160
- sqlspec/storage/capabilities.py +5 -4
- sqlspec/storage/registry.py +57 -106
- sqlspec/typing.py +136 -115
- sqlspec/utils/__init__.py +2 -2
- sqlspec/utils/correlation.py +0 -3
- sqlspec/utils/deprecation.py +6 -6
- sqlspec/utils/fixtures.py +6 -6
- sqlspec/utils/logging.py +0 -2
- sqlspec/utils/module_loader.py +7 -12
- sqlspec/utils/singleton.py +0 -1
- sqlspec/utils/sync_tools.py +17 -38
- sqlspec/utils/text.py +12 -51
- sqlspec/utils/type_guards.py +482 -235
- {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/METADATA +7 -2
- sqlspec-0.16.2.dist-info/RECORD +134 -0
- sqlspec-0.16.2.dist-info/entry_points.txt +2 -0
- sqlspec/driver/connection.py +0 -207
- sqlspec/driver/mixins/_csv_writer.py +0 -91
- sqlspec/driver/mixins/_pipeline.py +0 -512
- sqlspec/driver/mixins/_result_utils.py +0 -140
- sqlspec/driver/mixins/_storage.py +0 -926
- sqlspec/driver/mixins/_type_coercion.py +0 -130
- sqlspec/driver/parameters.py +0 -138
- sqlspec/service/__init__.py +0 -4
- sqlspec/service/_util.py +0 -147
- sqlspec/service/base.py +0 -1131
- sqlspec/service/pagination.py +0 -26
- sqlspec/statement/__init__.py +0 -21
- sqlspec/statement/builder/insert.py +0 -288
- sqlspec/statement/builder/merge.py +0 -95
- sqlspec/statement/builder/mixins/__init__.py +0 -65
- sqlspec/statement/builder/mixins/_aggregate_functions.py +0 -250
- sqlspec/statement/builder/mixins/_case_builder.py +0 -91
- sqlspec/statement/builder/mixins/_common_table_expr.py +0 -90
- sqlspec/statement/builder/mixins/_from.py +0 -63
- sqlspec/statement/builder/mixins/_group_by.py +0 -118
- sqlspec/statement/builder/mixins/_having.py +0 -35
- sqlspec/statement/builder/mixins/_insert_from_select.py +0 -47
- sqlspec/statement/builder/mixins/_insert_into.py +0 -36
- sqlspec/statement/builder/mixins/_insert_values.py +0 -67
- sqlspec/statement/builder/mixins/_limit_offset.py +0 -53
- sqlspec/statement/builder/mixins/_order_by.py +0 -46
- sqlspec/statement/builder/mixins/_pivot.py +0 -79
- sqlspec/statement/builder/mixins/_returning.py +0 -37
- sqlspec/statement/builder/mixins/_select_columns.py +0 -61
- sqlspec/statement/builder/mixins/_set_ops.py +0 -122
- sqlspec/statement/builder/mixins/_unpivot.py +0 -77
- sqlspec/statement/builder/mixins/_update_from.py +0 -55
- sqlspec/statement/builder/mixins/_update_set.py +0 -94
- sqlspec/statement/builder/mixins/_update_table.py +0 -29
- sqlspec/statement/builder/mixins/_where.py +0 -401
- sqlspec/statement/builder/mixins/_window_functions.py +0 -86
- sqlspec/statement/builder/select.py +0 -221
- sqlspec/statement/filters.py +0 -596
- sqlspec/statement/parameter_manager.py +0 -220
- sqlspec/statement/parameters.py +0 -867
- sqlspec/statement/pipelines/__init__.py +0 -210
- sqlspec/statement/pipelines/analyzers/__init__.py +0 -9
- sqlspec/statement/pipelines/analyzers/_analyzer.py +0 -646
- sqlspec/statement/pipelines/context.py +0 -115
- sqlspec/statement/pipelines/transformers/__init__.py +0 -7
- sqlspec/statement/pipelines/transformers/_expression_simplifier.py +0 -88
- sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +0 -1247
- sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +0 -76
- sqlspec/statement/pipelines/validators/__init__.py +0 -23
- sqlspec/statement/pipelines/validators/_dml_safety.py +0 -290
- sqlspec/statement/pipelines/validators/_parameter_style.py +0 -370
- sqlspec/statement/pipelines/validators/_performance.py +0 -718
- sqlspec/statement/pipelines/validators/_security.py +0 -967
- sqlspec/statement/result.py +0 -435
- sqlspec/statement/sql.py +0 -1704
- sqlspec/statement/sql_compiler.py +0 -140
- sqlspec/utils/cached_property.py +0 -25
- sqlspec-0.13.1.dist-info/RECORD +0 -150
- {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/WHEEL +0 -0
- {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/licenses/LICENSE +0 -0
- {sqlspec-0.13.1.dist-info → sqlspec-0.16.2.dist-info}/licenses/NOTICE +0 -0
|
@@ -1,4 +1,18 @@
|
|
|
1
|
-
from sqlspec.adapters.bigquery.
|
|
2
|
-
from sqlspec.adapters.bigquery.
|
|
1
|
+
from sqlspec.adapters.bigquery._types import BigQueryConnection
|
|
2
|
+
from sqlspec.adapters.bigquery.config import BigQueryConfig, BigQueryConnectionParams
|
|
3
|
+
from sqlspec.adapters.bigquery.driver import (
|
|
4
|
+
BigQueryCursor,
|
|
5
|
+
BigQueryDriver,
|
|
6
|
+
BigQueryExceptionHandler,
|
|
7
|
+
bigquery_statement_config,
|
|
8
|
+
)
|
|
3
9
|
|
|
4
|
-
__all__ = (
|
|
10
|
+
__all__ = (
|
|
11
|
+
"BigQueryConfig",
|
|
12
|
+
"BigQueryConnection",
|
|
13
|
+
"BigQueryConnectionParams",
|
|
14
|
+
"BigQueryCursor",
|
|
15
|
+
"BigQueryDriver",
|
|
16
|
+
"BigQueryExceptionHandler",
|
|
17
|
+
"bigquery_statement_config",
|
|
18
|
+
)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING
|
|
2
|
+
|
|
3
|
+
from google.cloud.bigquery import Client
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
from typing_extensions import TypeAlias
|
|
7
|
+
|
|
8
|
+
BigQueryConnection: TypeAlias = Client
|
|
9
|
+
else:
|
|
10
|
+
BigQueryConnection = Client
|
|
11
|
+
|
|
12
|
+
__all__ = ("BigQueryConnection",)
|
|
@@ -2,60 +2,81 @@
|
|
|
2
2
|
|
|
3
3
|
import contextlib
|
|
4
4
|
import logging
|
|
5
|
-
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, TypedDict, Union
|
|
6
6
|
|
|
7
7
|
from google.cloud.bigquery import LoadJobConfig, QueryJobConfig
|
|
8
|
+
from typing_extensions import NotRequired
|
|
8
9
|
|
|
9
|
-
from sqlspec.adapters.bigquery.
|
|
10
|
+
from sqlspec.adapters.bigquery._types import BigQueryConnection
|
|
11
|
+
from sqlspec.adapters.bigquery.driver import BigQueryCursor, BigQueryDriver, bigquery_statement_config
|
|
10
12
|
from sqlspec.config import NoPoolSyncConfig
|
|
11
13
|
from sqlspec.exceptions import ImproperConfigurationError
|
|
12
|
-
from sqlspec.
|
|
13
|
-
from sqlspec.typing import DictRow, Empty
|
|
14
|
+
from sqlspec.typing import Empty
|
|
14
15
|
|
|
15
16
|
if TYPE_CHECKING:
|
|
16
17
|
from collections.abc import Generator
|
|
17
|
-
from contextlib import AbstractContextManager
|
|
18
18
|
|
|
19
19
|
from google.api_core.client_info import ClientInfo
|
|
20
20
|
from google.api_core.client_options import ClientOptions
|
|
21
21
|
from google.auth.credentials import Credentials
|
|
22
|
-
|
|
22
|
+
|
|
23
|
+
from sqlspec.core.statement import StatementConfig
|
|
24
|
+
|
|
23
25
|
|
|
24
26
|
logger = logging.getLogger(__name__)
|
|
25
27
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
28
|
+
|
|
29
|
+
class BigQueryConnectionParams(TypedDict, total=False):
|
|
30
|
+
"""Standard BigQuery connection parameters.
|
|
31
|
+
|
|
32
|
+
Includes both official BigQuery client parameters and BigQuery-specific configuration options.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
# Official BigQuery client constructor parameters
|
|
36
|
+
project: NotRequired[str]
|
|
37
|
+
location: NotRequired[str]
|
|
38
|
+
credentials: NotRequired["Credentials"]
|
|
39
|
+
client_options: NotRequired["ClientOptions"]
|
|
40
|
+
client_info: NotRequired["ClientInfo"]
|
|
41
|
+
|
|
42
|
+
# BigQuery-specific configuration options
|
|
43
|
+
default_query_job_config: NotRequired[QueryJobConfig]
|
|
44
|
+
default_load_job_config: NotRequired[LoadJobConfig]
|
|
45
|
+
dataset_id: NotRequired[str]
|
|
46
|
+
credentials_path: NotRequired[str]
|
|
47
|
+
use_query_cache: NotRequired[bool]
|
|
48
|
+
maximum_bytes_billed: NotRequired[int]
|
|
49
|
+
enable_bigquery_ml: NotRequired[bool]
|
|
50
|
+
enable_gemini_integration: NotRequired[bool]
|
|
51
|
+
query_timeout_ms: NotRequired[int]
|
|
52
|
+
job_timeout_ms: NotRequired[int]
|
|
53
|
+
reservation_id: NotRequired[str]
|
|
54
|
+
edition: NotRequired[str]
|
|
55
|
+
enable_cross_cloud: NotRequired[bool]
|
|
56
|
+
enable_bigquery_omni: NotRequired[bool]
|
|
57
|
+
use_avro_logical_types: NotRequired[bool]
|
|
58
|
+
parquet_enable_list_inference: NotRequired[bool]
|
|
59
|
+
enable_column_level_security: NotRequired[bool]
|
|
60
|
+
enable_row_level_security: NotRequired[bool]
|
|
61
|
+
enable_dataframes: NotRequired[bool]
|
|
62
|
+
dataframes_backend: NotRequired[str]
|
|
63
|
+
enable_continuous_queries: NotRequired[bool]
|
|
64
|
+
enable_vector_search: NotRequired[bool]
|
|
65
|
+
extra: NotRequired[dict[str, Any]]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class BigQueryDriverFeatures(TypedDict, total=False):
|
|
69
|
+
"""BigQuery driver-specific features configuration.
|
|
70
|
+
|
|
71
|
+
Only non-standard BigQuery client parameters that are SQLSpec-specific extensions.
|
|
72
|
+
"""
|
|
73
|
+
|
|
74
|
+
on_job_start: NotRequired["Callable[[str], None]"]
|
|
75
|
+
on_job_complete: NotRequired["Callable[[str, Any], None]"]
|
|
76
|
+
on_connection_create: NotRequired["Callable[[Any], None]"]
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
__all__ = ("BigQueryConfig", "BigQueryConnectionParams", "BigQueryDriverFeatures")
|
|
59
80
|
|
|
60
81
|
|
|
61
82
|
class BigQueryConfig(NoPoolSyncConfig[BigQueryConnection, BigQueryDriver]):
|
|
@@ -64,269 +85,120 @@ class BigQueryConfig(NoPoolSyncConfig[BigQueryConnection, BigQueryDriver]):
|
|
|
64
85
|
BigQuery is Google Cloud's serverless, highly scalable data warehouse with
|
|
65
86
|
advanced analytics, machine learning, and AI capabilities. This configuration
|
|
66
87
|
supports all BigQuery features including:
|
|
67
|
-
|
|
68
|
-
- Gemini in BigQuery for AI-powered analytics
|
|
69
|
-
- BigQuery ML for machine learning workflows
|
|
70
|
-
- BigQuery DataFrames for Python-based analytics
|
|
71
|
-
- Multi-modal data analysis (text, images, video, audio)
|
|
72
|
-
- Cross-cloud data access (AWS S3, Azure Blob Storage)
|
|
73
|
-
- Vector search and embeddings
|
|
74
|
-
- Continuous queries for real-time processing
|
|
75
|
-
- Advanced security and governance features
|
|
76
|
-
- Parquet and Arrow format optimization
|
|
77
88
|
"""
|
|
78
89
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
"_dialect",
|
|
82
|
-
"client_info",
|
|
83
|
-
"client_options",
|
|
84
|
-
"credentials",
|
|
85
|
-
"credentials_path",
|
|
86
|
-
"dataframes_backend",
|
|
87
|
-
"dataset_id",
|
|
88
|
-
"default_load_job_config",
|
|
89
|
-
"default_query_job_config",
|
|
90
|
-
"default_row_type",
|
|
91
|
-
"edition",
|
|
92
|
-
"enable_bigquery_ml",
|
|
93
|
-
"enable_bigquery_omni",
|
|
94
|
-
"enable_column_level_security",
|
|
95
|
-
"enable_continuous_queries",
|
|
96
|
-
"enable_cross_cloud",
|
|
97
|
-
"enable_dataframes",
|
|
98
|
-
"enable_gemini_integration",
|
|
99
|
-
"enable_row_level_security",
|
|
100
|
-
"enable_vector_search",
|
|
101
|
-
"extras",
|
|
102
|
-
"job_timeout_ms",
|
|
103
|
-
"location",
|
|
104
|
-
"maximum_bytes_billed",
|
|
105
|
-
"on_connection_create",
|
|
106
|
-
"on_job_complete",
|
|
107
|
-
"on_job_start",
|
|
108
|
-
"parquet_enable_list_inference",
|
|
109
|
-
"pool_instance",
|
|
110
|
-
"project",
|
|
111
|
-
"query_timeout_ms",
|
|
112
|
-
"reservation_id",
|
|
113
|
-
"statement_config",
|
|
114
|
-
"use_avro_logical_types",
|
|
115
|
-
"use_query_cache",
|
|
116
|
-
)
|
|
117
|
-
|
|
118
|
-
is_async: ClassVar[bool] = False
|
|
119
|
-
supports_connection_pooling: ClassVar[bool] = False
|
|
120
|
-
|
|
121
|
-
driver_type: type[BigQueryDriver] = BigQueryDriver
|
|
122
|
-
connection_type: type[BigQueryConnection] = BigQueryConnection
|
|
123
|
-
|
|
124
|
-
# Parameter style support information
|
|
125
|
-
supported_parameter_styles: ClassVar[tuple[str, ...]] = ("named_at",)
|
|
126
|
-
"""BigQuery only supports @name (named_at) parameter style."""
|
|
127
|
-
|
|
128
|
-
preferred_parameter_style: ClassVar[str] = "named_at"
|
|
129
|
-
"""BigQuery's native parameter style is @name (named_at)."""
|
|
90
|
+
driver_type: ClassVar[type[BigQueryDriver]] = BigQueryDriver
|
|
91
|
+
connection_type: "ClassVar[type[BigQueryConnection]]" = BigQueryConnection
|
|
130
92
|
|
|
131
93
|
def __init__(
|
|
132
94
|
self,
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
dataset_id: Optional[str] = None,
|
|
140
|
-
credentials_path: Optional[str] = None,
|
|
141
|
-
# Client configuration
|
|
142
|
-
client_options: Optional["ClientOptions"] = None,
|
|
143
|
-
client_info: Optional["ClientInfo"] = None,
|
|
144
|
-
# Job configuration
|
|
145
|
-
default_query_job_config: Optional["QueryJobConfig"] = None,
|
|
146
|
-
default_load_job_config: Optional["LoadJobConfig"] = None,
|
|
147
|
-
# Advanced BigQuery features
|
|
148
|
-
use_query_cache: Optional[bool] = None,
|
|
149
|
-
maximum_bytes_billed: Optional[int] = None,
|
|
150
|
-
# BigQuery ML and AI configuration
|
|
151
|
-
enable_bigquery_ml: Optional[bool] = None,
|
|
152
|
-
enable_gemini_integration: Optional[bool] = None,
|
|
153
|
-
# Performance and scaling options
|
|
154
|
-
query_timeout_ms: Optional[int] = None,
|
|
155
|
-
job_timeout_ms: Optional[int] = None,
|
|
156
|
-
# BigQuery editions and reservations
|
|
157
|
-
reservation_id: Optional[str] = None,
|
|
158
|
-
edition: Optional[str] = None,
|
|
159
|
-
# Cross-cloud and external data options
|
|
160
|
-
enable_cross_cloud: Optional[bool] = None,
|
|
161
|
-
enable_bigquery_omni: Optional[bool] = None,
|
|
162
|
-
# Storage and format options
|
|
163
|
-
use_avro_logical_types: Optional[bool] = None,
|
|
164
|
-
parquet_enable_list_inference: Optional[bool] = None,
|
|
165
|
-
# Security and governance
|
|
166
|
-
enable_column_level_security: Optional[bool] = None,
|
|
167
|
-
enable_row_level_security: Optional[bool] = None,
|
|
168
|
-
# DataFrames and Python integration
|
|
169
|
-
enable_dataframes: Optional[bool] = None,
|
|
170
|
-
dataframes_backend: Optional[str] = None,
|
|
171
|
-
# Continuous queries and real-time processing
|
|
172
|
-
enable_continuous_queries: Optional[bool] = None,
|
|
173
|
-
# Vector search and embeddings
|
|
174
|
-
enable_vector_search: Optional[bool] = None,
|
|
175
|
-
# Callback functions
|
|
176
|
-
on_connection_create: Optional[Callable[[BigQueryConnection], None]] = None,
|
|
177
|
-
on_job_start: Optional[Callable[[str], None]] = None,
|
|
178
|
-
on_job_complete: Optional[Callable[[str, Any], None]] = None,
|
|
179
|
-
**kwargs: Any,
|
|
95
|
+
*,
|
|
96
|
+
connection_instance: "Optional[BigQueryConnection]" = None,
|
|
97
|
+
connection_config: "Optional[Union[BigQueryConnectionParams, dict[str, Any]]]" = None,
|
|
98
|
+
migration_config: Optional[dict[str, Any]] = None,
|
|
99
|
+
statement_config: "Optional[StatementConfig]" = None,
|
|
100
|
+
driver_features: "Optional[Union[BigQueryDriverFeatures, dict[str, Any]]]" = None,
|
|
180
101
|
) -> None:
|
|
181
102
|
"""Initialize BigQuery configuration with comprehensive feature support.
|
|
182
103
|
|
|
183
104
|
Args:
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
dataset_id: Default dataset ID to use if not specified in queries
|
|
190
|
-
credentials_path: Path to Google Cloud service account key file (JSON)
|
|
191
|
-
client_options: Client options used to set user options on the client
|
|
192
|
-
client_info: Client info used to send a user-agent string along with API requests
|
|
193
|
-
default_query_job_config: Default QueryJobConfig settings for query operations
|
|
194
|
-
default_load_job_config: Default LoadJobConfig settings for data loading operations
|
|
195
|
-
use_query_cache: Whether to use query cache for faster repeated queries
|
|
196
|
-
maximum_bytes_billed: Maximum bytes that can be billed for queries to prevent runaway costs
|
|
197
|
-
enable_bigquery_ml: Enable BigQuery ML capabilities for machine learning workflows
|
|
198
|
-
enable_gemini_integration: Enable Gemini in BigQuery for AI-powered analytics and code assistance
|
|
199
|
-
query_timeout_ms: Query timeout in milliseconds
|
|
200
|
-
job_timeout_ms: Job timeout in milliseconds
|
|
201
|
-
reservation_id: Reservation ID for slot allocation and workload management
|
|
202
|
-
edition: BigQuery edition (Standard, Enterprise, Enterprise Plus)
|
|
203
|
-
enable_cross_cloud: Enable cross-cloud data access (AWS S3, Azure Blob Storage)
|
|
204
|
-
enable_bigquery_omni: Enable BigQuery Omni for multi-cloud analytics
|
|
205
|
-
use_avro_logical_types: Use Avro logical types for better type preservation
|
|
206
|
-
parquet_enable_list_inference: Enable automatic list inference for Parquet data
|
|
207
|
-
enable_column_level_security: Enable column-level access controls and data masking
|
|
208
|
-
enable_row_level_security: Enable row-level security policies
|
|
209
|
-
enable_dataframes: Enable BigQuery DataFrames for Python-based analytics
|
|
210
|
-
dataframes_backend: Backend for BigQuery DataFrames (e.g., 'bigframes')
|
|
211
|
-
enable_continuous_queries: Enable continuous queries for real-time data processing
|
|
212
|
-
enable_vector_search: Enable vector search capabilities for AI/ML workloads
|
|
213
|
-
on_connection_create: Callback executed when connection is created
|
|
214
|
-
on_job_start: Callback executed when a BigQuery job starts
|
|
215
|
-
on_job_complete: Callback executed when a BigQuery job completes
|
|
216
|
-
**kwargs: Additional parameters (stored in extras)
|
|
105
|
+
connection_config: Standard connection configuration parameters
|
|
106
|
+
connection_instance: Existing connection instance to use
|
|
107
|
+
migration_config: Migration configuration
|
|
108
|
+
statement_config: Statement configuration override
|
|
109
|
+
driver_features: BigQuery-specific driver features and configurations
|
|
217
110
|
|
|
218
111
|
Example:
|
|
219
112
|
>>> # Basic BigQuery connection
|
|
220
|
-
>>> config = BigQueryConfig(
|
|
113
|
+
>>> config = BigQueryConfig(
|
|
114
|
+
... connection_config={
|
|
115
|
+
... "project": "my-project",
|
|
116
|
+
... "location": "US",
|
|
117
|
+
... }
|
|
118
|
+
... )
|
|
221
119
|
|
|
222
120
|
>>> # Advanced configuration with ML and AI features
|
|
223
121
|
>>> config = BigQueryConfig(
|
|
224
|
-
...
|
|
225
|
-
...
|
|
226
|
-
...
|
|
227
|
-
...
|
|
228
|
-
...
|
|
229
|
-
...
|
|
230
|
-
...
|
|
122
|
+
... connection_config={
|
|
123
|
+
... "project": "my-project",
|
|
124
|
+
... "location": "US",
|
|
125
|
+
... "enable_bigquery_ml": True,
|
|
126
|
+
... "enable_gemini_integration": True,
|
|
127
|
+
... "enable_dataframes": True,
|
|
128
|
+
... "enable_vector_search": True,
|
|
129
|
+
... "maximum_bytes_billed": 1000000000, # 1GB limit
|
|
130
|
+
... }
|
|
231
131
|
... )
|
|
232
132
|
|
|
233
133
|
>>> # Enterprise configuration with reservations
|
|
234
134
|
>>> config = BigQueryConfig(
|
|
235
|
-
...
|
|
236
|
-
...
|
|
237
|
-
...
|
|
238
|
-
...
|
|
239
|
-
...
|
|
240
|
-
...
|
|
135
|
+
... connection_config={
|
|
136
|
+
... "project": "my-project",
|
|
137
|
+
... "location": "US",
|
|
138
|
+
... "edition": "Enterprise Plus",
|
|
139
|
+
... "reservation_id": "my-reservation",
|
|
140
|
+
... "enable_continuous_queries": True,
|
|
141
|
+
... "enable_cross_cloud": True,
|
|
142
|
+
... }
|
|
241
143
|
... )
|
|
242
144
|
"""
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
self.
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
self.
|
|
249
|
-
self.
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
self.
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
self.
|
|
258
|
-
self.job_timeout_ms = job_timeout_ms
|
|
259
|
-
self.reservation_id = reservation_id
|
|
260
|
-
self.edition = edition
|
|
261
|
-
self.enable_cross_cloud = enable_cross_cloud
|
|
262
|
-
self.enable_bigquery_omni = enable_bigquery_omni
|
|
263
|
-
self.use_avro_logical_types = use_avro_logical_types
|
|
264
|
-
self.parquet_enable_list_inference = parquet_enable_list_inference
|
|
265
|
-
self.enable_column_level_security = enable_column_level_security
|
|
266
|
-
self.enable_row_level_security = enable_row_level_security
|
|
267
|
-
self.enable_dataframes = enable_dataframes
|
|
268
|
-
self.dataframes_backend = dataframes_backend
|
|
269
|
-
self.enable_continuous_queries = enable_continuous_queries
|
|
270
|
-
self.enable_vector_search = enable_vector_search
|
|
271
|
-
|
|
272
|
-
self.extras = kwargs or {}
|
|
273
|
-
|
|
274
|
-
# Store other config
|
|
275
|
-
self.statement_config = statement_config or SQLConfig(dialect="bigquery")
|
|
276
|
-
self.default_row_type = default_row_type
|
|
277
|
-
self.on_connection_create = on_connection_create
|
|
278
|
-
self.on_job_start = on_job_start
|
|
279
|
-
self.on_job_complete = on_job_complete
|
|
280
|
-
|
|
281
|
-
if self.default_query_job_config is None:
|
|
145
|
+
|
|
146
|
+
# Store connection instance
|
|
147
|
+
self._connection_instance = connection_instance
|
|
148
|
+
|
|
149
|
+
# Setup configuration following DuckDB pattern
|
|
150
|
+
self.connection_config: dict[str, Any] = dict(connection_config) if connection_config else {}
|
|
151
|
+
if "extra" in self.connection_config:
|
|
152
|
+
extras = self.connection_config.pop("extra")
|
|
153
|
+
self.connection_config.update(extras)
|
|
154
|
+
|
|
155
|
+
# Setup driver features
|
|
156
|
+
self.driver_features: dict[str, Any] = dict(driver_features) if driver_features else {}
|
|
157
|
+
|
|
158
|
+
# Setup default job config if not provided
|
|
159
|
+
if "default_query_job_config" not in self.connection_config:
|
|
282
160
|
self._setup_default_job_config()
|
|
283
161
|
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
self._dialect: DialectType = None
|
|
162
|
+
if statement_config is None:
|
|
163
|
+
statement_config = bigquery_statement_config
|
|
287
164
|
|
|
288
|
-
super().__init__(
|
|
165
|
+
super().__init__(
|
|
166
|
+
connection_config=self.connection_config,
|
|
167
|
+
migration_config=migration_config,
|
|
168
|
+
statement_config=statement_config,
|
|
169
|
+
driver_features=self.driver_features,
|
|
170
|
+
)
|
|
289
171
|
|
|
290
172
|
def _setup_default_job_config(self) -> None:
|
|
291
|
-
"""Set up default job configuration based on connection
|
|
173
|
+
"""Set up default job configuration based on connection config."""
|
|
174
|
+
# Check if already provided in connection_config
|
|
175
|
+
if self.connection_config.get("default_query_job_config") is not None:
|
|
176
|
+
return
|
|
177
|
+
|
|
292
178
|
job_config = QueryJobConfig()
|
|
293
179
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
if
|
|
297
|
-
job_config.
|
|
180
|
+
dataset_id = self.connection_config.get("dataset_id")
|
|
181
|
+
project = self.connection_config.get("project")
|
|
182
|
+
if dataset_id and project and "." not in dataset_id:
|
|
183
|
+
job_config.default_dataset = f"{project}.{dataset_id}"
|
|
184
|
+
|
|
185
|
+
use_query_cache = self.connection_config.get("use_query_cache")
|
|
186
|
+
if use_query_cache is not None:
|
|
187
|
+
job_config.use_query_cache = use_query_cache
|
|
298
188
|
else:
|
|
299
189
|
job_config.use_query_cache = True # Default to True
|
|
300
190
|
|
|
301
191
|
# Configure cost controls
|
|
302
|
-
|
|
303
|
-
|
|
192
|
+
maximum_bytes_billed = self.connection_config.get("maximum_bytes_billed")
|
|
193
|
+
if maximum_bytes_billed is not None:
|
|
194
|
+
job_config.maximum_bytes_billed = maximum_bytes_billed
|
|
304
195
|
|
|
305
196
|
# Configure timeouts
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
self.default_query_job_config = job_config
|
|
197
|
+
query_timeout_ms = self.connection_config.get("query_timeout_ms")
|
|
198
|
+
if query_timeout_ms is not None:
|
|
199
|
+
job_config.job_timeout_ms = query_timeout_ms
|
|
310
200
|
|
|
311
|
-
|
|
312
|
-
def connection_config_dict(self) -> dict[str, Any]:
|
|
313
|
-
"""Return the connection configuration as a dict for BigQuery Client constructor.
|
|
314
|
-
|
|
315
|
-
Filters out BigQuery-specific enhancement flags and formats parameters
|
|
316
|
-
appropriately for the google.cloud.bigquery.Client constructor.
|
|
317
|
-
|
|
318
|
-
Returns:
|
|
319
|
-
Configuration dict for BigQuery Client constructor.
|
|
320
|
-
"""
|
|
321
|
-
client_fields = {"project", "location", "credentials", "client_options", "client_info"}
|
|
322
|
-
config = {
|
|
323
|
-
field: getattr(self, field)
|
|
324
|
-
for field in client_fields
|
|
325
|
-
if getattr(self, field, None) is not None and getattr(self, field) is not Empty
|
|
326
|
-
}
|
|
327
|
-
config.update(self.extras)
|
|
328
|
-
|
|
329
|
-
return config
|
|
201
|
+
self.connection_config["default_query_job_config"] = job_config
|
|
330
202
|
|
|
331
203
|
def create_connection(self) -> BigQueryConnection:
|
|
332
204
|
"""Create and return a new BigQuery Client instance.
|
|
@@ -342,21 +214,39 @@ class BigQueryConfig(NoPoolSyncConfig[BigQueryConnection, BigQueryDriver]):
|
|
|
342
214
|
return self._connection_instance
|
|
343
215
|
|
|
344
216
|
try:
|
|
345
|
-
|
|
346
|
-
|
|
217
|
+
# Filter out extra fields and keep only official BigQuery client constructor fields
|
|
218
|
+
client_fields = {"project", "location", "credentials", "client_options", "client_info"}
|
|
219
|
+
config_dict: dict[str, Any] = {
|
|
220
|
+
field: value
|
|
221
|
+
for field, value in self.connection_config.items()
|
|
222
|
+
if field in client_fields and value is not None and value is not Empty
|
|
223
|
+
}
|
|
347
224
|
connection = self.connection_type(**config_dict)
|
|
348
|
-
|
|
349
|
-
|
|
225
|
+
|
|
226
|
+
# Store BigQuery-specific config in driver_features for driver access
|
|
227
|
+
default_query_job_config = self.connection_config.get("default_query_job_config")
|
|
228
|
+
if default_query_job_config is not None:
|
|
229
|
+
self.driver_features["default_query_job_config"] = default_query_job_config
|
|
230
|
+
|
|
231
|
+
default_load_job_config = self.connection_config.get("default_load_job_config")
|
|
232
|
+
if default_load_job_config is not None:
|
|
233
|
+
self.driver_features["default_load_job_config"] = default_load_job_config
|
|
234
|
+
|
|
235
|
+
# Call connection create callback from driver features
|
|
236
|
+
on_connection_create = self.driver_features.get("on_connection_create")
|
|
237
|
+
if on_connection_create:
|
|
238
|
+
on_connection_create(connection)
|
|
350
239
|
|
|
351
240
|
self._connection_instance = connection
|
|
352
241
|
|
|
353
242
|
except Exception as e:
|
|
354
|
-
|
|
243
|
+
project = self.connection_config.get("project", "Unknown")
|
|
244
|
+
msg = f"Could not configure BigQuery connection for project '{project}'. Error: {e}"
|
|
355
245
|
raise ImproperConfigurationError(msg) from e
|
|
356
246
|
return connection
|
|
357
247
|
|
|
358
248
|
@contextlib.contextmanager
|
|
359
|
-
def provide_connection(self, *
|
|
249
|
+
def provide_connection(self, *_args: Any, **_kwargs: Any) -> "Generator[BigQueryConnection, None, None]":
|
|
360
250
|
"""Provide a BigQuery client within a context manager.
|
|
361
251
|
|
|
362
252
|
Args:
|
|
@@ -369,38 +259,40 @@ class BigQueryConfig(NoPoolSyncConfig[BigQueryConnection, BigQueryDriver]):
|
|
|
369
259
|
connection = self.create_connection()
|
|
370
260
|
yield connection
|
|
371
261
|
|
|
372
|
-
|
|
262
|
+
@contextlib.contextmanager
|
|
263
|
+
def provide_session(
|
|
264
|
+
self, *_args: Any, statement_config: "Optional[StatementConfig]" = None, **_kwargs: Any
|
|
265
|
+
) -> "Generator[BigQueryDriver, None, None]":
|
|
373
266
|
"""Provide a BigQuery driver session context manager.
|
|
374
267
|
|
|
375
268
|
Args:
|
|
376
269
|
*args: Additional arguments.
|
|
270
|
+
statement_config: Optional statement configuration override.
|
|
377
271
|
**kwargs: Additional keyword arguments.
|
|
378
272
|
|
|
379
|
-
|
|
273
|
+
Yields:
|
|
380
274
|
A context manager that yields a BigQueryDriver instance.
|
|
381
275
|
"""
|
|
382
276
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
return session_manager()
|
|
277
|
+
with self.provide_connection(*_args, **_kwargs) as connection:
|
|
278
|
+
# Use shared config or user-provided config or instance default
|
|
279
|
+
final_statement_config = statement_config or self.statement_config
|
|
280
|
+
|
|
281
|
+
driver = self.driver_type(
|
|
282
|
+
connection=connection, statement_config=final_statement_config, driver_features=self.driver_features
|
|
283
|
+
)
|
|
284
|
+
yield driver
|
|
285
|
+
|
|
286
|
+
def get_signature_namespace(self) -> "dict[str, type[Any]]":
|
|
287
|
+
"""Get the signature namespace for BigQuery types.
|
|
288
|
+
|
|
289
|
+
This provides all BigQuery-specific types that Litestar needs to recognize
|
|
290
|
+
to avoid serialization attempts.
|
|
291
|
+
|
|
292
|
+
Returns:
|
|
293
|
+
Dictionary mapping type names to types.
|
|
294
|
+
"""
|
|
295
|
+
|
|
296
|
+
namespace = super().get_signature_namespace()
|
|
297
|
+
namespace.update({"BigQueryConnection": BigQueryConnection, "BigQueryCursor": BigQueryCursor})
|
|
298
|
+
return namespace
|