sqlspec 0.27.0__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of sqlspec might be problematic.
- sqlspec/_typing.py +93 -0
- sqlspec/adapters/adbc/adk/store.py +21 -11
- sqlspec/adapters/adbc/data_dictionary.py +27 -5
- sqlspec/adapters/adbc/driver.py +83 -14
- sqlspec/adapters/aiosqlite/adk/store.py +27 -18
- sqlspec/adapters/asyncmy/adk/store.py +26 -16
- sqlspec/adapters/asyncpg/adk/store.py +26 -16
- sqlspec/adapters/asyncpg/data_dictionary.py +24 -17
- sqlspec/adapters/bigquery/adk/store.py +30 -21
- sqlspec/adapters/bigquery/config.py +11 -0
- sqlspec/adapters/bigquery/driver.py +138 -1
- sqlspec/adapters/duckdb/adk/store.py +21 -11
- sqlspec/adapters/duckdb/driver.py +87 -1
- sqlspec/adapters/oracledb/adk/store.py +89 -206
- sqlspec/adapters/oracledb/driver.py +183 -2
- sqlspec/adapters/oracledb/litestar/store.py +22 -24
- sqlspec/adapters/psqlpy/adk/store.py +28 -27
- sqlspec/adapters/psqlpy/data_dictionary.py +24 -17
- sqlspec/adapters/psqlpy/driver.py +7 -10
- sqlspec/adapters/psycopg/adk/store.py +51 -33
- sqlspec/adapters/psycopg/data_dictionary.py +48 -34
- sqlspec/adapters/sqlite/adk/store.py +29 -19
- sqlspec/config.py +100 -2
- sqlspec/core/filters.py +18 -10
- sqlspec/core/result.py +133 -2
- sqlspec/driver/_async.py +89 -0
- sqlspec/driver/_common.py +64 -29
- sqlspec/driver/_sync.py +95 -0
- sqlspec/extensions/adk/migrations/0001_create_adk_tables.py +2 -2
- sqlspec/extensions/adk/service.py +3 -3
- sqlspec/extensions/adk/store.py +8 -8
- sqlspec/extensions/aiosql/adapter.py +3 -15
- sqlspec/extensions/fastapi/__init__.py +21 -0
- sqlspec/extensions/fastapi/extension.py +331 -0
- sqlspec/extensions/fastapi/providers.py +543 -0
- sqlspec/extensions/flask/__init__.py +36 -0
- sqlspec/extensions/flask/_state.py +71 -0
- sqlspec/extensions/flask/_utils.py +40 -0
- sqlspec/extensions/flask/extension.py +389 -0
- sqlspec/extensions/litestar/config.py +3 -6
- sqlspec/extensions/litestar/plugin.py +26 -2
- sqlspec/extensions/starlette/__init__.py +10 -0
- sqlspec/extensions/starlette/_state.py +25 -0
- sqlspec/extensions/starlette/_utils.py +52 -0
- sqlspec/extensions/starlette/extension.py +254 -0
- sqlspec/extensions/starlette/middleware.py +154 -0
- sqlspec/protocols.py +40 -0
- sqlspec/storage/_utils.py +1 -14
- sqlspec/storage/backends/fsspec.py +3 -5
- sqlspec/storage/backends/local.py +1 -1
- sqlspec/storage/backends/obstore.py +10 -18
- sqlspec/typing.py +16 -0
- sqlspec/utils/__init__.py +25 -4
- sqlspec/utils/arrow_helpers.py +81 -0
- sqlspec/utils/module_loader.py +203 -3
- sqlspec/utils/portal.py +311 -0
- sqlspec/utils/serializers.py +110 -1
- sqlspec/utils/sync_tools.py +15 -5
- sqlspec/utils/type_guards.py +25 -0
- {sqlspec-0.27.0.dist-info → sqlspec-0.28.0.dist-info}/METADATA +2 -2
- {sqlspec-0.27.0.dist-info → sqlspec-0.28.0.dist-info}/RECORD +64 -50
- {sqlspec-0.27.0.dist-info → sqlspec-0.28.0.dist-info}/WHEEL +0 -0
- {sqlspec-0.27.0.dist-info → sqlspec-0.28.0.dist-info}/entry_points.txt +0 -0
- {sqlspec-0.27.0.dist-info → sqlspec-0.28.0.dist-info}/licenses/LICENSE +0 -0
```diff
--- a/sqlspec/adapters/asyncpg/data_dictionary.py
+++ b/sqlspec/adapters/asyncpg/data_dictionary.py
@@ -117,7 +117,7 @@ class PostgresAsyncDataDictionary(AsyncDataDictionaryBase):
     async def get_columns(
         self, driver: AsyncDriverAdapterBase, table: str, schema: "str | None" = None
     ) -> "list[dict[str, Any]]":
-        """Get column information for a table using
+        """Get column information for a table using pg_catalog.
 
         Args:
             driver: AsyncPG driver instance
@@ -130,25 +130,32 @@ class PostgresAsyncDataDictionary(AsyncDataDictionaryBase):
             - data_type: PostgreSQL data type
             - is_nullable: Whether column allows NULL (YES/NO)
             - column_default: Default value if any
+
+        Notes:
+            Uses pg_catalog instead of information_schema to avoid potential
+            issues with PostgreSQL 'name' type in some drivers.
         """
         asyncpg_driver = cast("AsyncpgDriver", driver)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        schema_name = schema or "public"
+        sql = """
+            SELECT
+                a.attname::text AS column_name,
+                pg_catalog.format_type(a.atttypid, a.atttypmod) AS data_type,
+                CASE WHEN a.attnotnull THEN 'NO' ELSE 'YES' END AS is_nullable,
+                pg_catalog.pg_get_expr(d.adbin, d.adrelid)::text AS column_default
+            FROM pg_catalog.pg_attribute a
+            JOIN pg_catalog.pg_class c ON a.attrelid = c.oid
+            JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
+            LEFT JOIN pg_catalog.pg_attrdef d ON a.attrelid = d.adrelid AND a.attnum = d.adnum
+            WHERE c.relname = $1
+              AND n.nspname = $2
+              AND a.attnum > 0
+              AND NOT a.attisdropped
+            ORDER BY a.attnum
+        """
+
+        result = await asyncpg_driver.execute(sql, (table, schema_name))
         return result.data or []
 
     def list_available_features(self) -> "list[str]":
```
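The rewritten query above can be sanity-checked outside sqlspec with asyncpg directly. A minimal sketch (the DSN and table name are placeholders, not part of this release); note that pg_catalog.format_type returns fully resolved type names such as character varying(255), which is what the data_type column now carries:

```python
import asyncio

import asyncpg

# Same pg_catalog lookup as in the diff above, trimmed to two columns.
PG_CATALOG_COLUMNS_SQL = """
SELECT
    a.attname::text AS column_name,
    pg_catalog.format_type(a.atttypid, a.atttypmod) AS data_type
FROM pg_catalog.pg_attribute a
JOIN pg_catalog.pg_class c ON a.attrelid = c.oid
JOIN pg_catalog.pg_namespace n ON c.relnamespace = n.oid
WHERE c.relname = $1
  AND n.nspname = $2
  AND a.attnum > 0
  AND NOT a.attisdropped
ORDER BY a.attnum
"""


async def main() -> None:
    # DSN and table name are illustrative.
    conn = await asyncpg.connect("postgresql://localhost/example")
    try:
        for row in await conn.fetch(PG_CATALOG_COLUMNS_SQL, "users", "public"):
            print(row["column_name"], row["data_type"])
    finally:
        await conn.close()


asyncio.run(main())
```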
```diff
--- a/sqlspec/adapters/bigquery/adk/store.py
+++ b/sqlspec/adapters/bigquery/adk/store.py
@@ -8,7 +8,7 @@ from google.cloud.bigquery import QueryJobConfig, ScalarQueryParameter
 from sqlspec.extensions.adk import BaseAsyncADKStore, EventRecord, SessionRecord
 from sqlspec.utils.logging import get_logger
 from sqlspec.utils.serializers import from_json, to_json
-from sqlspec.utils.sync_tools import async_
+from sqlspec.utils.sync_tools import async_, run_
 
 if TYPE_CHECKING:
     from sqlspec.adapters.bigquery.config import BigQueryConfig
```
```diff
--- a/sqlspec/adapters/bigquery/adk/store.py
+++ b/sqlspec/adapters/bigquery/adk/store.py
@@ -102,7 +102,7 @@ class BigQueryADKStore(BaseAsyncADKStore["BigQueryConfig"]):
             return f"`{self._dataset_id}.{table_name}`"
         return f"`{table_name}`"
 
-    def _get_create_sessions_table_sql(self) -> str:
+    async def _get_create_sessions_table_sql(self) -> str:
         """Get BigQuery CREATE TABLE SQL for sessions.
 
         Returns:
@@ -136,7 +136,7 @@ class BigQueryADKStore(BaseAsyncADKStore["BigQueryConfig"]):
         CLUSTER BY app_name, user_id
         """
 
-    def _get_create_events_table_sql(self) -> str:
+    async def _get_create_events_table_sql(self) -> str:
         """Get BigQuery CREATE TABLE SQL for events.
 
         Returns:
@@ -193,9 +193,9 @@ class BigQueryADKStore(BaseAsyncADKStore["BigQueryConfig"]):
 
     def _create_tables(self) -> None:
         """Synchronous implementation of create_tables."""
-        with self._config.
-
-
+        with self._config.provide_session() as driver:
+            driver.execute_script(run_(self._get_create_sessions_table_sql)())
+            driver.execute_script(run_(self._get_create_events_table_sql)())
         logger.debug("Created BigQuery ADK tables: %s, %s", self._session_table, self._events_table)
 
     async def create_tables(self) -> None:
```
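The _create_tables change above works because run_ from sqlspec.utils.sync_tools bridges the now-async SQL builders into the synchronous path. A toy equivalent built on asyncio.run, shown only to illustrate the calling convention; sqlspec's real implementation (see sqlspec/utils/sync_tools.py and the new sqlspec/utils/portal.py in this release) is more involved:

```python
import asyncio
from collections.abc import Awaitable, Callable
from typing import TypeVar

T = TypeVar("T")


def run_(fn: Callable[..., Awaitable[T]]) -> Callable[..., T]:
    """Toy sketch: wrap an async callable so sync code can invoke it."""

    def wrapper(*args: object, **kwargs: object) -> T:
        return asyncio.run(fn(*args, **kwargs))

    return wrapper


async def _get_create_sessions_table_sql() -> str:
    return "CREATE TABLE sessions (...)"  # illustrative body


# Same call shape as driver.execute_script(run_(self._get_create_sessions_table_sql)())
print(run_(_get_create_sessions_table_sql)())
```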
```diff
--- a/sqlspec/adapters/bigquery/adk/store.py
+++ b/sqlspec/adapters/bigquery/adk/store.py
@@ -351,20 +351,29 @@ class BigQueryADKStore(BaseAsyncADKStore["BigQueryConfig"]):
         """
         await async_(self._update_session_state)(session_id, state)
 
-    def _list_sessions(self, app_name: str, user_id: str) -> "list[SessionRecord]":
+    def _list_sessions(self, app_name: str, user_id: "str | None") -> "list[SessionRecord]":
         """Synchronous implementation of list_sessions."""
         table_name = self._get_full_table_name(self._session_table)
-        sql = f"""
-            SELECT id, app_name, user_id, JSON_VALUE(state) as state, create_time, update_time
-            FROM {table_name}
-            WHERE app_name = @app_name AND user_id = @user_id
-            ORDER BY update_time DESC
-        """
 
-
-
-
-
+        if user_id is None:
+            sql = f"""
+                SELECT id, app_name, user_id, JSON_VALUE(state) as state, create_time, update_time
+                FROM {table_name}
+                WHERE app_name = @app_name
+                ORDER BY update_time DESC
+            """
+            params = [ScalarQueryParameter("app_name", "STRING", app_name)]
+        else:
+            sql = f"""
+                SELECT id, app_name, user_id, JSON_VALUE(state) as state, create_time, update_time
+                FROM {table_name}
+                WHERE app_name = @app_name AND user_id = @user_id
+                ORDER BY update_time DESC
+            """
+            params = [
+                ScalarQueryParameter("app_name", "STRING", app_name),
+                ScalarQueryParameter("user_id", "STRING", user_id),
+            ]
 
         with self._config.provide_connection() as conn:
             job_config = QueryJobConfig(query_parameters=params)
@@ -383,18 +392,18 @@ class BigQueryADKStore(BaseAsyncADKStore["BigQueryConfig"]):
             for row in results
         ]
 
-    async def list_sessions(self, app_name: str, user_id: str) -> "list[SessionRecord]":
-        """List
+    async def list_sessions(self, app_name: str, user_id: str | None = None) -> "list[SessionRecord]":
+        """List sessions for an app, optionally filtered by user.
 
         Args:
             app_name: Application name.
-            user_id: User identifier.
+            user_id: User identifier. If None, lists all sessions for the app.
 
         Returns:
             List of session records ordered by update_time DESC.
 
         Notes:
-            Uses clustering on (app_name, user_id) for efficiency.
+            Uses clustering on (app_name, user_id) when user_id is provided for efficiency.
         """
         return await async_(self._list_sessions)(app_name, user_id)
 
```
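Usage sketch for the widened signature; the store and config construction are illustrative, and a real BigQueryConfig needs project/dataset arguments:

```python
import asyncio

from sqlspec.adapters.bigquery.adk.store import BigQueryADKStore
from sqlspec.adapters.bigquery.config import BigQueryConfig


async def main() -> None:
    # Config arguments elided; project/dataset setup is deployment-specific.
    store = BigQueryADKStore(BigQueryConfig())
    every_user = await store.list_sessions("my-app")  # app-wide scan
    one_user = await store.list_sessions("my-app", user_id="u-123")  # clustered lookup
    print(len(every_user), len(one_user))


asyncio.run(main())
```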
```diff
--- a/sqlspec/adapters/bigquery/config.py
+++ b/sqlspec/adapters/bigquery/config.py
@@ -67,6 +67,17 @@ class BigQueryDriverFeatures(TypedDict):
     """BigQuery driver-specific features configuration.
 
     Only non-standard BigQuery client parameters that are SQLSpec-specific extensions.
+
+    Attributes:
+        connection_instance: Pre-existing BigQuery connection instance to use.
+        on_job_start: Callback invoked when a query job starts.
+        on_job_complete: Callback invoked when a query job completes.
+        on_connection_create: Callback invoked when connection is created.
+        json_serializer: Custom JSON serializer for dict/list parameter conversion.
+            Defaults to sqlspec.utils.serializers.to_json if not provided.
+        enable_uuid_conversion: Enable automatic UUID string conversion.
+            When True (default), UUID strings are automatically converted to UUID objects.
+            When False, UUID strings are treated as regular strings.
     """
 
     connection_instance: NotRequired["BigQueryConnection"]
```
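A hypothetical configuration exercising the documented features; the driver_features keyword on BigQueryConfig and the callback signature are assumptions based on the TypedDict above:

```python
from sqlspec.adapters.bigquery.config import BigQueryConfig
from sqlspec.utils.serializers import to_json

# Keys mirror the BigQueryDriverFeatures attributes documented in the diff.
config = BigQueryConfig(
    driver_features={
        "json_serializer": to_json,  # the documented default, made explicit
        "enable_uuid_conversion": False,  # keep UUID strings as plain strings
        "on_job_complete": lambda *args: None,  # callback signature not specified here
    }
)
```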
```diff
--- a/sqlspec/adapters/bigquery/driver.py
+++ b/sqlspec/adapters/bigquery/driver.py
@@ -33,8 +33,11 @@ if TYPE_CHECKING:
     from collections.abc import Callable
     from contextlib import AbstractContextManager
 
-    from sqlspec.
+    from sqlspec.builder import QueryBuilder
+    from sqlspec.core import SQL, SQLResult, Statement, StatementFilter
+    from sqlspec.core.result import ArrowResult
     from sqlspec.driver import SyncDataDictionaryBase
+    from sqlspec.typing import StatementParameters
 
 logger = logging.getLogger(__name__)
 
@@ -758,3 +761,137 @@ class BigQueryDriver(SyncDriverAdapterBase):
 
             self._data_dictionary = BigQuerySyncDataDictionary()
         return self._data_dictionary
+
+    def _storage_api_available(self) -> bool:
+        """Check if BigQuery Storage API is available.
+
+        Returns:
+            True if Storage API is available and working, False otherwise
+        """
+        try:
+            from google.cloud import bigquery_storage_v1  # type: ignore[attr-defined]
+
+            # Try to create client (will fail if API not enabled or credentials missing)
+            _ = bigquery_storage_v1.BigQueryReadClient()
+        except ImportError:
+            # Package not installed
+            return False
+        except Exception:
+            # API not enabled or permissions issue
+            return False
+        else:
+            return True
+
+    def select_to_arrow(
+        self,
+        statement: "Statement | QueryBuilder",
+        /,
+        *parameters: "StatementParameters | StatementFilter",
+        statement_config: "StatementConfig | None" = None,
+        return_format: str = "table",
+        native_only: bool = False,
+        batch_size: int | None = None,
+        arrow_schema: Any = None,
+        **kwargs: Any,
+    ) -> "ArrowResult":
+        """Execute query and return results as Apache Arrow (BigQuery native with Storage API).
+
+        BigQuery provides native Arrow via Storage API (query_job.to_arrow()).
+        Requires google-cloud-bigquery-storage package and API enabled.
+        Falls back to dict conversion if Storage API not available.
+
+        Args:
+            statement: SQL statement, string, or QueryBuilder
+            *parameters: Query parameters or filters
+            statement_config: Optional statement configuration override
+            return_format: "table" for pyarrow.Table (default), "batch" for RecordBatch
+            native_only: If True, raise error if Storage API unavailable (default: False)
+            batch_size: Batch size hint (for future streaming implementation)
+            arrow_schema: Optional pyarrow.Schema for type casting
+            **kwargs: Additional keyword arguments
+
+        Returns:
+            ArrowResult with native Arrow data (if Storage API available) or converted data
+
+        Raises:
+            MissingDependencyError: If pyarrow not installed, or if Storage API not available and native_only=True
+            SQLExecutionError: If query execution fails
+
+        Example:
+            >>> # Will use native Arrow if Storage API available, otherwise converts
+            >>> result = driver.select_to_arrow(
+            ...     "SELECT * FROM dataset.users WHERE age > @age",
+            ...     {"age": 18},
+            ... )
+            >>> df = result.to_pandas()
+
+            >>> # Force native Arrow (raises if Storage API unavailable)
+            >>> result = driver.select_to_arrow(
+            ...     "SELECT * FROM dataset.users", native_only=True
+            ... )
+        """
+        from sqlspec.utils.module_loader import ensure_pyarrow
+
+        ensure_pyarrow()
+
+        # Check Storage API availability
+        if not self._storage_api_available():
+            if native_only:
+                from sqlspec.exceptions import MissingDependencyError
+
+                msg = (
+                    "BigQuery native Arrow requires Storage API.\n"
+                    "1. Install: pip install google-cloud-bigquery-storage\n"
+                    "2. Enable API: https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com\n"
+                    "3. Grant permissions: roles/bigquery.dataViewer"
+                )
+                raise MissingDependencyError(
+                    package="google-cloud-bigquery-storage", install_package="google-cloud-bigquery-storage"
+                ) from RuntimeError(msg)
+
+            # Fallback to conversion path
+            result: ArrowResult = super().select_to_arrow(
+                statement,
+                *parameters,
+                statement_config=statement_config,
+                return_format=return_format,
+                native_only=native_only,
+                batch_size=batch_size,
+                arrow_schema=arrow_schema,
+                **kwargs,
+            )
+            return result
+
+        # Use native path with Storage API
+        import pyarrow as pa
+
+        from sqlspec.core.result import create_arrow_result
+
+        # Prepare statement
+        config = statement_config or self.statement_config
+        prepared_statement = self.prepare_statement(statement, parameters, statement_config=config, kwargs=kwargs)
+
+        # Get compiled SQL and parameters
+        sql, driver_params = self._get_compiled_sql(prepared_statement, config)
+
+        # Execute query using existing _run_query_job method
+        with self.handle_database_exceptions():
+            query_job = self._run_query_job(sql, driver_params)
+            query_job.result()  # Wait for completion
+
+            # Native Arrow via Storage API
+            arrow_table = query_job.to_arrow()
+
+        # Apply schema casting if requested
+        if arrow_schema is not None:
+            arrow_table = arrow_table.cast(arrow_schema)
+
+        # Convert to batch if requested
+        if return_format == "batch":
+            batches = arrow_table.to_batches()
+            arrow_data: Any = batches[0] if batches else pa.RecordBatch.from_pydict({})
+        else:
+            arrow_data = arrow_table
+
+        # Create ArrowResult
+        return create_arrow_result(statement=prepared_statement, data=arrow_data, rows_affected=arrow_data.num_rows)
```
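A hedged consumption pattern for the two code paths, assuming driver is an open BigQueryDriver session: with native_only=True the Storage API requirement surfaces as an explicit MissingDependencyError instead of a silent fallback.

```python
from sqlspec.exceptions import MissingDependencyError

# Prefer the native Storage API path, degrade explicitly if it is missing.
try:
    result = driver.select_to_arrow("SELECT * FROM dataset.users", native_only=True)
except MissingDependencyError:
    result = driver.select_to_arrow("SELECT * FROM dataset.users")  # conversion fallback

df = result.to_pandas()
```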
```diff
--- a/sqlspec/adapters/duckdb/adk/store.py
+++ b/sqlspec/adapters/duckdb/adk/store.py
@@ -315,29 +315,39 @@ class DuckdbADKStore(BaseSyncADKStore["DuckDBConfig"]):
             conn.execute(delete_session_sql, (session_id,))
             conn.commit()
 
-    def list_sessions(self, app_name: str, user_id: str) -> "list[SessionRecord]":
-        """List
+    def list_sessions(self, app_name: str, user_id: str | None = None) -> "list[SessionRecord]":
+        """List sessions for an app, optionally filtered by user.
 
         Args:
             app_name: Application name.
-            user_id: User identifier.
+            user_id: User identifier. If None, lists all sessions for the app.
 
         Returns:
             List of session records ordered by update_time DESC.
 
         Notes:
-            Uses composite index on (app_name, user_id).
-        """
-        sql = f"""
-            SELECT id, app_name, user_id, state, create_time, update_time
-            FROM {self._session_table}
-            WHERE app_name = ? AND user_id = ?
-            ORDER BY update_time DESC
+            Uses composite index on (app_name, user_id) when user_id is provided.
         """
+        if user_id is None:
+            sql = f"""
+                SELECT id, app_name, user_id, state, create_time, update_time
+                FROM {self._session_table}
+                WHERE app_name = ?
+                ORDER BY update_time DESC
+            """
+            params: tuple[str, ...] = (app_name,)
+        else:
+            sql = f"""
+                SELECT id, app_name, user_id, state, create_time, update_time
+                FROM {self._session_table}
+                WHERE app_name = ? AND user_id = ?
+                ORDER BY update_time DESC
+            """
+            params = (app_name, user_id)
 
         try:
             with self._config.provide_connection() as conn:
-                cursor = conn.execute(sql,
+                cursor = conn.execute(sql, params)
                 rows = cursor.fetchall()
 
                 return [
```
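The synchronous DuckDB counterpart, sketched under the same assumptions (DuckDBConfig defaulting to an in-memory database, store construction and create_tables following the BaseSyncADKStore pattern in this diff):

```python
from sqlspec.adapters.duckdb import DuckDBConfig
from sqlspec.adapters.duckdb.adk.store import DuckdbADKStore

# In-memory DuckDB; constructor shape mirrors the BigQuery store example.
store = DuckdbADKStore(DuckDBConfig())
store.create_tables()

print(store.list_sessions("my-app"))  # all sessions for the app
print(store.list_sessions("my-app", "u-123"))  # single user's sessions
```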
```diff
--- a/sqlspec/adapters/duckdb/driver.py
+++ b/sqlspec/adapters/duckdb/driver.py
@@ -15,6 +15,7 @@ from sqlspec.core.statement import SQL, StatementConfig
 from sqlspec.driver import SyncDriverAdapterBase
 from sqlspec.exceptions import (
     CheckViolationError,
+    DatabaseConnectionError,
     DataError,
     ForeignKeyViolationError,
     IntegrityError,
@@ -32,9 +33,12 @@ if TYPE_CHECKING:
     from contextlib import AbstractContextManager
 
     from sqlspec.adapters.duckdb._types import DuckDBConnection
-    from sqlspec.
+    from sqlspec.builder import QueryBuilder
+    from sqlspec.core import Statement, StatementFilter
+    from sqlspec.core.result import ArrowResult, SQLResult
     from sqlspec.driver import ExecutionResult
     from sqlspec.driver._sync import SyncDataDictionaryBase
+    from sqlspec.typing import StatementParameters
 
 __all__ = ("DuckDBCursor", "DuckDBDriver", "DuckDBExceptionHandler", "duckdb_statement_config")
 
@@ -447,3 +451,85 @@ class DuckDBDriver(SyncDriverAdapterBase):
         if self._data_dictionary is None:
             self._data_dictionary = DuckDBSyncDataDictionary()
         return self._data_dictionary
+
+    def select_to_arrow(
+        self,
+        statement: "Statement | QueryBuilder",
+        /,
+        *parameters: "StatementParameters | StatementFilter",
+        statement_config: "StatementConfig | None" = None,
+        return_format: str = "table",
+        native_only: bool = False,
+        batch_size: int | None = None,
+        arrow_schema: Any = None,
+        **kwargs: Any,
+    ) -> "ArrowResult":
+        """Execute query and return results as Apache Arrow (DuckDB native path).
+
+        DuckDB provides native Arrow support via cursor.arrow().
+        This is the fastest path due to DuckDB's columnar architecture.
+
+        Args:
+            statement: SQL statement, string, or QueryBuilder
+            *parameters: Query parameters or filters
+            statement_config: Optional statement configuration override
+            return_format: "table" for pyarrow.Table (default), "batch" for RecordBatch
+            native_only: Ignored for DuckDB (always uses native path)
+            batch_size: Batch size hint (for future streaming implementation)
+            arrow_schema: Optional pyarrow.Schema for type casting
+            **kwargs: Additional keyword arguments
+
+        Returns:
+            ArrowResult with native Arrow data
+
+        Raises:
+            MissingDependencyError: If pyarrow not installed
+            SQLExecutionError: If query execution fails
+
+        Example:
+            >>> result = driver.select_to_arrow(
+            ...     "SELECT * FROM users WHERE age > ?", 18
+            ... )
+            >>> df = result.to_pandas()  # Fast zero-copy conversion
+        """
+        from sqlspec.utils.module_loader import ensure_pyarrow
+
+        ensure_pyarrow()
+
+        import pyarrow as pa
+
+        from sqlspec.core.result import create_arrow_result
+
+        # Prepare statement
+        config = statement_config or self.statement_config
+        prepared_statement = self.prepare_statement(statement, parameters, statement_config=config, kwargs=kwargs)
+
+        # Execute query and get native Arrow
+        with self.with_cursor(self.connection) as cursor, self.handle_database_exceptions():
+            if cursor is None:
+                msg = "Failed to create cursor"
+                raise DatabaseConnectionError(msg)
+
+            # Get compiled SQL and parameters
+            sql, driver_params = self._get_compiled_sql(prepared_statement, config)
+
+            # Execute query
+            cursor.execute(sql, driver_params or ())
+
+            # DuckDB native Arrow (zero-copy!)
+            arrow_reader = cursor.arrow()
+            arrow_table = arrow_reader.read_all()
+
+        # Apply schema casting if requested
+        if arrow_schema is not None:
+            arrow_table = arrow_table.cast(arrow_schema)
+
+        # Convert to batch if requested
+        if return_format == "batch":
+            batches = arrow_table.to_batches()
+            arrow_data: Any = batches[0] if batches else pa.RecordBatch.from_pydict({})
+        else:
+            arrow_data = arrow_table
+
+        # Create ArrowResult
+        return create_arrow_result(statement=prepared_statement, data=arrow_data, rows_affected=arrow_data.num_rows)
```
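An end-to-end sketch of the native Arrow path against an in-memory DuckDB; provide_session, execute_script, and to_pandas follow the patterns visible elsewhere in this diff, while the DuckDBConfig import path is assumed:

```python
from sqlspec.adapters.duckdb import DuckDBConfig

# In-memory database; seed a table, then fetch through the native Arrow path.
config = DuckDBConfig()
with config.provide_session() as driver:
    driver.execute_script("CREATE TABLE t AS SELECT range AS n FROM range(5)")
    result = driver.select_to_arrow("SELECT n FROM t WHERE n > ?", 2)
    print(result.to_pandas())  # rows with n = 3, 4
```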