sqlspec 0.12.1__py3-none-any.whl → 0.12.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of sqlspec might be problematic.
- sqlspec/adapters/aiosqlite/driver.py +16 -11
- sqlspec/adapters/bigquery/driver.py +113 -21
- sqlspec/adapters/duckdb/driver.py +18 -13
- sqlspec/adapters/psycopg/config.py +20 -3
- sqlspec/adapters/psycopg/driver.py +82 -1
- sqlspec/adapters/sqlite/driver.py +50 -10
- sqlspec/driver/mixins/_storage.py +83 -36
- sqlspec/loader.py +8 -30
- sqlspec/statement/builder/base.py +3 -1
- sqlspec/statement/builder/ddl.py +14 -1
- sqlspec/statement/pipelines/analyzers/_analyzer.py +1 -5
- sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +56 -2
- sqlspec/statement/sql.py +40 -6
- sqlspec/storage/backends/fsspec.py +29 -27
- sqlspec/storage/backends/obstore.py +55 -34
- sqlspec/storage/protocol.py +28 -25
- {sqlspec-0.12.1.dist-info → sqlspec-0.12.2.dist-info}/METADATA +1 -1
- {sqlspec-0.12.1.dist-info → sqlspec-0.12.2.dist-info}/RECORD +21 -21
- {sqlspec-0.12.1.dist-info → sqlspec-0.12.2.dist-info}/WHEEL +0 -0
- {sqlspec-0.12.1.dist-info → sqlspec-0.12.2.dist-info}/licenses/LICENSE +0 -0
- {sqlspec-0.12.1.dist-info → sqlspec-0.12.2.dist-info}/licenses/NOTICE +0 -0
--- a/sqlspec/adapters/aiosqlite/driver.py
+++ b/sqlspec/adapters/aiosqlite/driver.py
@@ -203,8 +203,7 @@ class AiosqliteDriver(
         return result

     async def _bulk_load_file(self, file_path: Path, table_name: str, format: str, mode: str, **options: Any) -> int:
-        """Database-specific bulk load implementation."""
-        # TODO: convert this to use the storage backend. it has async support
+        """Database-specific bulk load implementation using storage backend."""
         if format != "csv":
             msg = f"aiosqlite driver only supports CSV for bulk loading, not {format}."
             raise NotImplementedError(msg)
@@ -215,15 +214,21 @@ class AiosqliteDriver(
             if mode == "replace":
                 await cursor.execute(f"DELETE FROM {table_name}")

-            #
-
-
-
-
-
-
-
-
+            # Use async storage backend to read the file
+            file_path_str = str(file_path)
+            backend = self._get_storage_backend(file_path_str)
+            content = await backend.read_text_async(file_path_str, encoding="utf-8")
+            # Parse CSV content
+            import io
+
+            csv_file = io.StringIO(content)
+            reader = csv.reader(csv_file, **options)
+            header = next(reader)  # Skip header
+            placeholders = ", ".join("?" for _ in header)
+            sql = f"INSERT INTO {table_name} VALUES ({placeholders})"
+            data_iter = list(reader)
+            await cursor.executemany(sql, data_iter)
+            rowcount = cursor.rowcount
             await conn.commit()
             return rowcount
         finally:
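For context: the new aiosqlite bulk-load path reads the CSV through the storage backend and then relies on executemany with placeholders derived from the header row. Below is a minimal standalone sketch of that CSV-to-executemany pattern using aiosqlite directly; the load_csv helper and the temp-file database are illustrative, not sqlspec API.

import asyncio
import csv
import io
import tempfile
from pathlib import Path

import aiosqlite  # the library the adapter wraps


async def load_csv(conn: aiosqlite.Connection, table_name: str, csv_text: str) -> int:
    """Insert CSV rows, deriving the placeholder list from the header row."""
    reader = csv.reader(io.StringIO(csv_text))
    header = next(reader)  # header is only used to count columns
    placeholders = ", ".join("?" for _ in header)
    cursor = await conn.executemany(f"INSERT INTO {table_name} VALUES ({placeholders})", list(reader))
    await conn.commit()
    return cursor.rowcount


async def main() -> None:
    db_path = Path(tempfile.gettempdir()) / "bulk_load_demo.sqlite"
    async with aiosqlite.connect(db_path) as conn:
        await conn.execute("CREATE TABLE IF NOT EXISTS t (a INTEGER, b TEXT)")
        print(await load_csv(conn, "t", "a,b\n1,x\n2,y\n"))  # 2


if __name__ == "__main__":
    asyncio.run(main())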
--- a/sqlspec/adapters/bigquery/driver.py
+++ b/sqlspec/adapters/bigquery/driver.py
@@ -1,6 +1,8 @@
+import contextlib
 import datetime
 import io
 import logging
+import uuid
 from collections.abc import Iterator
 from decimal import Decimal
 from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Union, cast
@@ -8,10 +10,12 @@ from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Union, cast
 from google.cloud.bigquery import (
     ArrayQueryParameter,
     Client,
+    ExtractJobConfig,
     LoadJobConfig,
     QueryJob,
     QueryJobConfig,
     ScalarQueryParameter,
+    SourceFormat,
     WriteDisposition,
 )
 from google.cloud.bigquery.table import Row as BigQueryRow
@@ -32,6 +36,8 @@ from sqlspec.typing import DictRow, ModelDTOT, RowT
 from sqlspec.utils.serializers import to_json

 if TYPE_CHECKING:
+    from pathlib import Path
+
     from sqlglot.dialects.dialect import DialectType


@@ -258,23 +264,17 @@ class BigQueryDriver(
                 param_value,
                 type(param_value),
             )
-        # Let BigQuery generate the job ID to avoid collisions
-        # This is the recommended approach for production code and works better with emulators
-        logger.warning("About to send to BigQuery - SQL: %r", sql_str)
-        logger.warning("Query parameters in job config: %r", final_job_config.query_parameters)
         query_job = conn.query(sql_str, job_config=final_job_config)

         # Get the auto-generated job ID for callbacks
         if self.on_job_start and query_job.job_id:
-            try:
+            with contextlib.suppress(Exception):
+                # Callback errors should not interfere with job execution
                 self.on_job_start(query_job.job_id)
-            except Exception as e:
-                logger.warning("Job start callback failed: %s", str(e), extra={"adapter": "bigquery"})
         if self.on_job_complete and query_job.job_id:
-            try:
+            with contextlib.suppress(Exception):
+                # Callback errors should not interfere with job execution
                 self.on_job_complete(query_job.job_id, query_job)
-            except Exception as e:
-                logger.warning("Job complete callback failed: %s", str(e), extra={"adapter": "bigquery"})

         return query_job

@@ -529,28 +529,120 @@ class BigQueryDriver(
     # BigQuery Native Export Support
     # ============================================================================

-    def _export_native(self, query: str, destination_uri: str, format: str, **options: Any) -> int:
-        """BigQuery native export implementation.
+    def _export_native(self, query: str, destination_uri: "Union[str, Path]", format: str, **options: Any) -> int:
+        """BigQuery native export implementation with automatic GCS staging.

-        For
-
+        For GCS URIs, uses direct export. For other locations, automatically stages
+        through a temporary GCS location and transfers to the final destination.

         Args:
             query: SQL query to execute
-            destination_uri: Destination URI (local file path
+            destination_uri: Destination URI (local file path, gs:// URI, or Path object)
             format: Export format (parquet, csv, json, avro)
-            **options: Additional export options
+            **options: Additional export options including 'gcs_staging_bucket'

         Returns:
             Number of rows exported

         Raises:
-            NotImplementedError:
+            NotImplementedError: If no staging bucket is configured for non-GCS destinations
         """
-
-
-
-
+        destination_str = str(destination_uri)
+
+        # If it's already a GCS URI, use direct export
+        if destination_str.startswith("gs://"):
+            return self._export_to_gcs_native(query, destination_str, format, **options)
+
+        # For non-GCS destinations, check if staging is configured
+        staging_bucket = options.get("gcs_staging_bucket") or getattr(self.config, "gcs_staging_bucket", None)
+        if not staging_bucket:
+            # Fall back to fetch + write for non-GCS destinations without staging
+            msg = "BigQuery native export requires GCS staging bucket for non-GCS destinations"
+            raise NotImplementedError(msg)
+
+        # Generate temporary GCS path
+        from datetime import timezone
+
+        timestamp = datetime.datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
+        temp_filename = f"bigquery_export_{timestamp}_{uuid.uuid4().hex[:8]}.{format}"
+        temp_gcs_uri = f"gs://{staging_bucket}/temp_exports/{temp_filename}"
+
+        try:
+            # Export to temporary GCS location
+            rows_exported = self._export_to_gcs_native(query, temp_gcs_uri, format, **options)
+
+            # Transfer from GCS to final destination using storage backend
+            backend, path = self._resolve_backend_and_path(destination_str)
+            gcs_backend = self._get_storage_backend(temp_gcs_uri)
+
+            # Download from GCS and upload to final destination
+            data = gcs_backend.read_bytes(temp_gcs_uri)
+            backend.write_bytes(path, data)
+
+            return rows_exported
+        finally:
+            # Clean up temporary file
+            try:
+                gcs_backend = self._get_storage_backend(temp_gcs_uri)
+                gcs_backend.delete(temp_gcs_uri)
+            except Exception as e:
+                logger.warning("Failed to clean up temporary GCS file %s: %s", temp_gcs_uri, e)
+
+    def _export_to_gcs_native(self, query: str, gcs_uri: str, format: str, **options: Any) -> int:
+        """Direct BigQuery export to GCS.
+
+        Args:
+            query: SQL query to execute
+            gcs_uri: GCS destination URI (must start with gs://)
+            format: Export format (parquet, csv, json, avro)
+            **options: Additional export options
+
+        Returns:
+            Number of rows exported
+        """
+        # First, run the query and store results in a temporary table
+
+        temp_table_id = f"temp_export_{uuid.uuid4().hex[:8]}"
+        dataset_id = getattr(self.connection, "default_dataset", None) or options.get("dataset", "temp")
+
+        # Create a temporary table with query results
+        query_with_table = f"CREATE OR REPLACE TABLE `{dataset_id}.{temp_table_id}` AS {query}"
+        create_job = self._run_query_job(query_with_table, [])
+        create_job.result()
+
+        # Get row count
+        count_query = f"SELECT COUNT(*) as cnt FROM `{dataset_id}.{temp_table_id}`"
+        count_job = self._run_query_job(count_query, [])
+        count_result = list(count_job.result())
+        row_count = count_result[0]["cnt"] if count_result else 0
+
+        try:
+            # Configure extract job
+            extract_config = ExtractJobConfig(**options)  # type: ignore[no-untyped-call]
+
+            # Set format
+            format_mapping = {
+                "parquet": SourceFormat.PARQUET,
+                "csv": SourceFormat.CSV,
+                "json": SourceFormat.NEWLINE_DELIMITED_JSON,
+                "avro": SourceFormat.AVRO,
+            }
+            extract_config.destination_format = format_mapping.get(format, SourceFormat.PARQUET)
+
+            # Extract table to GCS
+            table_ref = self.connection.dataset(dataset_id).table(temp_table_id)
+            extract_job = self.connection.extract_table(table_ref, gcs_uri, job_config=extract_config)
+            extract_job.result()
+
+            return row_count
+        finally:
+            # Clean up temporary table
+            try:
+                delete_query = f"DROP TABLE IF EXISTS `{dataset_id}.{temp_table_id}`"
+                delete_job = self._run_query_job(delete_query, [])
+                delete_job.result()
+            except Exception as e:
+                logger.warning("Failed to clean up temporary table %s: %s", temp_table_id, e)

     # ============================================================================
     # BigQuery Native Arrow Support
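For context: for non-GCS destinations the new export path stages results in a temporary gs:// object (timestamp plus a short uuid suffix under the configured gcs_staging_bucket), copies the bytes to the final destination via the storage backend, and deletes the staging object. The GCS leg is an ordinary BigQuery extract job; below is a minimal sketch of that step with the google-cloud-bigquery client, where the project, dataset, table, and bucket names are placeholders and credentials come from Application Default Credentials.

from google.cloud import bigquery  # assumes google-cloud-bigquery is installed


def extract_table_to_gcs(project: str, dataset: str, table: str, gcs_uri: str, fmt: str = "parquet") -> None:
    """Run a BigQuery extract job that writes a table to a gs:// destination."""
    client = bigquery.Client(project=project)
    job_config = bigquery.ExtractJobConfig()
    job_config.destination_format = {
        "parquet": bigquery.SourceFormat.PARQUET,
        "csv": bigquery.SourceFormat.CSV,
        "json": bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
        "avro": bigquery.SourceFormat.AVRO,
    }.get(fmt, bigquery.SourceFormat.PARQUET)
    extract_job = client.extract_table(f"{project}.{dataset}.{table}", gcs_uri, job_config=job_config)
    extract_job.result()  # block until the export job finishes


# extract_table_to_gcs("my-project", "analytics", "events", "gs://my-bucket/exports/events.parquet")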
--- a/sqlspec/adapters/duckdb/driver.py
+++ b/sqlspec/adapters/duckdb/driver.py
@@ -2,6 +2,7 @@ import contextlib
 import uuid
 from collections.abc import Generator
 from contextlib import contextmanager
+from pathlib import Path
 from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union, cast

 from duckdb import DuckDBPyConnection
@@ -251,7 +252,7 @@ class DuckDBDriver(
             return True
         return False

-    def _export_native(self, query: str, destination_uri: str, format: str, **options: Any) -> int:
+    def _export_native(self, query: str, destination_uri: Union[str, Path], format: str, **options: Any) -> int:
         conn = self._connection(None)
         copy_options: list[str] = []

@@ -283,19 +284,21 @@ class DuckDBDriver(
             raise ValueError(msg)

         options_str = f"({', '.join(copy_options)})" if copy_options else ""
-        copy_sql = f"COPY ({query}) TO '{destination_uri}' {options_str}"
+        copy_sql = f"COPY ({query}) TO '{destination_uri!s}' {options_str}"
         result_rel = conn.execute(copy_sql)
         result = result_rel.fetchone() if result_rel else None
         return result[0] if result else 0

-    def _import_native(
+    def _import_native(
+        self, source_uri: Union[str, Path], table_name: str, format: str, mode: str, **options: Any
+    ) -> int:
         conn = self._connection(None)
         if format == "parquet":
-            read_func = f"read_parquet('{source_uri}')"
+            read_func = f"read_parquet('{source_uri!s}')"
         elif format == "csv":
-            read_func = f"read_csv_auto('{source_uri}')"
+            read_func = f"read_csv_auto('{source_uri!s}')"
         elif format == "json":
-            read_func = f"read_json_auto('{source_uri}')"
+            read_func = f"read_json_auto('{source_uri!s}')"
         else:
             msg = f"Unsupported format for DuckDB native import: {format}"
             raise ValueError(msg)
@@ -320,16 +323,16 @@ class DuckDBDriver(
         return int(count_result[0]) if count_result else 0

     def _read_parquet_native(
-        self, source_uri: str, columns: Optional[list[str]] = None, **options: Any
+        self, source_uri: Union[str, Path], columns: Optional[list[str]] = None, **options: Any
     ) -> "SQLResult[dict[str, Any]]":
         conn = self._connection(None)
         if isinstance(source_uri, list):
             file_list = "[" + ", ".join(f"'{f}'" for f in source_uri) + "]"
             read_func = f"read_parquet({file_list})"
-        elif "*" in source_uri or "?" in source_uri:
-            read_func = f"read_parquet('{source_uri}')"
+        elif "*" in str(source_uri) or "?" in str(source_uri):
+            read_func = f"read_parquet('{source_uri!s}')"
         else:
-            read_func = f"read_parquet('{source_uri}')"
+            read_func = f"read_parquet('{source_uri!s}')"

         column_list = ", ".join(columns) if columns else "*"
         query = f"SELECT {column_list} FROM {read_func}"
@@ -353,7 +356,9 @@ class DuckDBDriver(
             statement=SQL(query), data=rows, column_names=column_names, rows_affected=num_rows, operation_type="SELECT"
         )

-    def _write_parquet_native(
+    def _write_parquet_native(
+        self, data: Union[str, "ArrowTable"], destination_uri: Union[str, Path], **options: Any
+    ) -> None:
         conn = self._connection(None)
         copy_options: list[str] = ["FORMAT PARQUET"]
         if "compression" in options:
@@ -364,13 +369,13 @@ class DuckDBDriver(
         options_str = f"({', '.join(copy_options)})"

         if isinstance(data, str):
-            copy_sql = f"COPY ({data}) TO '{destination_uri}' {options_str}"
+            copy_sql = f"COPY ({data}) TO '{destination_uri!s}' {options_str}"
             conn.execute(copy_sql)
         else:
             temp_name = f"_arrow_data_{uuid.uuid4().hex[:8]}"
             conn.register(temp_name, data)
             try:
-                copy_sql = f"COPY {temp_name} TO '{destination_uri}' {options_str}"
+                copy_sql = f"COPY {temp_name} TO '{destination_uri!s}' {options_str}"
                 conn.execute(copy_sql)
             finally:
                 with contextlib.suppress(Exception):
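For context: the DuckDB changes widen the URI parameters to accept pathlib.Path and use an explicit !s conversion when the value is interpolated into COPY and read_* SQL. A small standalone sketch of the COPY (...) TO pattern the driver builds, run against an in-memory DuckDB connection; the output path is illustrative.

from pathlib import Path

import duckdb


def export_query_to_parquet(conn: duckdb.DuckDBPyConnection, query: str, destination: Path) -> int:
    """COPY a query's result set to a Parquet file and return the row count DuckDB reports."""
    copy_sql = f"COPY ({query}) TO '{destination!s}' (FORMAT PARQUET)"
    result = conn.execute(copy_sql).fetchone()
    return result[0] if result else 0


conn = duckdb.connect()  # in-memory database
conn.execute("CREATE TABLE t AS SELECT * FROM range(5) AS r(i)")
print(export_query_to_parquet(conn, "SELECT * FROM t", Path("/tmp/t.parquet")))  # 5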
--- a/sqlspec/adapters/psycopg/config.py
+++ b/sqlspec/adapters/psycopg/config.py
@@ -304,7 +304,7 @@ class PsycopgSyncConfig(SyncDatabaseConfig[PsycopgSyncConnection, ConnectionPool
             if conninfo:
                 # If conninfo is provided, use it directly
                 # Don't pass kwargs when using conninfo string
-                pool = ConnectionPool(conninfo, **pool_params)
+                pool = ConnectionPool(conninfo, open=True, **pool_params)
             else:
                 # Otherwise, pass connection parameters via kwargs
                 # Remove any non-connection parameters
@@ -312,7 +312,7 @@ class PsycopgSyncConfig(SyncDatabaseConfig[PsycopgSyncConnection, ConnectionPool
                 all_config.pop("row_factory", None)
                 # Remove pool-specific settings that may have been left
                 all_config.pop("kwargs", None)
-                pool = ConnectionPool("", kwargs=all_config, **pool_params)
+                pool = ConnectionPool("", kwargs=all_config, open=True, **pool_params)

             logger.info("Psycopg connection pool created successfully", extra={"adapter": "psycopg"})
         except Exception as e:
@@ -328,11 +328,19 @@ class PsycopgSyncConfig(SyncDatabaseConfig[PsycopgSyncConnection, ConnectionPool
         logger.info("Closing Psycopg connection pool", extra={"adapter": "psycopg"})

         try:
+            # Set a flag to prevent __del__ from running cleanup
+            # This avoids the "cannot join current thread" error during garbage collection
+            if hasattr(self.pool_instance, "_closed"):
+                self.pool_instance._closed = True
+
             self.pool_instance.close()
             logger.info("Psycopg connection pool closed successfully", extra={"adapter": "psycopg"})
         except Exception as e:
             logger.exception("Failed to close Psycopg connection pool", extra={"adapter": "psycopg", "error": str(e)})
             raise
+        finally:
+            # Clear the reference to help garbage collection
+            self.pool_instance = None

     def create_connection(self) -> "PsycopgSyncConnection":
         """Create a single connection (not from pool).
@@ -657,7 +665,16 @@ class PsycopgAsyncConfig(AsyncDatabaseConfig[PsycopgAsyncConnection, AsyncConnec
         if not self.pool_instance:
             return

-
+        try:
+            # Set a flag to prevent __del__ from running cleanup
+            # This avoids the "cannot join current thread" error during garbage collection
+            if hasattr(self.pool_instance, "_closed"):
+                self.pool_instance._closed = True
+
+            await self.pool_instance.close()
+        finally:
+            # Clear the reference to help garbage collection
+            self.pool_instance = None

     async def create_connection(self) -> "PsycopgAsyncConnection":  # pyright: ignore
         """Create a single async connection (not from pool).
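For context: the pool is now opened explicitly at construction (open=True), and the instance reference is cleared after close() so garbage collection at interpreter shutdown does not try to tear the pool down a second time. A minimal sketch of the explicit open/close lifecycle with psycopg_pool, using a placeholder DSN.

from psycopg_pool import ConnectionPool  # assumes psycopg[pool] is installed

pool = ConnectionPool("postgresql://user:pass@localhost:5432/db", min_size=1, max_size=4, open=True)
try:
    with pool.connection() as conn:  # borrow a connection, returned to the pool on exit
        with conn.cursor() as cur:
            cur.execute("SELECT 1")
            print(cur.fetchone())
finally:
    pool.close()  # close explicitly rather than relying on __del__; this joins the pool's worker threads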
--- a/sqlspec/adapters/psycopg/driver.py
+++ b/sqlspec/adapters/psycopg/driver.py
@@ -20,6 +20,7 @@ from sqlspec.driver.mixins import (
     ToSchemaMixin,
     TypeCoercionMixin,
 )
+from sqlspec.exceptions import PipelineExecutionError
 from sqlspec.statement.parameters import ParameterStyle
 from sqlspec.statement.result import ArrowResult, DMLResultDict, ScriptResultDict, SelectResultDict, SQLResult
 from sqlspec.statement.splitter import split_sql_script
@@ -113,6 +114,12 @@ class PsycopgSyncDriver(
         **kwargs: Any,
     ) -> Union[SelectResultDict, DMLResultDict]:
         conn = self._connection(connection)
+
+        # Check if this is a COPY command
+        sql_upper = sql.strip().upper()
+        if sql_upper.startswith("COPY") and ("FROM STDIN" in sql_upper or "TO STDOUT" in sql_upper):
+            return self._handle_copy_command(sql, parameters, conn)
+
         with conn.cursor() as cursor:
             cursor.execute(cast("Query", sql), parameters)
             # Check if the statement returns rows by checking cursor.description
@@ -123,6 +130,38 @@ class PsycopgSyncDriver(
                 return {"data": fetched_data, "column_names": column_names, "rows_affected": len(fetched_data)}
             return {"rows_affected": cursor.rowcount, "status_message": cursor.statusmessage or "OK"}

+    def _handle_copy_command(
+        self, sql: str, data: Any, connection: PsycopgSyncConnection
+    ) -> Union[SelectResultDict, DMLResultDict]:
+        """Handle PostgreSQL COPY commands using cursor.copy() method."""
+        sql_upper = sql.strip().upper()
+
+        with connection.cursor() as cursor:
+            if "TO STDOUT" in sql_upper:
+                # COPY TO STDOUT - read data from the database
+                output_data: list[Any] = []
+                with cursor.copy(cast("Query", sql)) as copy:
+                    output_data.extend(row for row in copy)
+
+                # Return as SelectResultDict with the raw COPY data
+                return {"data": output_data, "column_names": ["copy_data"], "rows_affected": len(output_data)}
+            # COPY FROM STDIN - write data to the database
+            with cursor.copy(cast("Query", sql)) as copy:
+                if data:
+                    # If data is provided, write it to the copy stream
+                    if isinstance(data, (str, bytes)):
+                        copy.write(data)
+                    elif isinstance(data, (list, tuple)):
+                        # If data is a list/tuple of rows, write each row
+                        for row in data:
+                            copy.write_row(row)
+                    else:
+                        # Single row
+                        copy.write_row(data)
+
+            # For COPY operations, cursor.rowcount contains the number of rows affected
+            return {"rows_affected": cursor.rowcount or -1, "status_message": cursor.statusmessage or "COPY COMPLETE"}
+
     def _execute_many(
         self, sql: str, param_list: Any, connection: Optional[PsycopgSyncConnection] = None, **kwargs: Any
     ) -> DMLResultDict:
@@ -242,7 +281,6 @@ class PsycopgSyncDriver(
         Returns:
             List of SQLResult objects from all operations
         """
-        from sqlspec.exceptions import PipelineExecutionError

         results = []
         connection = self._connection()
@@ -489,6 +527,12 @@ class PsycopgAsyncDriver(
         **kwargs: Any,
     ) -> Union[SelectResultDict, DMLResultDict]:
         conn = self._connection(connection)
+
+        # Check if this is a COPY command
+        sql_upper = sql.strip().upper()
+        if sql_upper.startswith("COPY") and ("FROM STDIN" in sql_upper or "TO STDOUT" in sql_upper):
+            return await self._handle_copy_command(sql, parameters, conn)
+
         async with conn.cursor() as cursor:
             await cursor.execute(cast("Query", sql), parameters)

@@ -510,6 +554,38 @@ class PsycopgAsyncDriver(
             }
             return dml_result

+    async def _handle_copy_command(
+        self, sql: str, data: Any, connection: PsycopgAsyncConnection
+    ) -> Union[SelectResultDict, DMLResultDict]:
+        """Handle PostgreSQL COPY commands using cursor.copy() method."""
+        sql_upper = sql.strip().upper()
+
+        async with connection.cursor() as cursor:
+            if "TO STDOUT" in sql_upper:
+                # COPY TO STDOUT - read data from the database
+                output_data = []
+                async with cursor.copy(cast("Query", sql)) as copy:
+                    output_data.extend([row async for row in copy])
+
+                # Return as SelectResultDict with the raw COPY data
+                return {"data": output_data, "column_names": ["copy_data"], "rows_affected": len(output_data)}
+            # COPY FROM STDIN - write data to the database
+            async with cursor.copy(cast("Query", sql)) as copy:
+                if data:
+                    # If data is provided, write it to the copy stream
+                    if isinstance(data, (str, bytes)):
+                        await copy.write(data)
+                    elif isinstance(data, (list, tuple)):
+                        # If data is a list/tuple of rows, write each row
+                        for row in data:
+                            await copy.write_row(row)
+                    else:
+                        # Single row
+                        await copy.write_row(data)
+
+            # For COPY operations, cursor.rowcount contains the number of rows affected
+            return {"rows_affected": cursor.rowcount or -1, "status_message": cursor.statusmessage or "COPY COMPLETE"}
+
     async def _execute_many(
         self, sql: str, param_list: Any, connection: Optional[PsycopgAsyncConnection] = None, **kwargs: Any
     ) -> DMLResultDict:
@@ -595,6 +671,11 @@ class PsycopgAsyncDriver(
         if statement.expression:
             operation_type = str(statement.expression.key).upper()

+        # Handle case where we got a SelectResultDict but it was routed here due to parsing being disabled
+        if is_dict_with_field(result, "data") and is_dict_with_field(result, "column_names"):
+            # This is actually a SELECT result, wrap it properly
+            return await self._wrap_select_result(statement, cast("SelectResultDict", result), **kwargs)
+
         if is_dict_with_field(result, "statements_executed"):
             return SQLResult[RowT](
                 statement=statement,
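For context: COPY ... FROM STDIN / TO STDOUT statements are now routed to cursor.copy() instead of cursor.execute(). A standalone sketch of the psycopg 3 copy protocol the new handlers rely on, with a placeholder DSN and a temporary table.

import psycopg  # psycopg 3

with psycopg.connect("postgresql://user:pass@localhost:5432/db") as conn:
    with conn.cursor() as cur:
        cur.execute("CREATE TEMP TABLE demo (id int, name text)")

        # COPY FROM STDIN: stream rows into the table through the copy protocol
        with cur.copy("COPY demo (id, name) FROM STDIN") as copy:
            for row in [(1, "a"), (2, "b")]:
                copy.write_row(row)

        # COPY TO STDOUT: iterate the raw COPY data coming back from the server
        with cur.copy("COPY demo TO STDOUT") as copy:
            for chunk in copy:
                print(bytes(chunk))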
--- a/sqlspec/adapters/sqlite/driver.py
+++ b/sqlspec/adapters/sqlite/driver.py
@@ -197,8 +197,41 @@ class SqliteDriver(
         result: ScriptResultDict = {"statements_executed": -1, "status_message": "SCRIPT EXECUTED"}
         return result

+    def _ingest_arrow_table(self, table: Any, table_name: str, mode: str = "create", **options: Any) -> int:
+        """SQLite-specific Arrow table ingestion using CSV conversion.
+
+        Since SQLite only supports CSV bulk loading, we convert the Arrow table
+        to CSV format first using the storage backend for efficient operations.
+        """
+        import io
+        import tempfile
+
+        import pyarrow.csv as pa_csv
+
+        # Convert Arrow table to CSV in memory
+        csv_buffer = io.BytesIO()
+        pa_csv.write_csv(table, csv_buffer)
+        csv_content = csv_buffer.getvalue()
+
+        # Create a temporary file path
+        temp_filename = f"sqlspec_temp_{table_name}_{id(self)}.csv"
+        temp_path = Path(tempfile.gettempdir()) / temp_filename
+
+        # Use storage backend to write the CSV content
+        backend = self._get_storage_backend(temp_path)
+        backend.write_bytes(str(temp_path), csv_content)
+
+        try:
+            # Use SQLite's CSV bulk load
+            return self._bulk_load_file(temp_path, table_name, "csv", mode, **options)
+        finally:
+            # Clean up using storage backend
+            with contextlib.suppress(Exception):
+                # Best effort cleanup
+                backend.delete(str(temp_path))
+
     def _bulk_load_file(self, file_path: Path, table_name: str, format: str, mode: str, **options: Any) -> int:
-        """Database-specific bulk load implementation."""
+        """Database-specific bulk load implementation using storage backend."""
         if format != "csv":
             msg = f"SQLite driver only supports CSV for bulk loading, not {format}."
             raise NotImplementedError(msg)
@@ -208,16 +241,23 @@ class SqliteDriver(
             if mode == "replace":
                 cursor.execute(f"DELETE FROM {table_name}")

-
-
-
-
-
+            # Use storage backend to read the file
+            backend = self._get_storage_backend(file_path)
+            content = backend.read_text(str(file_path), encoding="utf-8")
+
+            # Parse CSV content
+            import io
+
+            csv_file = io.StringIO(content)
+            reader = csv.reader(csv_file, **options)
+            header = next(reader)  # Skip header
+            placeholders = ", ".join("?" for _ in header)
+            sql = f"INSERT INTO {table_name} VALUES ({placeholders})"

-
-
-
-
+            # executemany is efficient for bulk inserts
+            data_iter = list(reader)  # Read all data into memory
+            cursor.executemany(sql, data_iter)
+            return cursor.rowcount

     def _wrap_select_result(
         self, statement: SQL, result: SelectResultDict, schema_type: Optional[type[ModelDTOT]] = None, **kwargs: Any