sqlspec 0.12.1__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sqlspec might be problematic.
- sqlspec/_sql.py +21 -180
- sqlspec/adapters/adbc/config.py +10 -12
- sqlspec/adapters/adbc/driver.py +120 -118
- sqlspec/adapters/aiosqlite/config.py +3 -3
- sqlspec/adapters/aiosqlite/driver.py +116 -141
- sqlspec/adapters/asyncmy/config.py +3 -4
- sqlspec/adapters/asyncmy/driver.py +123 -135
- sqlspec/adapters/asyncpg/config.py +3 -7
- sqlspec/adapters/asyncpg/driver.py +98 -140
- sqlspec/adapters/bigquery/config.py +4 -5
- sqlspec/adapters/bigquery/driver.py +231 -181
- sqlspec/adapters/duckdb/config.py +3 -6
- sqlspec/adapters/duckdb/driver.py +132 -124
- sqlspec/adapters/oracledb/config.py +6 -5
- sqlspec/adapters/oracledb/driver.py +242 -259
- sqlspec/adapters/psqlpy/config.py +3 -7
- sqlspec/adapters/psqlpy/driver.py +118 -93
- sqlspec/adapters/psycopg/config.py +34 -30
- sqlspec/adapters/psycopg/driver.py +342 -214
- sqlspec/adapters/sqlite/config.py +3 -3
- sqlspec/adapters/sqlite/driver.py +150 -104
- sqlspec/config.py +0 -4
- sqlspec/driver/_async.py +89 -98
- sqlspec/driver/_common.py +52 -17
- sqlspec/driver/_sync.py +81 -105
- sqlspec/driver/connection.py +207 -0
- sqlspec/driver/mixins/_csv_writer.py +91 -0
- sqlspec/driver/mixins/_pipeline.py +38 -49
- sqlspec/driver/mixins/_result_utils.py +27 -9
- sqlspec/driver/mixins/_storage.py +149 -216
- sqlspec/driver/mixins/_type_coercion.py +3 -4
- sqlspec/driver/parameters.py +138 -0
- sqlspec/exceptions.py +10 -2
- sqlspec/extensions/aiosql/adapter.py +0 -10
- sqlspec/extensions/litestar/handlers.py +0 -1
- sqlspec/extensions/litestar/plugin.py +0 -3
- sqlspec/extensions/litestar/providers.py +0 -14
- sqlspec/loader.py +31 -118
- sqlspec/protocols.py +542 -0
- sqlspec/service/__init__.py +3 -2
- sqlspec/service/_util.py +147 -0
- sqlspec/service/base.py +1116 -9
- sqlspec/statement/builder/__init__.py +42 -32
- sqlspec/statement/builder/_ddl_utils.py +0 -10
- sqlspec/statement/builder/_parsing_utils.py +10 -4
- sqlspec/statement/builder/base.py +70 -23
- sqlspec/statement/builder/column.py +283 -0
- sqlspec/statement/builder/ddl.py +102 -65
- sqlspec/statement/builder/delete.py +23 -7
- sqlspec/statement/builder/insert.py +29 -15
- sqlspec/statement/builder/merge.py +4 -4
- sqlspec/statement/builder/mixins/_aggregate_functions.py +113 -14
- sqlspec/statement/builder/mixins/_common_table_expr.py +0 -1
- sqlspec/statement/builder/mixins/_delete_from.py +1 -1
- sqlspec/statement/builder/mixins/_from.py +10 -8
- sqlspec/statement/builder/mixins/_group_by.py +0 -1
- sqlspec/statement/builder/mixins/_insert_from_select.py +0 -1
- sqlspec/statement/builder/mixins/_insert_values.py +0 -2
- sqlspec/statement/builder/mixins/_join.py +20 -13
- sqlspec/statement/builder/mixins/_limit_offset.py +3 -3
- sqlspec/statement/builder/mixins/_merge_clauses.py +3 -4
- sqlspec/statement/builder/mixins/_order_by.py +2 -2
- sqlspec/statement/builder/mixins/_pivot.py +4 -7
- sqlspec/statement/builder/mixins/_select_columns.py +6 -5
- sqlspec/statement/builder/mixins/_unpivot.py +6 -9
- sqlspec/statement/builder/mixins/_update_from.py +2 -1
- sqlspec/statement/builder/mixins/_update_set.py +11 -8
- sqlspec/statement/builder/mixins/_where.py +61 -34
- sqlspec/statement/builder/select.py +32 -17
- sqlspec/statement/builder/update.py +25 -11
- sqlspec/statement/filters.py +39 -14
- sqlspec/statement/parameter_manager.py +220 -0
- sqlspec/statement/parameters.py +210 -79
- sqlspec/statement/pipelines/__init__.py +166 -23
- sqlspec/statement/pipelines/analyzers/_analyzer.py +22 -25
- sqlspec/statement/pipelines/context.py +35 -39
- sqlspec/statement/pipelines/transformers/__init__.py +2 -3
- sqlspec/statement/pipelines/transformers/_expression_simplifier.py +19 -187
- sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +667 -43
- sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +76 -0
- sqlspec/statement/pipelines/validators/_dml_safety.py +33 -18
- sqlspec/statement/pipelines/validators/_parameter_style.py +87 -14
- sqlspec/statement/pipelines/validators/_performance.py +38 -23
- sqlspec/statement/pipelines/validators/_security.py +39 -62
- sqlspec/statement/result.py +37 -129
- sqlspec/statement/splitter.py +0 -12
- sqlspec/statement/sql.py +885 -379
- sqlspec/statement/sql_compiler.py +140 -0
- sqlspec/storage/__init__.py +10 -2
- sqlspec/storage/backends/fsspec.py +82 -35
- sqlspec/storage/backends/obstore.py +66 -49
- sqlspec/storage/capabilities.py +101 -0
- sqlspec/storage/registry.py +56 -83
- sqlspec/typing.py +6 -434
- sqlspec/utils/cached_property.py +25 -0
- sqlspec/utils/correlation.py +0 -2
- sqlspec/utils/logging.py +0 -6
- sqlspec/utils/sync_tools.py +0 -4
- sqlspec/utils/text.py +0 -5
- sqlspec/utils/type_guards.py +892 -0
- {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/METADATA +1 -1
- sqlspec-0.13.0.dist-info/RECORD +150 -0
- sqlspec/statement/builder/protocols.py +0 -20
- sqlspec/statement/pipelines/base.py +0 -315
- sqlspec/statement/pipelines/result_types.py +0 -41
- sqlspec/statement/pipelines/transformers/_remove_comments.py +0 -66
- sqlspec/statement/pipelines/transformers/_remove_hints.py +0 -81
- sqlspec/statement/pipelines/validators/base.py +0 -67
- sqlspec/storage/protocol.py +0 -170
- sqlspec-0.12.1.dist-info/RECORD +0 -145
- {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/WHEEL +0 -0
- {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/licenses/LICENSE +0 -0
- {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/licenses/NOTICE +0 -0
sqlspec/driver/mixins/_storage.py

@@ -9,8 +9,6 @@ and storage backend operations for optimal performance.
 """

 # pyright: reportCallIssue=false, reportAttributeAccessIssue=false, reportArgumentType=false
-import csv
-import json
 import logging
 import tempfile
 from abc import ABC
@@ -19,58 +17,37 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union, cast
 from urllib.parse import urlparse

+from sqlspec.driver.mixins._csv_writer import write_csv
+from sqlspec.driver.parameters import separate_filters_and_parameters
 from sqlspec.exceptions import MissingDependencyError
 from sqlspec.statement import SQL, ArrowResult, StatementFilter
-from sqlspec.statement.sql import SQLConfig
 from sqlspec.storage import storage_registry
 from sqlspec.typing import ArrowTable, RowT, StatementParameters
+from sqlspec.utils.serializers import to_json
 from sqlspec.utils.sync_tools import async_

 if TYPE_CHECKING:
     from sqlglot.dialects.dialect import DialectType

+    from sqlspec.protocols import ObjectStoreProtocol
     from sqlspec.statement import SQLResult, Statement
-    from sqlspec.
+    from sqlspec.statement.sql import SQLConfig
     from sqlspec.typing import ConnectionT

 __all__ = ("AsyncStorageMixin", "SyncStorageMixin")

 logger = logging.getLogger(__name__)

-# Constants
 WINDOWS_PATH_MIN_LENGTH = 3


-def _separate_filters_from_parameters(
-    parameters: "tuple[Any, ...]",
-) -> "tuple[list[StatementFilter], Optional[StatementParameters]]":
-    """Separate filters from parameters in positional args."""
-    filters: list[StatementFilter] = []
-    params: list[Any] = []
-
-    for arg in parameters:
-        if isinstance(arg, StatementFilter):
-            filters.append(arg)
-        else:
-            # Everything else is treated as parameters
-            params.append(arg)
-
-    # Convert to appropriate parameter format
-    if len(params) == 0:
-        return filters, None
-    if len(params) == 1:
-        return filters, params[0]
-    return filters, params
-
-
 class StorageMixinBase(ABC):
     """Base class with common storage functionality."""

     __slots__ = ()

-
-
-    _connection: Any  # Database connection
+    config: Any
+    _connection: Any
     dialect: "DialectType"
     supports_native_parquet_export: "ClassVar[bool]"
     supports_native_parquet_import: "ClassVar[bool]"
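The removed module-level helper above is superseded by the shared separate_filters_and_parameters imported from sqlspec.driver.parameters (a new file in this release, +138 lines). That module is not shown in this diff; the sketch below only restates the contract the deleted code documents, so any behavior beyond that is an assumption.

from sqlspec.statement import StatementFilter

def separate_filters_and_parameters(args):
    # Assumed contract, mirroring the deleted helper: pull out StatementFilter
    # instances and normalize the rest into None / a single value / a list.
    filters = [a for a in args if isinstance(a, StatementFilter)]
    params = [a for a in args if not isinstance(a, StatementFilter)]
    if not params:
        return filters, None
    return filters, params[0] if len(params) == 1 else params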
@@ -85,25 +62,29 @@ class StorageMixinBase(ABC):
             raise MissingDependencyError(msg)

     @staticmethod
-    def _get_storage_backend(uri_or_key: str) -> "ObjectStoreProtocol":
+    def _get_storage_backend(uri_or_key: "Union[str, Path]") -> "ObjectStoreProtocol":
         """Get storage backend by URI or key with intelligent routing."""
-
+        if isinstance(uri_or_key, Path):
+            return storage_registry.get(uri_or_key)
+        return storage_registry.get(str(uri_or_key))

     @staticmethod
-    def _is_uri(path_or_uri: str) -> bool:
+    def _is_uri(path_or_uri: "Union[str, Path]") -> bool:
         """Check if input is a URI rather than a relative path."""
+        path_str = str(path_or_uri)
         schemes = {"s3", "gs", "gcs", "az", "azure", "abfs", "abfss", "file", "http", "https"}
-        if "://" in
-            scheme =
+        if "://" in path_str:
+            scheme = path_str.split("://", maxsplit=1)[0].lower()
             return scheme in schemes
-        if len(
+        if len(path_str) >= WINDOWS_PATH_MIN_LENGTH and path_str[1:3] == ":\\":
             return True
-        return bool(
+        return bool(path_str.startswith("/"))

     @staticmethod
-    def _detect_format(uri: str) -> str:
+    def _detect_format(uri: "Union[str, Path]") -> str:
         """Detect file format from URI extension."""
-
+        uri_str = str(uri)
+        parsed = urlparse(uri_str)
         path = Path(parsed.path)
         extension = path.suffix.lower().lstrip(".")

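The URI helpers now stringify their input first, so str and Path arguments take the same branches. A minimal standalone sketch of the classification logic shown above (constants copied from the hunk; illustrative only, not sqlspec code):

from pathlib import Path

WINDOWS_PATH_MIN_LENGTH = 3
SCHEMES = {"s3", "gs", "gcs", "az", "azure", "abfs", "abfss", "file", "http", "https"}

def is_uri(path_or_uri) -> bool:
    # Mirrors the updated _is_uri logic from the hunk above.
    path_str = str(path_or_uri)
    if "://" in path_str:
        return path_str.split("://", maxsplit=1)[0].lower() in SCHEMES
    if len(path_str) >= WINDOWS_PATH_MIN_LENGTH and path_str[1:3] == ":\\":
        return True  # Windows drive path, e.g. C:\data
    return path_str.startswith("/")  # POSIX absolute path

assert is_uri("s3://bucket/data.parquet")
assert is_uri(Path("/tmp/out.parquet"))
assert not is_uri("relative/data.csv")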
@@ -120,28 +101,24 @@ class StorageMixinBase(ABC):

         return format_map.get(extension, "csv")

-    def _resolve_backend_and_path(self, uri: str) -> "tuple[ObjectStoreProtocol, str]":
+    def _resolve_backend_and_path(self, uri: "Union[str, Path]") -> "tuple[ObjectStoreProtocol, str]":
         """Resolve backend and path from URI with Phase 3 URI-first routing.

         Args:
-            uri: URI to resolve (e.g., "s3://bucket/path", "file:///local/path")
+            uri: URI to resolve (e.g., "s3://bucket/path", "file:///local/path", Path object)

         Returns:
             Tuple of (backend, path) where path is relative to the backend's base path
         """
-
-
-        original_path = uri
+        uri_str = str(uri)
+        original_path = uri_str

-
-
-        # It's an absolute path without scheme
-        uri = f"file://{uri}"
+        if self._is_uri(uri_str) and "://" not in uri_str:
+            uri_str = f"file://{uri_str}"

-        backend = self._get_storage_backend(
+        backend = self._get_storage_backend(uri_str)

-
-        path = uri[7:] if uri.startswith("file://") else original_path
+        path = uri_str[7:] if uri_str.startswith("file://") else original_path

         return backend, path

@@ -151,12 +128,9 @@ class StorageMixinBase(ABC):
         import pyarrow as pa

         if not rows:
-            # Empty table with column names
-            # Create empty arrays for each column
             empty_data: dict[str, list[Any]] = {col: [] for col in columns}
             return pa.table(empty_data)

-        # Convert rows to columnar format
         if isinstance(rows[0], dict):
             # Dict rows
             data = {col: [cast("dict[str, Any]", row).get(col) for row in rows] for col in columns}
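The dict-row branch above pivots row dictionaries into per-column lists before handing them to pyarrow. A runnable illustration of that conversion (toy data, not sqlspec API):

import pyarrow as pa

columns = ["id", "name"]
rows = [{"id": 1, "name": "a"}, {"id": 2, "name": "b"}]

# Same columnar pivot the mixin performs for dict rows.
data = {col: [row.get(col) for row in rows] for col in columns}
table = pa.table(data)
assert table.num_rows == 2 and table.column_names == ["id", "name"]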
@@ -227,7 +201,7 @@ class SyncStorageMixin(StorageMixinBase):
         """
         self._ensure_pyarrow_installed()

-        filters, params =
+        filters, params = separate_filters_and_parameters(parameters)
         # Convert to SQL object for processing
         # Use a custom config if transformations will add parameters
         if _config is None:
@@ -241,9 +215,9 @@ class SyncStorageMixin(StorageMixinBase):

         # Only pass params if it's not None to avoid adding None as a parameter
         if params is not None:
-            sql = SQL(statement, params, *filters,
+            sql = SQL(statement, params, *filters, config=_config, **kwargs)
         else:
-            sql = SQL(statement, *filters,
+            sql = SQL(statement, *filters, config=_config, **kwargs)

         return self._fetch_arrow_table(sql, connection=_connection, **kwargs)

@@ -261,11 +235,9 @@ class SyncStorageMixin(StorageMixinBase):
         Returns:
             ArrowResult with converted data
         """
-        # Check if this SQL object has validation issues due to transformer-generated parameters
         try:
             result = cast("SQLResult", self.execute(sql, _connection=connection))  # type: ignore[attr-defined]
         except Exception:
-            # Get the compiled SQL and parameters
             compiled_sql, compiled_params = sql.compile("qmark")

             # Execute directly via the driver's _execute method
@@ -293,7 +265,7 @@ class SyncStorageMixin(StorageMixinBase):
         statement: "Statement",
         /,
         *parameters: "Union[StatementParameters, StatementFilter]",
-        destination_uri: str,
+        destination_uri: "Union[str, Path]",
         format: "Optional[str]" = None,
         _connection: "Optional[ConnectionT]" = None,
         _config: "Optional[SQLConfig]" = None,
@@ -315,21 +287,21 @@ class SyncStorageMixin(StorageMixinBase):
         Returns:
             Number of rows exported
         """
-
-        filters, params = _separate_filters_from_parameters(parameters)
+        filters, params = separate_filters_and_parameters(parameters)

         # For storage operations, disable transformations that might add unwanted parameters
         if _config is None:
             _config = self.config
+        if _config and not _config.dialect:
+            _config = replace(_config, dialect=self.dialect)
         if _config and _config.enable_transformations:
-            from dataclasses import replace
-
             _config = replace(_config, enable_transformations=False)

-
-
-
-
+        sql = (
+            SQL(statement, parameters=params, config=_config) if params is not None else SQL(statement, config=_config)
+        )
+        for filter_ in filters:
+            sql = sql.filter(filter_)

         return self._export_to_storage(
             sql, destination_uri=destination_uri, format=format, _connection=_connection, **options
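The new lines default the config's dialect to the driver's and switch transformations off through dataclasses.replace, which is now used at module scope instead of being imported inside the function. A standalone sketch of that adjustment with a stand-in config class (FakeSQLConfig is illustrative only, not sqlspec's SQLConfig):

from dataclasses import dataclass, replace
from typing import Optional

@dataclass
class FakeSQLConfig:
    dialect: Optional[str] = None
    enable_transformations: bool = True

cfg = FakeSQLConfig()
if not cfg.dialect:
    cfg = replace(cfg, dialect="sqlite")  # default the dialect to the driver's
if cfg.enable_transformations:
    cfg = replace(cfg, enable_transformations=False)  # storage export wants untransformed SQL
assert (cfg.dialect, cfg.enable_transformations) == ("sqlite", False)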
@@ -337,40 +309,25 @@ class SyncStorageMixin(StorageMixinBase):

     def _export_to_storage(
         self,
-
-
-        *parameters: "Union[StatementParameters, StatementFilter]",
-        destination_uri: str,
+        sql: "SQL",
+        destination_uri: "Union[str, Path]",
         format: "Optional[str]" = None,
         _connection: "Optional[ConnectionT]" = None,
-        _config: "Optional[SQLConfig]" = None,
         **kwargs: Any,
     ) -> int:
-
-        if hasattr(statement, "to_sql"):  # SQL object
-            query_str = cast("SQL", statement).to_sql()
-        elif isinstance(statement, str):
-            query_str = statement
-        else:  # sqlglot Expression
-            query_str = str(statement)
-
-        # Auto-detect format if not provided
-        # If no format is specified and detection fails (returns "csv" as default),
-        # default to "parquet" for export operations as it's the most common use case
+        """Protected method for sync export operation implementation."""
         detected_format = self._detect_format(destination_uri)
         if format:
             file_format = format
-        elif detected_format == "csv" and not destination_uri.endswith((".csv", ".tsv", ".txt")):
+        elif detected_format == "csv" and not str(destination_uri).endswith((".csv", ".tsv", ".txt")):
             # Detection returned default "csv" but file doesn't actually have CSV extension
-            # Default to parquet for better compatibility with tests and common usage
             file_format = "parquet"
         else:
             file_format = detected_format

-        # Special handling for parquet format - if we're exporting to parquet but the
         # destination doesn't have .parquet extension, add it to ensure compatibility
         # with pyarrow.parquet.read_table() which requires the extension
-        if file_format == "parquet" and not destination_uri.endswith(".parquet"):
+        if file_format == "parquet" and not str(destination_uri).endswith(".parquet"):
             destination_uri = f"{destination_uri}.parquet"

         # Use storage backend - resolve AFTER modifying destination_uri
@@ -378,41 +335,30 @@ class SyncStorageMixin(StorageMixinBase):

         # Try native database export first
         if file_format == "parquet" and self.supports_native_parquet_export:
-
-
-
-
-
-
-            except NotImplementedError:
-                # Fall through to use storage backend
-                pass
+            try:
+                compiled_sql, _ = sql.compile(placeholder_style="static")
+                return self._export_native(compiled_sql, destination_uri, file_format, **kwargs)
+            except NotImplementedError:
+                # Fall through to use storage backend
+                pass

         if file_format == "parquet":
-            # Use Arrow for efficient transfer
-
-            # For parquet export via Arrow, just use the SQL object directly
-            sql_obj = cast("SQL", statement)
-            # Pass connection parameter correctly
-            arrow_result = self._fetch_arrow_table(sql_obj, connection=_connection, **kwargs)
-        else:
-            # Create SQL object if it's still a string
-            arrow_result = self.fetch_arrow_table(statement, *parameters, _connection=_connection, _config=_config)
-
-        # ArrowResult.data is never None according to the type definition
+            # Use Arrow for efficient transfer
+            arrow_result = self._fetch_arrow_table(sql, connection=_connection, **kwargs)
             arrow_table = arrow_result.data
             num_rows = arrow_table.num_rows
             backend.write_arrow(path, arrow_table, **kwargs)
             return num_rows
-
-
-            sql_obj = SQL(statement, _config=_config, _dialect=self.dialect)
-        else:
-            sql_obj = cast("SQL", statement)
-        return self._export_via_backend(sql_obj, backend, path, file_format, **kwargs)
+
+        return self._export_via_backend(sql, backend, path, file_format, **kwargs)

     def import_from_storage(
-        self,
+        self,
+        source_uri: "Union[str, Path]",
+        table_name: str,
+        format: "Optional[str]" = None,
+        mode: str = "create",
+        **options: Any,
     ) -> int:
         """Import data from storage with intelligent routing.

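The rewritten branch compiles the statement with placeholder_style="static" and hands the literal SQL to _export_native, falling back to the storage backend whenever the driver raises NotImplementedError. A hypothetical override sketch for a DuckDB-like driver (not the actual sqlspec adapter; DuckDB's COPY ... TO syntax is real, everything else is assumed):

class DuckDBLikeDriver:
    # Hypothetical driver; assumes a DB-API style connection with execute()/fetchone().
    def __init__(self, connection) -> None:
        self._connection = connection

    def _export_native(self, query: str, destination_uri, format: str, **options) -> int:
        if format != "parquet":
            raise NotImplementedError  # mixin then falls back to the storage backend
        # DuckDB can export a query result directly: COPY (<query>) TO '<file>' (FORMAT PARQUET)
        self._connection.execute(f"COPY ({query}) TO '{destination_uri}' (FORMAT PARQUET)")
        return self._connection.execute(f"SELECT COUNT(*) FROM ({query}) AS t").fetchone()[0]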
@@ -431,7 +377,12 @@ class SyncStorageMixin(StorageMixinBase):
         return self._import_from_storage(source_uri, table_name, format, mode, **options)

     def _import_from_storage(
-        self,
+        self,
+        source_uri: "Union[str, Path]",
+        table_name: str,
+        format: "Optional[str]" = None,
+        mode: str = "create",
+        **options: Any,
     ) -> int:
         """Protected method for import operation implementation.

@@ -461,7 +412,23 @@ class SyncStorageMixin(StorageMixinBase):
                 arrow_table = backend.read_arrow(path, **options)
                 return self.ingest_arrow_table(arrow_table, table_name, mode=mode)
             except AttributeError:
-
+                # Backend doesn't support read_arrow, try alternative approach
+                try:
+                    import pyarrow.parquet as pq
+
+                    # Read Parquet file directly
+                    with tempfile.NamedTemporaryFile(mode="wb", suffix=".parquet", delete=False) as tmp:
+                        tmp.write(backend.read_bytes(path))
+                        tmp_path = Path(tmp.name)
+                    try:
+                        arrow_table = pq.read_table(tmp_path)
+                        return self.ingest_arrow_table(arrow_table, table_name, mode=mode)
+                    finally:
+                        tmp_path.unlink(missing_ok=True)
+                except ImportError:
+                    # PyArrow not installed, cannot import Parquet
+                    msg = "PyArrow is required to import Parquet files. Install with: pip install pyarrow"
+                    raise ImportError(msg) from None

         # Use traditional import through temporary file
         return self._import_via_backend(backend, path, table_name, file_format, mode, **options)
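The new fallback only needs read_bytes() from the backend: the Parquet payload is spooled to a temporary file and read back with pyarrow.parquet. A self-contained round-trip of that path (BytesOnlyBackend is a toy stand-in, not a sqlspec storage backend):

import tempfile
from pathlib import Path

import pyarrow as pa
import pyarrow.parquet as pq

class BytesOnlyBackend:
    # Toy backend exposing only read_bytes(), the one method the fallback relies on.
    def __init__(self, blobs: dict) -> None:
        self._blobs = blobs
    def read_bytes(self, path: str) -> bytes:
        return self._blobs[path]

# Build a Parquet blob in memory, then round-trip it through the temp-file fallback.
buf = pa.BufferOutputStream()
pq.write_table(pa.table({"id": [1, 2]}), buf)
backend = BytesOnlyBackend({"data.parquet": buf.getvalue().to_pybytes()})

with tempfile.NamedTemporaryFile(suffix=".parquet", delete=False) as tmp:
    tmp.write(backend.read_bytes("data.parquet"))
    tmp_path = Path(tmp.name)
try:
    table = pq.read_table(tmp_path)
finally:
    tmp_path.unlink(missing_ok=True)
assert table.num_rows == 2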
@@ -471,23 +438,27 @@ class SyncStorageMixin(StorageMixinBase):
     # ============================================================================

     def _read_parquet_native(
-        self, source_uri: str, columns: "Optional[list[str]]" = None, **options: Any
+        self, source_uri: "Union[str, Path]", columns: "Optional[list[str]]" = None, **options: Any
     ) -> "SQLResult":
         """Database-specific native Parquet reading. Override in drivers."""
         msg = "Driver should implement _read_parquet_native"
         raise NotImplementedError(msg)

-    def _write_parquet_native(
+    def _write_parquet_native(
+        self, data: Union[str, ArrowTable], destination_uri: "Union[str, Path]", **options: Any
+    ) -> None:
         """Database-specific native Parquet writing. Override in drivers."""
         msg = "Driver should implement _write_parquet_native"
         raise NotImplementedError(msg)

-    def _export_native(self, query: str, destination_uri: str, format: str, **options: Any) -> int:
+    def _export_native(self, query: str, destination_uri: "Union[str, Path]", format: str, **options: Any) -> int:
         """Database-specific native export. Override in drivers."""
         msg = "Driver should implement _export_native"
         raise NotImplementedError(msg)

-    def _import_native(
+    def _import_native(
+        self, source_uri: "Union[str, Path]", table_name: str, format: str, mode: str, **options: Any
+    ) -> int:
         """Database-specific native import. Override in drivers."""
         msg = "Driver should implement _import_native"
         raise NotImplementedError(msg)
@@ -515,10 +486,8 @@ class SyncStorageMixin(StorageMixinBase):
             backend.write_arrow(path, arrow_table, **options)
             return len(result.data or [])

-        # Convert to appropriate format and write to backend
         compression = options.get("compression")

-        # Create temp file with appropriate suffix
         suffix = f".{format}"
         if compression == "gzip":
             suffix += ".gz"
@@ -526,7 +495,6 @@ class SyncStorageMixin(StorageMixinBase):
         with tempfile.NamedTemporaryFile(mode="w", suffix=suffix, delete=False, encoding="utf-8") as tmp:
             tmp_path = Path(tmp.name)

-            # Handle compression and writing
             if compression == "gzip":
                 import gzip

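Only the suffix bookkeeping for gzip output survives in this hunk. The sketch below shows one way the gzip branch can write and re-read a compressed CSV temp file, assuming the payload is CSV text (illustrative, not the mixin's exact code):

import csv
import gzip
import tempfile
from pathlib import Path

rows = [("id", "name"), (1, "ada"), (2, "grace")]
suffix = ".csv.gz"  # same scheme as above: ".{format}" plus ".gz" when gzip is requested

with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
    tmp_path = Path(tmp.name)
with gzip.open(tmp_path, "wt", encoding="utf-8", newline="") as fh:
    csv.writer(fh).writerows(rows)

with gzip.open(tmp_path, "rt", encoding="utf-8", newline="") as fh:
    assert next(csv.reader(fh)) == ["id", "name"]
tmp_path.unlink()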
@@ -580,41 +548,24 @@ class SyncStorageMixin(StorageMixinBase):
     @staticmethod
     def _write_csv(result: "SQLResult", file: Any, **options: Any) -> None:
         """Write result to CSV file."""
-
-        csv_options = options.copy()
-        csv_options.pop("compression", None)  # Handle compression separately
-        csv_options.pop("partition_by", None)  # Not applicable to CSV
-
-        writer = csv.writer(file, **csv_options)  # TODO: anything better?
-        if result.column_names:
-            writer.writerow(result.column_names)
-        if result.data:
-            # Handle dict rows by extracting values in column order
-            if result.data and isinstance(result.data[0], dict):
-                rows = []
-                for row_dict in result.data:
-                    # Extract values in the same order as column_names
-                    row_values = [row_dict.get(col) for col in result.column_names or []]
-                    rows.append(row_values)
-                writer.writerows(rows)
-            else:
-                writer.writerows(result.data)
+        write_csv(result, file, **options)

     @staticmethod
     def _write_json(result: "SQLResult", file: Any, **options: Any) -> None:
         """Write result to JSON file."""
+        _ = options

         if result.data and result.column_names:
-            # Check if data is already in dict format
             if result.data and isinstance(result.data[0], dict):
                 # Data is already dictionaries, use as-is
                 rows = result.data
             else:
-                # Convert tuples/lists to list of dicts
                 rows = [dict(zip(result.column_names, row)) for row in result.data]
-
+            json_str = to_json(rows)
+            file.write(json_str)
         else:
-
+            json_str = to_json([])
+            file.write(json_str)

     def _bulk_load_file(self, file_path: Path, table_name: str, format: str, mode: str, **options: Any) -> int:
         """Database-specific bulk load implementation. Override in drivers."""
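Both mixins now delegate CSV writing to write_csv from the new sqlspec/driver/mixins/_csv_writer.py (+91 lines). That module is not shown in this diff; the sketch below simply repackages the deleted inline logic behind the call signature used above, so treat it as a reading aid rather than the real helper:

import csv
from typing import Any

def write_csv(result, file: Any, **options: Any) -> None:
    # Assumed to preserve the removed behavior: strip non-CSV options, emit a header
    # row, and handle both dict rows and tuple/list rows.
    options.pop("compression", None)   # handled by the caller
    options.pop("partition_by", None)  # not applicable to CSV
    writer = csv.writer(file, **options)
    if result.column_names:
        writer.writerow(result.column_names)
    if result.data and isinstance(result.data[0], dict):
        writer.writerows([[row.get(col) for col in result.column_names or []] for row in result.data])
    elif result.data:
        writer.writerows(result.data)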
@@ -689,7 +640,7 @@ class AsyncStorageMixin(StorageMixinBase):
         """
         self._ensure_pyarrow_installed()

-        filters, params =
+        filters, params = separate_filters_and_parameters(parameters)
         # Convert to SQL object for processing
         # Use a custom config if transformations will add parameters
         if _config is None:
@@ -698,18 +649,15 @@ class AsyncStorageMixin(StorageMixinBase):
         # If no parameters provided but we have transformations enabled,
         # disable parameter validation entirely to allow transformer-added parameters
         if params is None and _config and _config.enable_transformations:
-            from dataclasses import replace
-
             # Disable validation entirely for transformer-generated parameters
             _config = replace(_config, strict_mode=False, enable_validation=False)

         # Only pass params if it's not None to avoid adding None as a parameter
         if params is not None:
-            sql = SQL(statement, params, *filters,
+            sql = SQL(statement, params, *filters, config=_config, **kwargs)
         else:
-            sql = SQL(statement, *filters,
+            sql = SQL(statement, *filters, config=_config, **kwargs)

-        # Delegate to protected method that drivers can override
         return await self._fetch_arrow_table(sql, connection=_connection, **kwargs)

     async def _fetch_arrow_table(
@@ -733,7 +681,6 @@ class AsyncStorageMixin(StorageMixinBase):
         # Execute regular query
         result = await self.execute(sql, _connection=connection)  # type: ignore[attr-defined]

-        # Convert to Arrow table
         arrow_table = self._rows_to_arrow_table(result.data or [], result.column_names or [])

         return ArrowResult(statement=sql, data=arrow_table)
@@ -743,37 +690,37 @@ class AsyncStorageMixin(StorageMixinBase):
         statement: "Statement",
         /,
         *parameters: "Union[StatementParameters, StatementFilter]",
-        destination_uri: str,
+        destination_uri: "Union[str, Path]",
         format: "Optional[str]" = None,
         _connection: "Optional[ConnectionT]" = None,
         _config: "Optional[SQLConfig]" = None,
-        **
+        **kwargs: Any,
     ) -> int:
-
-        filters, params = _separate_filters_from_parameters(parameters)
+        filters, params = separate_filters_and_parameters(parameters)

         # For storage operations, disable transformations that might add unwanted parameters
         if _config is None:
             _config = self.config
+        if _config and not _config.dialect:
+            _config = replace(_config, dialect=self.dialect)
         if _config and _config.enable_transformations:
-            from dataclasses import replace
-
             _config = replace(_config, enable_transformations=False)

-
-
-
-
+        sql = (
+            SQL(statement, parameters=params, config=_config) if params is not None else SQL(statement, config=_config)
+        )
+        for filter_ in filters:
+            sql = sql.filter(filter_)

-        return await self._export_to_storage(sql, destination_uri, format, connection=_connection, **
+        return await self._export_to_storage(sql, destination_uri, format, connection=_connection, **kwargs)

     async def _export_to_storage(
         self,
         query: "SQL",
-        destination_uri: str,
+        destination_uri: "Union[str, Path]",
         format: "Optional[str]" = None,
         connection: "Optional[ConnectionT]" = None,
-        **
+        **kwargs: Any,
     ) -> int:
         """Protected async method for export operation implementation.

@@ -782,28 +729,24 @@ class AsyncStorageMixin(StorageMixinBase):
             destination_uri: URI to export data to
             format: Optional format override (auto-detected from URI if not provided)
             connection: Optional connection override
-            **
+            **kwargs: Additional export options

         Returns:
             Number of rows exported
         """
         # Auto-detect format if not provided
-        # If no format is specified and detection fails (returns "csv" as default),
-        # default to "parquet" for export operations as it's the most common use case
         detected_format = self._detect_format(destination_uri)
         if format:
             file_format = format
-        elif detected_format == "csv" and not destination_uri.endswith((".csv", ".tsv", ".txt")):
+        elif detected_format == "csv" and not str(destination_uri).endswith((".csv", ".tsv", ".txt")):
             # Detection returned default "csv" but file doesn't actually have CSV extension
-            # Default to parquet for better compatibility with tests and common usage
             file_format = "parquet"
         else:
             file_format = detected_format

-        # Special handling for parquet format - if we're exporting to parquet but the
         # destination doesn't have .parquet extension, add it to ensure compatibility
         # with pyarrow.parquet.read_table() which requires the extension
-        if file_format == "parquet" and not destination_uri.endswith(".parquet"):
+        if file_format == "parquet" and not str(destination_uri).endswith(".parquet"):
             destination_uri = f"{destination_uri}.parquet"

         # Use storage backend - resolve AFTER modifying destination_uri
@@ -811,34 +754,31 @@ class AsyncStorageMixin(StorageMixinBase):

         # Try native database export first
         if file_format == "parquet" and self.supports_native_parquet_export:
-
+            try:
+                compiled_sql, _ = query.compile(placeholder_style="static")
+                return await self._export_native(compiled_sql, destination_uri, file_format, **kwargs)
+            except NotImplementedError:
+                # Fall through to use storage backend
+                pass

         if file_format == "parquet":
-            #
-
-            if hasattr(query, "parameters") and query.parameters and hasattr(query, "_raw_sql"):
-                # Create fresh SQL object from raw SQL without transformations
-                fresh_sql = SQL(
-                    query._raw_sql,
-                    _config=replace(self.config, enable_transformations=False)
-                    if self.config
-                    else SQLConfig(enable_transformations=False),
-                    _dialect=self.dialect,
-                )
-                arrow_result = await self._fetch_arrow_table(fresh_sql, connection=connection, **options)
-            else:
-                # query is already a SQL object, call _fetch_arrow_table directly
-                arrow_result = await self._fetch_arrow_table(query, connection=connection, **options)
+            # Use Arrow for efficient transfer
+            arrow_result = await self._fetch_arrow_table(query, connection=connection, **kwargs)
             arrow_table = arrow_result.data
             if arrow_table is not None:
-                await backend.write_arrow_async(path, arrow_table, **
+                await backend.write_arrow_async(path, arrow_table, **kwargs)
                 return arrow_table.num_rows
             return 0

-        return await self._export_via_backend(query, backend, path, file_format, **
+        return await self._export_via_backend(query, backend, path, file_format, **kwargs)

     async def import_from_storage(
-        self,
+        self,
+        source_uri: "Union[str, Path]",
+        table_name: str,
+        format: "Optional[str]" = None,
+        mode: str = "create",
+        **options: Any,
     ) -> int:
         """Async import data from storage with intelligent routing.

@@ -857,7 +797,12 @@ class AsyncStorageMixin(StorageMixinBase):
         return await self._import_from_storage(source_uri, table_name, format, mode, **options)

     async def _import_from_storage(
-        self,
+        self,
+        source_uri: "Union[str, Path]",
+        table_name: str,
+        format: "Optional[str]" = None,
+        mode: str = "create",
+        **options: Any,
     ) -> int:
         """Protected async method for import operation implementation.

@@ -884,12 +829,14 @@ class AsyncStorageMixin(StorageMixinBase):
     # Async Database-Specific Implementation Hooks
     # ============================================================================

-    async def _export_native(self, query: str, destination_uri: str, format: str, **options: Any) -> int:
+    async def _export_native(self, query: str, destination_uri: "Union[str, Path]", format: str, **options: Any) -> int:
         """Async database-specific native export."""
         msg = "Driver should implement _export_native"
         raise NotImplementedError(msg)

-    async def _import_native(
+    async def _import_native(
+        self, source_uri: "Union[str, Path]", table_name: str, format: str, mode: str, **options: Any
+    ) -> int:
         """Async database-specific native import."""
         msg = "Driver should implement _import_native"
         raise NotImplementedError(msg)
@@ -917,7 +864,6 @@ class AsyncStorageMixin(StorageMixinBase):
             await backend.write_arrow_async(path, arrow_table, **options)
             return len(result.data or [])

-        # Convert to appropriate format and write to backend
         with tempfile.NamedTemporaryFile(mode="w", suffix=f".{format}", delete=False, encoding="utf-8") as tmp:
             if format == "csv":
                 self._write_csv(result, tmp, **options)
@@ -955,37 +901,24 @@ class AsyncStorageMixin(StorageMixinBase):
     @staticmethod
     def _write_csv(result: "SQLResult", file: Any, **options: Any) -> None:
         """Reuse sync implementation."""
-
-        writer = csv.writer(file, **options)
-        if result.column_names:
-            writer.writerow(result.column_names)
-        if result.data:
-            # Handle dict rows by extracting values in column order
-            if result.data and isinstance(result.data[0], dict):
-                rows = []
-                for row_dict in result.data:
-                    # Extract values in the same order as column_names
-                    row_values = [row_dict.get(col) for col in result.column_names or []]
-                    rows.append(row_values)
-                writer.writerows(rows)
-            else:
-                writer.writerows(result.data)
+        write_csv(result, file, **options)

     @staticmethod
     def _write_json(result: "SQLResult", file: Any, **options: Any) -> None:
         """Reuse sync implementation."""
+        _ = options  # May be used in the future for JSON formatting options

         if result.data and result.column_names:
-            # Check if data is already in dict format
             if result.data and isinstance(result.data[0], dict):
                 # Data is already dictionaries, use as-is
                 rows = result.data
             else:
-                # Convert tuples/lists to list of dicts
                 rows = [dict(zip(result.column_names, row)) for row in result.data]
-
+            json_str = to_json(rows)
+            file.write(json_str)
         else:
-
+            json_str = to_json([])
+            file.write(json_str)

     async def _bulk_load_file(self, file_path: Path, table_name: str, format: str, mode: str, **options: Any) -> int:
         """Async database-specific bulk load implementation."""