sqlspec 0.12.0__py3-none-any.whl → 0.12.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -197,8 +197,41 @@ class SqliteDriver(
         result: ScriptResultDict = {"statements_executed": -1, "status_message": "SCRIPT EXECUTED"}
         return result
 
+    def _ingest_arrow_table(self, table: Any, table_name: str, mode: str = "create", **options: Any) -> int:
+        """SQLite-specific Arrow table ingestion using CSV conversion.
+
+        Since SQLite only supports CSV bulk loading, we convert the Arrow table
+        to CSV format first using the storage backend for efficient operations.
+        """
+        import io
+        import tempfile
+
+        import pyarrow.csv as pa_csv
+
+        # Convert Arrow table to CSV in memory
+        csv_buffer = io.BytesIO()
+        pa_csv.write_csv(table, csv_buffer)
+        csv_content = csv_buffer.getvalue()
+
+        # Create a temporary file path
+        temp_filename = f"sqlspec_temp_{table_name}_{id(self)}.csv"
+        temp_path = Path(tempfile.gettempdir()) / temp_filename
+
+        # Use storage backend to write the CSV content
+        backend = self._get_storage_backend(temp_path)
+        backend.write_bytes(str(temp_path), csv_content)
+
+        try:
+            # Use SQLite's CSV bulk load
+            return self._bulk_load_file(temp_path, table_name, "csv", mode, **options)
+        finally:
+            # Clean up using storage backend
+            with contextlib.suppress(Exception):
+                # Best effort cleanup
+                backend.delete(str(temp_path))
+
     def _bulk_load_file(self, file_path: Path, table_name: str, format: str, mode: str, **options: Any) -> int:
-        """Database-specific bulk load implementation."""
+        """Database-specific bulk load implementation using storage backend."""
         if format != "csv":
             msg = f"SQLite driver only supports CSV for bulk loading, not {format}."
             raise NotImplementedError(msg)
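
As a point of reference, the in-memory CSV conversion above can be reproduced standalone. A minimal sketch, assuming pyarrow is installed; the table contents are illustrative:

import io

import pyarrow as pa
import pyarrow.csv as pa_csv

table = pa.table({"id": [1, 2], "name": ["a", "b"]})

# Serialize the Arrow table to CSV bytes in memory (header row included),
# mirroring what _ingest_arrow_table writes through the storage backend.
buffer = io.BytesIO()
pa_csv.write_csv(table, buffer)
print(buffer.getvalue().decode("utf-8"))  # header row, then the two data rows
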
@@ -208,16 +241,23 @@ class SqliteDriver(
             if mode == "replace":
                 cursor.execute(f"DELETE FROM {table_name}")
 
-            with Path(file_path).open(encoding="utf-8") as f:
-                reader = csv.reader(f, **options)
-                header = next(reader)  # Skip header
-                placeholders = ", ".join("?" for _ in header)
-                sql = f"INSERT INTO {table_name} VALUES ({placeholders})"
+            # Use storage backend to read the file
+            backend = self._get_storage_backend(file_path)
+            content = backend.read_text(str(file_path), encoding="utf-8")
+
+            # Parse CSV content
+            import io
+
+            csv_file = io.StringIO(content)
+            reader = csv.reader(csv_file, **options)
+            header = next(reader)  # Skip header
+            placeholders = ", ".join("?" for _ in header)
+            sql = f"INSERT INTO {table_name} VALUES ({placeholders})"
 
-                # executemany is efficient for bulk inserts
-                data_iter = list(reader)  # Read all data into memory
-                cursor.executemany(sql, data_iter)
-                return cursor.rowcount
+            # executemany is efficient for bulk inserts
+            data_iter = list(reader)  # Read all data into memory
+            cursor.executemany(sql, data_iter)
+            return cursor.rowcount
 
     def _wrap_select_result(
         self, statement: SQL, result: SelectResultDict, schema_type: Optional[type[ModelDTOT]] = None, **kwargs: Any
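
The rewritten body keeps the same executemany pattern as before, only sourcing rows from a string buffer instead of an open file. A standalone sketch against the stdlib sqlite3 module (table name and CSV content are illustrative):

import csv
import io
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE users (id TEXT, name TEXT)")

content = "id,name\n1,a\n2,b\n"  # stand-in for backend.read_text(...)
reader = csv.reader(io.StringIO(content))
header = next(reader)  # Skip header
placeholders = ", ".join("?" for _ in header)

# executemany binds each CSV row as one parameter set
conn.executemany(f"INSERT INTO users VALUES ({placeholders})", list(reader))
print(conn.execute("SELECT COUNT(*) FROM users").fetchone()[0])  # 2
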
@@ -85,25 +85,30 @@ class StorageMixinBase(ABC):
         raise MissingDependencyError(msg)
 
     @staticmethod
-    def _get_storage_backend(uri_or_key: str) -> "ObjectStoreProtocol":
+    def _get_storage_backend(uri_or_key: "Union[str, Path]") -> "ObjectStoreProtocol":
         """Get storage backend by URI or key with intelligent routing."""
-        return storage_registry.get(uri_or_key)
+        # Pass Path objects directly to storage registry for proper URI conversion
+        if isinstance(uri_or_key, Path):
+            return storage_registry.get(uri_or_key)
+        return storage_registry.get(str(uri_or_key))
 
     @staticmethod
-    def _is_uri(path_or_uri: str) -> bool:
+    def _is_uri(path_or_uri: "Union[str, Path]") -> bool:
         """Check if input is a URI rather than a relative path."""
+        path_str = str(path_or_uri)
         schemes = {"s3", "gs", "gcs", "az", "azure", "abfs", "abfss", "file", "http", "https"}
-        if "://" in path_or_uri:
-            scheme = path_or_uri.split("://", maxsplit=1)[0].lower()
+        if "://" in path_str:
+            scheme = path_str.split("://", maxsplit=1)[0].lower()
             return scheme in schemes
-        if len(path_or_uri) >= WINDOWS_PATH_MIN_LENGTH and path_or_uri[1:3] == ":\\":
+        if len(path_str) >= WINDOWS_PATH_MIN_LENGTH and path_str[1:3] == ":\\":
             return True
-        return bool(path_or_uri.startswith("/"))
+        return bool(path_str.startswith("/"))
 
     @staticmethod
-    def _detect_format(uri: str) -> str:
+    def _detect_format(uri: "Union[str, Path]") -> str:
         """Detect file format from URI extension."""
-        parsed = urlparse(uri)
+        uri_str = str(uri)
+        parsed = urlparse(uri_str)
         path = Path(parsed.path)
         extension = path.suffix.lower().lstrip(".")
 
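The practical effect of the new str() coercion is that Path objects take the same routing decisions as plain strings. A condensed sketch of the heuristic, with an abridged scheme set and WINDOWS_PATH_MIN_LENGTH assumed to be 3:

from pathlib import PurePosixPath, PureWindowsPath

WINDOWS_PATH_MIN_LENGTH = 3  # assumed value of the module constant

def is_uri(path_or_uri) -> bool:
    path_str = str(path_or_uri)  # Path and str now follow the same branches
    if "://" in path_str:
        return path_str.split("://", maxsplit=1)[0].lower() in {"s3", "gs", "file", "http", "https"}
    if len(path_str) >= WINDOWS_PATH_MIN_LENGTH and path_str[1:3] == ":\\":
        return True  # Windows drive path, e.g. C:\data
    return path_str.startswith("/")  # POSIX absolute path

print(is_uri("s3://bucket/key"))                     # True
print(is_uri(PurePosixPath("/var/data/file.csv")))   # True
print(is_uri(PureWindowsPath(r"C:\data\file.csv")))  # True
print(is_uri("relative/file.csv"))                   # False
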
@@ -120,28 +125,28 @@ class StorageMixinBase(ABC):
 
         return format_map.get(extension, "csv")
 
-    def _resolve_backend_and_path(self, uri: str) -> "tuple[ObjectStoreProtocol, str]":
+    def _resolve_backend_and_path(self, uri: "Union[str, Path]") -> "tuple[ObjectStoreProtocol, str]":
         """Resolve backend and path from URI with Phase 3 URI-first routing.
 
         Args:
-            uri: URI to resolve (e.g., "s3://bucket/path", "file:///local/path")
+            uri: URI to resolve (e.g., "s3://bucket/path", "file:///local/path", Path object)
 
         Returns:
             Tuple of (backend, path) where path is relative to the backend's base path
         """
         # Convert Path objects to string
-        uri = str(uri)
-        original_path = uri
+        uri_str = str(uri)
+        original_path = uri_str
 
         # Convert absolute paths to file:// URIs if needed
-        if self._is_uri(uri) and "://" not in uri:
+        if self._is_uri(uri_str) and "://" not in uri_str:
             # It's an absolute path without scheme
-            uri = f"file://{uri}"
+            uri_str = f"file://{uri_str}"
 
-        backend = self._get_storage_backend(uri)
+        backend = self._get_storage_backend(uri_str)
 
         # For file:// URIs, return just the path part for the backend
-        path = uri[7:] if uri.startswith("file://") else original_path
+        path = uri_str[7:] if uri_str.startswith("file://") else original_path
 
         return backend, path
 
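Traced by hand, the new variable separation works like this: the string is promoted to a file:// URI for backend lookup, while the path handed back keeps its plain form. An illustrative walk-through, not the library API:

from pathlib import Path

uri = Path("/data/report.parquet")
uri_str = str(uri)
original_path = uri_str

# An absolute path without a scheme is promoted to a file:// URI
if "://" not in uri_str:
    uri_str = f"file://{uri_str}"

# The backend is resolved from uri_str; the path handed to it has the
# 7-character "file://" prefix stripped again
path = uri_str[7:] if uri_str.startswith("file://") else original_path
print(uri_str)  # file:///data/report.parquet
print(path)     # /data/report.parquet
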
@@ -293,7 +298,7 @@ class SyncStorageMixin(StorageMixinBase):
         statement: "Statement",
         /,
         *parameters: "Union[StatementParameters, StatementFilter]",
-        destination_uri: str,
+        destination_uri: "Union[str, Path]",
         format: "Optional[str]" = None,
         _connection: "Optional[ConnectionT]" = None,
         _config: "Optional[SQLConfig]" = None,
@@ -340,7 +345,7 @@ class SyncStorageMixin(StorageMixinBase):
         statement: "Statement",
         /,
         *parameters: "Union[StatementParameters, StatementFilter]",
-        destination_uri: str,
+        destination_uri: "Union[str, Path]",
         format: "Optional[str]" = None,
         _connection: "Optional[ConnectionT]" = None,
         _config: "Optional[SQLConfig]" = None,
@@ -360,7 +365,7 @@ class SyncStorageMixin(StorageMixinBase):
         detected_format = self._detect_format(destination_uri)
         if format:
             file_format = format
-        elif detected_format == "csv" and not destination_uri.endswith((".csv", ".tsv", ".txt")):
+        elif detected_format == "csv" and not str(destination_uri).endswith((".csv", ".tsv", ".txt")):
             # Detection returned default "csv" but file doesn't actually have CSV extension
             # Default to parquet for better compatibility with tests and common usage
             file_format = "parquet"
@@ -370,7 +375,7 @@ class SyncStorageMixin(StorageMixinBase):
         # Special handling for parquet format - if we're exporting to parquet but the
         # destination doesn't have .parquet extension, add it to ensure compatibility
         # with pyarrow.parquet.read_table() which requires the extension
-        if file_format == "parquet" and not destination_uri.endswith(".parquet"):
+        if file_format == "parquet" and not str(destination_uri).endswith(".parquet"):
             destination_uri = f"{destination_uri}.parquet"
 
         # Use storage backend - resolve AFTER modifying destination_uri
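
Together, the two str() guards implement one defaulting rule: an extension-less destination falls back to parquet and gains a .parquet suffix. A condensed sketch with the detected format hard-coded for illustration:

destination_uri = "s3://bucket/export/results"
format = None  # mirrors the method's parameter name

detected_format = "csv"  # _detect_format's fallback when no known extension is found
if format:
    file_format = format
elif detected_format == "csv" and not str(destination_uri).endswith((".csv", ".tsv", ".txt")):
    file_format = "parquet"  # no real CSV extension, so prefer parquet
else:
    file_format = detected_format

if file_format == "parquet" and not str(destination_uri).endswith(".parquet"):
    destination_uri = f"{destination_uri}.parquet"
print(destination_uri)  # s3://bucket/export/results.parquet
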
@@ -412,7 +417,12 @@
         return self._export_via_backend(sql_obj, backend, path, file_format, **kwargs)
 
     def import_from_storage(
-        self, source_uri: str, table_name: str, format: "Optional[str]" = None, mode: str = "create", **options: Any
+        self,
+        source_uri: "Union[str, Path]",
+        table_name: str,
+        format: "Optional[str]" = None,
+        mode: str = "create",
+        **options: Any,
     ) -> int:
         """Import data from storage with intelligent routing.
 
@@ -431,7 +441,12 @@
         return self._import_from_storage(source_uri, table_name, format, mode, **options)
 
     def _import_from_storage(
-        self, source_uri: str, table_name: str, format: "Optional[str]" = None, mode: str = "create", **options: Any
+        self,
+        source_uri: "Union[str, Path]",
+        table_name: str,
+        format: "Optional[str]" = None,
+        mode: str = "create",
+        **options: Any,
     ) -> int:
         """Protected method for import operation implementation.
 
@@ -461,7 +476,23 @@
                 arrow_table = backend.read_arrow(path, **options)
                 return self.ingest_arrow_table(arrow_table, table_name, mode=mode)
             except AttributeError:
-                pass
+                # Backend doesn't support read_arrow, try alternative approach
+                try:
+                    import pyarrow.parquet as pq
+
+                    # Read Parquet file directly
+                    with tempfile.NamedTemporaryFile(mode="wb", suffix=".parquet", delete=False) as tmp:
+                        tmp.write(backend.read_bytes(path))
+                        tmp_path = Path(tmp.name)
+                    try:
+                        arrow_table = pq.read_table(tmp_path)
+                        return self.ingest_arrow_table(arrow_table, table_name, mode=mode)
+                    finally:
+                        tmp_path.unlink(missing_ok=True)
+                except ImportError:
+                    # PyArrow not installed, cannot import Parquet
+                    msg = "PyArrow is required to import Parquet files. Install with: pip install pyarrow"
+                    raise ImportError(msg) from None
 
         # Use traditional import through temporary file
         return self._import_via_backend(backend, path, table_name, file_format, mode, **options)
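
The new fallback spools the backend's raw bytes into a temporary .parquet file so pyarrow.parquet.read_table can open it. A standalone sketch of that round trip, assuming pyarrow is installed; the in-memory buffer stands in for backend.read_bytes:

import tempfile
from pathlib import Path

import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({"id": [1, 2, 3]})
sink = pa.BufferOutputStream()
pq.write_table(table, sink)
parquet_bytes = sink.getvalue().to_pybytes()  # stand-in for backend.read_bytes(path)

# Spool the bytes to a named temp file, read it back, then clean up
with tempfile.NamedTemporaryFile(mode="wb", suffix=".parquet", delete=False) as tmp:
    tmp.write(parquet_bytes)
    tmp_path = Path(tmp.name)
try:
    print(pq.read_table(tmp_path).num_rows)  # 3
finally:
    tmp_path.unlink(missing_ok=True)
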
@@ -471,23 +502,27 @@
     # ============================================================================
 
     def _read_parquet_native(
-        self, source_uri: str, columns: "Optional[list[str]]" = None, **options: Any
+        self, source_uri: "Union[str, Path]", columns: "Optional[list[str]]" = None, **options: Any
     ) -> "SQLResult":
         """Database-specific native Parquet reading. Override in drivers."""
         msg = "Driver should implement _read_parquet_native"
         raise NotImplementedError(msg)
 
-    def _write_parquet_native(self, data: Union[str, ArrowTable], destination_uri: str, **options: Any) -> None:
+    def _write_parquet_native(
+        self, data: Union[str, ArrowTable], destination_uri: "Union[str, Path]", **options: Any
+    ) -> None:
         """Database-specific native Parquet writing. Override in drivers."""
         msg = "Driver should implement _write_parquet_native"
         raise NotImplementedError(msg)
 
-    def _export_native(self, query: str, destination_uri: str, format: str, **options: Any) -> int:
+    def _export_native(self, query: str, destination_uri: "Union[str, Path]", format: str, **options: Any) -> int:
         """Database-specific native export. Override in drivers."""
         msg = "Driver should implement _export_native"
         raise NotImplementedError(msg)
 
-    def _import_native(self, source_uri: str, table_name: str, format: str, mode: str, **options: Any) -> int:
+    def _import_native(
+        self, source_uri: "Union[str, Path]", table_name: str, format: str, mode: str, **options: Any
+    ) -> int:
         """Database-specific native import. Override in drivers."""
         msg = "Driver should implement _import_native"
         raise NotImplementedError(msg)
@@ -743,7 +778,7 @@ class AsyncStorageMixin(StorageMixinBase):
         statement: "Statement",
         /,
         *parameters: "Union[StatementParameters, StatementFilter]",
-        destination_uri: str,
+        destination_uri: "Union[str, Path]",
         format: "Optional[str]" = None,
         _connection: "Optional[ConnectionT]" = None,
         _config: "Optional[SQLConfig]" = None,
@@ -770,7 +805,7 @@ class AsyncStorageMixin(StorageMixinBase):
     async def _export_to_storage(
         self,
         query: "SQL",
-        destination_uri: str,
+        destination_uri: "Union[str, Path]",
         format: "Optional[str]" = None,
         connection: "Optional[ConnectionT]" = None,
         **options: Any,
@@ -793,7 +828,7 @@ class AsyncStorageMixin(StorageMixinBase):
         detected_format = self._detect_format(destination_uri)
         if format:
             file_format = format
-        elif detected_format == "csv" and not destination_uri.endswith((".csv", ".tsv", ".txt")):
+        elif detected_format == "csv" and not str(destination_uri).endswith((".csv", ".tsv", ".txt")):
             # Detection returned default "csv" but file doesn't actually have CSV extension
             # Default to parquet for better compatibility with tests and common usage
             file_format = "parquet"
@@ -803,7 +838,7 @@ class AsyncStorageMixin(StorageMixinBase):
         # Special handling for parquet format - if we're exporting to parquet but the
         # destination doesn't have .parquet extension, add it to ensure compatibility
         # with pyarrow.parquet.read_table() which requires the extension
-        if file_format == "parquet" and not destination_uri.endswith(".parquet"):
+        if file_format == "parquet" and not str(destination_uri).endswith(".parquet"):
             destination_uri = f"{destination_uri}.parquet"
 
         # Use storage backend - resolve AFTER modifying destination_uri
@@ -838,7 +873,12 @@ class AsyncStorageMixin(StorageMixinBase):
         return await self._export_via_backend(query, backend, path, file_format, **options)
 
     async def import_from_storage(
-        self, source_uri: str, table_name: str, format: "Optional[str]" = None, mode: str = "create", **options: Any
+        self,
+        source_uri: "Union[str, Path]",
+        table_name: str,
+        format: "Optional[str]" = None,
+        mode: str = "create",
+        **options: Any,
     ) -> int:
         """Async import data from storage with intelligent routing.
 
@@ -857,7 +897,12 @@ class AsyncStorageMixin(StorageMixinBase):
         return await self._import_from_storage(source_uri, table_name, format, mode, **options)
 
     async def _import_from_storage(
-        self, source_uri: str, table_name: str, format: "Optional[str]" = None, mode: str = "create", **options: Any
+        self,
+        source_uri: "Union[str, Path]",
+        table_name: str,
+        format: "Optional[str]" = None,
+        mode: str = "create",
+        **options: Any,
     ) -> int:
         """Protected async method for import operation implementation.
 
@@ -884,12 +929,14 @@
     # Async Database-Specific Implementation Hooks
     # ============================================================================
 
-    async def _export_native(self, query: str, destination_uri: str, format: str, **options: Any) -> int:
+    async def _export_native(self, query: str, destination_uri: "Union[str, Path]", format: str, **options: Any) -> int:
         """Async database-specific native export."""
         msg = "Driver should implement _export_native"
         raise NotImplementedError(msg)
 
-    async def _import_native(self, source_uri: str, table_name: str, format: str, mode: str, **options: Any) -> int:
+    async def _import_native(
+        self, source_uri: "Union[str, Path]", table_name: str, format: str, mode: str, **options: Any
+    ) -> int:
         """Async database-specific native import."""
         msg = "Driver should implement _import_native"
         raise NotImplementedError(msg)
sqlspec/loader.py CHANGED
@@ -113,7 +113,7 @@ class SQLFileLoader:
         self._query_to_file: dict[str, str] = {}  # Maps query name to file path
 
     def _read_file_content(self, path: Union[str, Path]) -> str:
-        """Read file content using appropriate backend.
+        """Read file content using storage backend.
 
         Args:
             path: File path (can be local path or URI).
@@ -126,37 +126,15 @@ class SQLFileLoader:
         """
        path_str = str(path)
 
-        # Use storage backend for URIs (anything with a scheme)
-        if "://" in path_str:
-            try:
-                backend = self.storage_registry.get(path_str)
-                return backend.read_text(path_str, encoding=self.encoding)
-            except KeyError as e:
-                raise SQLFileNotFoundError(path_str) from e
-            except Exception as e:
-                raise SQLFileParseError(path_str, path_str, e) from e
-
-        # Handle local file paths
-        local_path = Path(path_str)
-        self._check_file_path(local_path)
-        content_bytes = self._read_file_content_bytes(local_path)
-        return content_bytes.decode(self.encoding)
-
-    @staticmethod
-    def _read_file_content_bytes(path: Path) -> bytes:
         try:
-            return path.read_bytes()
+            # Always use storage backend for consistent behavior
+            # Pass the original path object to allow storage registry to handle Path -> file:// conversion
+            backend = self.storage_registry.get(path)
+            return backend.read_text(path_str, encoding=self.encoding)
+        except KeyError as e:
+            raise SQLFileNotFoundError(path_str) from e
        except Exception as e:
-            raise SQLFileParseError(str(path), str(path), e) from e
-
-    @staticmethod
-    def _check_file_path(path: Union[str, Path]) -> None:
-        """Ensure the file exists and is a valid path."""
-        path_obj = Path(path).resolve()
-        if not path_obj.exists():
-            raise SQLFileNotFoundError(str(path_obj))
-        if not path_obj.is_file():
-            raise SQLFileParseError(str(path_obj), str(path_obj), ValueError("Path is not a file"))
+            raise SQLFileParseError(path_str, path_str, e) from e
 
     @staticmethod
     def _strip_leading_comments(sql_text: str) -> str:
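
With the local-path branch gone, every read goes through the storage registry and a single two-step error mapping. A stripped-down sketch of that mapping; the registry and exception classes here are stand-ins rather than sqlspec's own:

class SQLFileNotFoundError(Exception): ...
class SQLFileParseError(Exception): ...

registry: dict = {}  # imagine a URI/Path -> backend mapping

def read_file_content(path) -> str:
    path_str = str(path)
    try:
        backend = registry[path_str]  # raises KeyError when nothing resolves
        return backend.read_text(path_str, encoding="utf-8")
    except KeyError as e:
        # No backend could be resolved: report a missing file
        raise SQLFileNotFoundError(path_str) from e
    except Exception as e:
        # Any other failure surfaces as a parse/read error
        raise SQLFileParseError(path_str) from e
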
@@ -192,7 +192,9 @@ class QueryBuilder(ABC, Generic[RowT]):
             self._raise_sql_builder_error(msg)
             cte_select_expression = query._expression.copy()
             for p_name, p_value in query._parameters.items():
-                self.add_parameter(p_value, f"cte_{alias}_{p_name}")
+                # Try to preserve original parameter name, only rename if collision
+                unique_name = self._generate_unique_parameter_name(p_name)
+                self.add_parameter(p_value, unique_name)
 
         elif isinstance(query, str):
             try:
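
_generate_unique_parameter_name is internal to sqlspec, so the following stand-in only illustrates the intended behavior: keep the caller's parameter name when it is free, and suffix it only on collision.

def generate_unique_parameter_name(name: str, taken: set[str]) -> str:
    # Hypothetical stand-in for the builder's collision handling
    if name not in taken:
        return name  # preserve the original parameter name
    i = 1
    while f"{name}_{i}" in taken:
        i += 1
    return f"{name}_{i}"  # rename only when the name is already taken

taken = {"status"}
print(generate_unique_parameter_name("limit", taken))   # limit
print(generate_unique_parameter_name("status", taken))  # status_1
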
@@ -769,14 +769,27 @@ class CreateTableAsSelectBuilder(DDLBuilder):
             select_expr = self._select_query.expression
             select_params = getattr(self._select_query, "parameters", None)
         elif isinstance(self._select_query, SelectBuilder):
+            # Get the expression and parameters directly
             select_expr = getattr(self._select_query, "_expression", None)
             select_params = getattr(self._select_query, "_parameters", None)
+
+            # Apply CTEs if present
+            with_ctes = getattr(self._select_query, "_with_ctes", {})
+            if with_ctes and select_expr and isinstance(select_expr, exp.Select):
+                # Apply CTEs directly to the SELECT expression using sqlglot's with_ method
+                for alias, cte in with_ctes.items():
+                    if hasattr(select_expr, "with_"):
+                        select_expr = select_expr.with_(
+                            cte.this,  # The CTE's SELECT expression
+                            as_=alias,
+                            copy=False,
+                        )
         elif isinstance(self._select_query, str):
             select_expr = exp.maybe_parse(self._select_query)
             select_params = None
         else:
             self._raise_sql_builder_error("Unsupported type for SELECT query in CTAS.")
-        if select_expr is None or not isinstance(select_expr, exp.Select):
+        if select_expr is None:
             self._raise_sql_builder_error("SELECT query must be a valid SELECT expression.")
 
         # Merge parameters from SELECT if present
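
For context, sqlglot's with_ helper is the documented way to attach a CTE to a SELECT expression. A minimal sketch of that API with illustrative query text:

import sqlglot
from sqlglot import exp

select_expr = sqlglot.parse_one("SELECT * FROM recent", into=exp.Select)
cte_body = sqlglot.parse_one("SELECT id FROM events WHERE ts > '2024-01-01'")

# with_(alias, as_=expression) prepends a WITH clause to the SELECT
select_expr = select_expr.with_("recent", as_=cte_body, copy=False)
print(select_expr.sql())
# expected: WITH recent AS (SELECT id FROM events WHERE ts > '2024-01-01') SELECT * FROM recent
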
@@ -324,11 +324,7 @@ class StatementAnalyzer(ProcessorProtocol):
     def _analyze_subqueries(self, expression: exp.Expression, analysis: StatementAnalysis) -> None:
         """Analyze subquery complexity and nesting depth."""
         subqueries: list[exp.Expression] = list(expression.find_all(exp.Subquery))
-        subqueries.extend(
-            query
-            for in_clause in expression.find_all(exp.In)
-            if (query := in_clause.args.get("query")) and isinstance(query, exp.Select)
-        )
+        # Workaround for EXISTS clauses: sqlglot doesn't wrap EXISTS subqueries in Subquery nodes
        subqueries.extend(
             [
                 exists_clause.this
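
The comment's claim about sqlglot is easy to check: an EXISTS subquery surfaces as an exp.Exists node whose this is the inner SELECT, with no exp.Subquery wrapper. A quick verification sketch, assuming current sqlglot behavior matches the comment:

import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one("SELECT 1 FROM t WHERE EXISTS (SELECT 1 FROM u)")
print(list(tree.find_all(exp.Subquery)))                  # expected: []
print([e.this.sql() for e in tree.find_all(exp.Exists)])  # expected: ['SELECT 1 FROM u']
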
@@ -34,7 +34,9 @@ class ParameterizationContext:
     in_case_when: bool = False
     in_array: bool = False
     in_in_clause: bool = False
+    in_recursive_cte: bool = False
     function_depth: int = 0
+    cte_depth: int = 0
 
 
 class ParameterizeLiterals(ProcessorProtocol):
@@ -53,6 +55,7 @@ class ParameterizeLiterals(ProcessorProtocol):
         preserve_boolean: Whether to preserve boolean literals as-is.
         preserve_numbers_in_limit: Whether to preserve numbers in LIMIT/OFFSET clauses.
         preserve_in_functions: List of function names where literals should be preserved.
+        preserve_in_recursive_cte: Whether to preserve literals in recursive CTEs (default True to avoid type inference issues).
         parameterize_arrays: Whether to parameterize array literals.
         parameterize_in_lists: Whether to parameterize IN clause lists.
         max_string_length: Maximum string length to parameterize.
@@ -68,6 +71,7 @@ class ParameterizeLiterals(ProcessorProtocol):
         preserve_boolean: bool = True,
         preserve_numbers_in_limit: bool = True,
         preserve_in_functions: Optional[list[str]] = None,
+        preserve_in_recursive_cte: bool = True,
         parameterize_arrays: bool = True,
         parameterize_in_lists: bool = True,
         max_string_length: int = DEFAULT_MAX_STRING_LENGTH,
@@ -79,7 +83,18 @@
         self.preserve_null = preserve_null
         self.preserve_boolean = preserve_boolean
         self.preserve_numbers_in_limit = preserve_numbers_in_limit
-        self.preserve_in_functions = preserve_in_functions or ["COALESCE", "IFNULL", "NVL", "ISNULL"]
+        self.preserve_in_recursive_cte = preserve_in_recursive_cte
+        self.preserve_in_functions = preserve_in_functions or [
+            "COALESCE",
+            "IFNULL",
+            "NVL",
+            "ISNULL",
+            # Array functions that take dimension arguments
+            "ARRAYSIZE",  # SQLglot converts array_length to ArraySize
+            "ARRAY_UPPER",
+            "ARRAY_LOWER",
+            "ARRAY_NDIMS",
+        ]
         self.parameterize_arrays = parameterize_arrays
         self.parameterize_in_lists = parameterize_in_lists
         self.max_string_length = max_string_length
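
The upper-cased entries matter because the list is matched against sqlglot's normalized function names; per the inline comment, array_length is parsed into an ArraySize node. A quick check of that normalization, hedged on sqlglot's current dialect mappings:

import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one("SELECT array_length(tags, 1) FROM posts", read="postgres")
print(tree.find(exp.ArraySize) is not None)  # expected True per the comment above
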
@@ -162,6 +177,17 @@
                 context.in_array = True
             elif isinstance(node, exp.In):
                 context.in_in_clause = True
+            elif isinstance(node, exp.CTE):
+                context.cte_depth += 1
+                # Check if this CTE is recursive:
+                # 1. Parent WITH must be RECURSIVE
+                # 2. CTE must contain UNION (characteristic of recursive CTEs)
+                is_in_recursive_with = any(
+                    isinstance(parent, exp.With) and parent.args.get("recursive", False)
+                    for parent in reversed(context.parent_stack)
+                )
+                if is_in_recursive_with and self._contains_union(node):
+                    context.in_recursive_cte = True
         else:
             if context.parent_stack:
                 context.parent_stack.pop()
@@ -176,6 +202,10 @@
                 context.in_array = False
             elif isinstance(node, exp.In):
                 context.in_in_clause = False
+            elif isinstance(node, exp.CTE):
+                context.cte_depth -= 1
+                if context.cte_depth == 0:
+                    context.in_recursive_cte = False
 
     def _process_literal_with_context(
         self, literal: exp.Expression, context: ParameterizationContext
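
A query of the shape this detector targets: the enclosing WITH is RECURSIVE and the CTE body contains a UNION. A minimal sqlglot sketch; parameterizing the 1 and the 10 here is exactly the rewrite that can confuse type inference for the recursive column:

import sqlglot
from sqlglot import exp

sql = """
WITH RECURSIVE cnt(x) AS (
    SELECT 1
    UNION ALL
    SELECT x + 1 FROM cnt WHERE x < 10
)
SELECT x FROM cnt
"""
tree = sqlglot.parse_one(sql)
print(tree.find(exp.With).args.get("recursive"))            # True: parent WITH is RECURSIVE
print(tree.find(exp.CTE).this.find(exp.Union) is not None)  # True: the CTE body contains a UNION
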
@@ -206,7 +236,6 @@
                     "type": type_hint,
                     "semantic_name": semantic_name,
                     "context": self._get_context_description(context),
-                    # Note: We avoid calling literal.sql() for performance
                 }
             )
 
@@ -227,6 +256,21 @@
         if context.in_function_args:
             return True
 
+        # Preserve literals in recursive CTEs to avoid type inference issues
+        if self.preserve_in_recursive_cte and context.in_recursive_cte:
+            return True
+
+        # Check if this literal is being used as an alias value in SELECT
+        # e.g., 'computed' as process_status should be preserved
+        if hasattr(literal, "parent") and literal.parent:
+            parent = literal.parent
+            # Check if it's an Alias node and the literal is the expression (not the alias name)
+            if isinstance(parent, exp.Alias) and parent.this == literal:
+                # Check if this alias is in a SELECT clause
+                for ancestor in context.parent_stack:
+                    if isinstance(ancestor, exp.Select):
+                        return True
+
         # Check parent context more intelligently
         for parent in context.parent_stack:
             # Preserve in schema/DDL contexts
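
In sqlglot terms, the preserved alias case is a literal sitting in Alias.this (the aliased value, not the alias name) under a SELECT. A small sketch:

import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one("SELECT 'computed' AS process_status, id FROM jobs")
alias = tree.find(exp.Alias)
print(isinstance(alias.this, exp.Literal))  # True: the literal is the aliased value
print(alias.alias)                          # process_status
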
@@ -616,6 +660,16 @@
         """
         return self._parameter_metadata.copy()
 
+    def _contains_union(self, cte_node: exp.CTE) -> bool:
+        """Check if a CTE contains a UNION (characteristic of recursive CTEs)."""
+
+        def has_union(node: exp.Expression) -> bool:
+            if isinstance(node, exp.Union):
+                return True
+            return any(has_union(child) for child in node.iter_expressions())
+
+        return cte_node.this and has_union(cte_node.this)
+
     def clear_parameters(self) -> None:
         """Clear the extracted parameters list."""
         self.extracted_parameters = []