sqlspec 0.14.1__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sqlspec has been flagged as potentially problematic; consult the package registry's advisory page for details.

Files changed (159):
  1. sqlspec/__init__.py +50 -25
  2. sqlspec/__main__.py +1 -1
  3. sqlspec/__metadata__.py +1 -3
  4. sqlspec/_serialization.py +1 -2
  5. sqlspec/_sql.py +480 -121
  6. sqlspec/_typing.py +278 -142
  7. sqlspec/adapters/adbc/__init__.py +4 -3
  8. sqlspec/adapters/adbc/_types.py +12 -0
  9. sqlspec/adapters/adbc/config.py +115 -260
  10. sqlspec/adapters/adbc/driver.py +462 -367
  11. sqlspec/adapters/aiosqlite/__init__.py +18 -3
  12. sqlspec/adapters/aiosqlite/_types.py +13 -0
  13. sqlspec/adapters/aiosqlite/config.py +199 -129
  14. sqlspec/adapters/aiosqlite/driver.py +230 -269
  15. sqlspec/adapters/asyncmy/__init__.py +18 -3
  16. sqlspec/adapters/asyncmy/_types.py +12 -0
  17. sqlspec/adapters/asyncmy/config.py +80 -168
  18. sqlspec/adapters/asyncmy/driver.py +260 -225
  19. sqlspec/adapters/asyncpg/__init__.py +19 -4
  20. sqlspec/adapters/asyncpg/_types.py +17 -0
  21. sqlspec/adapters/asyncpg/config.py +82 -181
  22. sqlspec/adapters/asyncpg/driver.py +285 -383
  23. sqlspec/adapters/bigquery/__init__.py +17 -3
  24. sqlspec/adapters/bigquery/_types.py +12 -0
  25. sqlspec/adapters/bigquery/config.py +191 -258
  26. sqlspec/adapters/bigquery/driver.py +474 -646
  27. sqlspec/adapters/duckdb/__init__.py +14 -3
  28. sqlspec/adapters/duckdb/_types.py +12 -0
  29. sqlspec/adapters/duckdb/config.py +415 -351
  30. sqlspec/adapters/duckdb/driver.py +343 -413
  31. sqlspec/adapters/oracledb/__init__.py +19 -5
  32. sqlspec/adapters/oracledb/_types.py +14 -0
  33. sqlspec/adapters/oracledb/config.py +123 -379
  34. sqlspec/adapters/oracledb/driver.py +507 -560
  35. sqlspec/adapters/psqlpy/__init__.py +13 -3
  36. sqlspec/adapters/psqlpy/_types.py +11 -0
  37. sqlspec/adapters/psqlpy/config.py +93 -254
  38. sqlspec/adapters/psqlpy/driver.py +505 -234
  39. sqlspec/adapters/psycopg/__init__.py +19 -5
  40. sqlspec/adapters/psycopg/_types.py +17 -0
  41. sqlspec/adapters/psycopg/config.py +143 -403
  42. sqlspec/adapters/psycopg/driver.py +706 -872
  43. sqlspec/adapters/sqlite/__init__.py +14 -3
  44. sqlspec/adapters/sqlite/_types.py +11 -0
  45. sqlspec/adapters/sqlite/config.py +202 -118
  46. sqlspec/adapters/sqlite/driver.py +264 -303
  47. sqlspec/base.py +105 -9
  48. sqlspec/{statement/builder → builder}/__init__.py +12 -14
  49. sqlspec/{statement/builder → builder}/_base.py +120 -55
  50. sqlspec/{statement/builder → builder}/_column.py +17 -6
  51. sqlspec/{statement/builder → builder}/_ddl.py +46 -79
  52. sqlspec/{statement/builder → builder}/_ddl_utils.py +5 -10
  53. sqlspec/{statement/builder → builder}/_delete.py +6 -25
  54. sqlspec/{statement/builder → builder}/_insert.py +18 -65
  55. sqlspec/builder/_merge.py +56 -0
  56. sqlspec/{statement/builder → builder}/_parsing_utils.py +8 -11
  57. sqlspec/{statement/builder → builder}/_select.py +11 -56
  58. sqlspec/{statement/builder → builder}/_update.py +12 -18
  59. sqlspec/{statement/builder → builder}/mixins/__init__.py +10 -14
  60. sqlspec/{statement/builder → builder}/mixins/_cte_and_set_ops.py +48 -59
  61. sqlspec/{statement/builder → builder}/mixins/_insert_operations.py +34 -18
  62. sqlspec/{statement/builder → builder}/mixins/_join_operations.py +1 -3
  63. sqlspec/{statement/builder → builder}/mixins/_merge_operations.py +19 -9
  64. sqlspec/{statement/builder → builder}/mixins/_order_limit_operations.py +3 -3
  65. sqlspec/{statement/builder → builder}/mixins/_pivot_operations.py +4 -8
  66. sqlspec/{statement/builder → builder}/mixins/_select_operations.py +25 -38
  67. sqlspec/{statement/builder → builder}/mixins/_update_operations.py +15 -16
  68. sqlspec/{statement/builder → builder}/mixins/_where_clause.py +210 -137
  69. sqlspec/cli.py +4 -5
  70. sqlspec/config.py +180 -133
  71. sqlspec/core/__init__.py +63 -0
  72. sqlspec/core/cache.py +873 -0
  73. sqlspec/core/compiler.py +396 -0
  74. sqlspec/core/filters.py +830 -0
  75. sqlspec/core/hashing.py +310 -0
  76. sqlspec/core/parameters.py +1209 -0
  77. sqlspec/core/result.py +664 -0
  78. sqlspec/{statement → core}/splitter.py +321 -191
  79. sqlspec/core/statement.py +666 -0
  80. sqlspec/driver/__init__.py +7 -10
  81. sqlspec/driver/_async.py +387 -176
  82. sqlspec/driver/_common.py +527 -289
  83. sqlspec/driver/_sync.py +390 -172
  84. sqlspec/driver/mixins/__init__.py +2 -19
  85. sqlspec/driver/mixins/_result_tools.py +164 -0
  86. sqlspec/driver/mixins/_sql_translator.py +6 -3
  87. sqlspec/exceptions.py +5 -252
  88. sqlspec/extensions/aiosql/adapter.py +93 -96
  89. sqlspec/extensions/litestar/cli.py +1 -1
  90. sqlspec/extensions/litestar/config.py +0 -1
  91. sqlspec/extensions/litestar/handlers.py +15 -26
  92. sqlspec/extensions/litestar/plugin.py +18 -16
  93. sqlspec/extensions/litestar/providers.py +17 -52
  94. sqlspec/loader.py +424 -105
  95. sqlspec/migrations/__init__.py +12 -0
  96. sqlspec/migrations/base.py +92 -68
  97. sqlspec/migrations/commands.py +24 -106
  98. sqlspec/migrations/loaders.py +402 -0
  99. sqlspec/migrations/runner.py +49 -51
  100. sqlspec/migrations/tracker.py +31 -44
  101. sqlspec/migrations/utils.py +64 -24
  102. sqlspec/protocols.py +7 -183
  103. sqlspec/storage/__init__.py +1 -1
  104. sqlspec/storage/backends/base.py +37 -40
  105. sqlspec/storage/backends/fsspec.py +136 -112
  106. sqlspec/storage/backends/obstore.py +138 -160
  107. sqlspec/storage/capabilities.py +5 -4
  108. sqlspec/storage/registry.py +57 -106
  109. sqlspec/typing.py +136 -115
  110. sqlspec/utils/__init__.py +2 -3
  111. sqlspec/utils/correlation.py +0 -3
  112. sqlspec/utils/deprecation.py +6 -6
  113. sqlspec/utils/fixtures.py +6 -6
  114. sqlspec/utils/logging.py +0 -2
  115. sqlspec/utils/module_loader.py +7 -12
  116. sqlspec/utils/singleton.py +0 -1
  117. sqlspec/utils/sync_tools.py +17 -38
  118. sqlspec/utils/text.py +12 -51
  119. sqlspec/utils/type_guards.py +443 -232
  120. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/METADATA +7 -2
  121. sqlspec-0.16.0.dist-info/RECORD +134 -0
  122. sqlspec/adapters/adbc/transformers.py +0 -108
  123. sqlspec/driver/connection.py +0 -207
  124. sqlspec/driver/mixins/_cache.py +0 -114
  125. sqlspec/driver/mixins/_csv_writer.py +0 -91
  126. sqlspec/driver/mixins/_pipeline.py +0 -508
  127. sqlspec/driver/mixins/_query_tools.py +0 -796
  128. sqlspec/driver/mixins/_result_utils.py +0 -138
  129. sqlspec/driver/mixins/_storage.py +0 -912
  130. sqlspec/driver/mixins/_type_coercion.py +0 -128
  131. sqlspec/driver/parameters.py +0 -138
  132. sqlspec/statement/__init__.py +0 -21
  133. sqlspec/statement/builder/_merge.py +0 -95
  134. sqlspec/statement/cache.py +0 -50
  135. sqlspec/statement/filters.py +0 -625
  136. sqlspec/statement/parameters.py +0 -956
  137. sqlspec/statement/pipelines/__init__.py +0 -210
  138. sqlspec/statement/pipelines/analyzers/__init__.py +0 -9
  139. sqlspec/statement/pipelines/analyzers/_analyzer.py +0 -646
  140. sqlspec/statement/pipelines/context.py +0 -109
  141. sqlspec/statement/pipelines/transformers/__init__.py +0 -7
  142. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +0 -88
  143. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +0 -1247
  144. sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +0 -76
  145. sqlspec/statement/pipelines/validators/__init__.py +0 -23
  146. sqlspec/statement/pipelines/validators/_dml_safety.py +0 -290
  147. sqlspec/statement/pipelines/validators/_parameter_style.py +0 -370
  148. sqlspec/statement/pipelines/validators/_performance.py +0 -714
  149. sqlspec/statement/pipelines/validators/_security.py +0 -967
  150. sqlspec/statement/result.py +0 -435
  151. sqlspec/statement/sql.py +0 -1774
  152. sqlspec/utils/cached_property.py +0 -25
  153. sqlspec/utils/statement_hashing.py +0 -203
  154. sqlspec-0.14.1.dist-info/RECORD +0 -145
  155. /sqlspec/{statement/builder → builder}/mixins/_delete_operations.py +0 -0
  156. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/WHEEL +0 -0
  157. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/entry_points.txt +0 -0
  158. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/licenses/LICENSE +0 -0
  159. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/licenses/NOTICE +0 -0
@@ -1,19 +1,35 @@
1
- """SQL script statement splitter with dialect-aware lexer-driven state machine.
2
-
3
- This module provides a robust way to split SQL scripts into individual statements,
4
- handling complex constructs like PL/SQL blocks, T-SQL batches, and nested blocks.
1
+ """SQL statement splitter with caching and dialect support.
2
+
3
+ This module provides a SQL script statement splitter with caching and
4
+ multiple dialect support.
5
+
6
+ Components:
7
+ - StatementSplitter: SQL splitter with caching
8
+ - DialectConfig: Dialect configuration system
9
+ - Token/TokenType: Tokenization system
10
+ - Caching: LRU caching for split results
11
+ - Pattern compilation caching
12
+
13
+ Features:
14
+ - Support for multiple SQL dialects (Oracle, T-SQL, PostgreSQL, MySQL, SQLite, DuckDB, BigQuery)
15
+ - Cached pattern compilation
16
+ - LRU caching for split results
17
+ - Optimized tokenization
18
+ - Complete preservation of split_sql_script function
5
19
  """
6
20
 
7
21
  import re
22
+ import threading
8
23
  from abc import ABC, abstractmethod
9
24
  from collections.abc import Generator
10
- from dataclasses import dataclass
11
25
  from enum import Enum
12
26
  from re import Pattern
13
- from typing import Callable, Optional, Union
27
+ from typing import Any, Callable, Optional, Union
14
28
 
29
+ from mypy_extensions import mypyc_attr
15
30
  from typing_extensions import TypeAlias
16
31
 
32
+ from sqlspec.core.cache import CacheKey, UnifiedCache
17
33
  from sqlspec.utils.logging import get_logger
18
34
 
19
35
  __all__ = (
@@ -27,8 +43,33 @@ __all__ = (
27
43
  "split_sql_script",
28
44
  )
29
45
 
46
+ logger = get_logger("sqlspec.core.splitter")
47
+
48
+ DEFAULT_PATTERN_CACHE_SIZE = 1000 # Compiled regex patterns
49
+ DEFAULT_RESULT_CACHE_SIZE = 5000 # Split results
50
+ DEFAULT_CACHE_TTL = 3600 # 1 hour TTL
30
51
 
31
- logger = get_logger("sqlspec")
52
+ DIALECT_CONFIG_SLOTS = (
53
+ "_block_starters",
54
+ "_block_enders",
55
+ "_statement_terminators",
56
+ "_batch_separators",
57
+ "_special_terminators",
58
+ "_max_nesting_depth",
59
+ "_name",
60
+ )
61
+
62
+ TOKEN_SLOTS = ("type", "value", "line", "column", "position")
63
+
64
+ SPLITTER_SLOTS = (
65
+ "_dialect",
66
+ "_strip_trailing_semicolon",
67
+ "_token_patterns",
68
+ "_compiled_patterns",
69
+ "_pattern_cache_key",
70
+ "_result_cache",
71
+ "_pattern_cache",
72
+ )
32
73
 
33
74
 
34
75
  class TokenType(Enum):
@@ -45,15 +86,21 @@ class TokenType(Enum):
45
86
  OTHER = "OTHER"
46
87
 
47
88
 
48
- @dataclass
89
+ @mypyc_attr(allow_interpreted_subclasses=True)
49
90
  class Token:
50
- """Represents a single token in the SQL script."""
91
+ """SQL token with metadata."""
92
+
93
+ __slots__ = TOKEN_SLOTS
51
94
 
52
- type: TokenType
53
- value: str
54
- line: int
55
- column: int
56
- position: int # Absolute position in the script
95
+ def __init__(self, type: TokenType, value: str, line: int, column: int, position: int) -> None:
96
+ self.type = type
97
+ self.value = value
98
+ self.line = line
99
+ self.column = column
100
+ self.position = position
101
+
102
+ def __repr__(self) -> str:
103
+ return f"Token({self.type.value}, {self.value!r}, {self.line}:{self.column})"
57
104
 
58
105
 
59
106
  TokenHandler: TypeAlias = Callable[[str, int, int, int], Optional[Token]]
@@ -61,9 +108,22 @@ TokenPattern: TypeAlias = Union[str, TokenHandler]
61
108
  CompiledTokenPattern: TypeAlias = Union[Pattern[str], TokenHandler]
62
109
 
63
110
 
111
+ @mypyc_attr(allow_interpreted_subclasses=True)
64
112
  class DialectConfig(ABC):
65
113
  """Abstract base class for SQL dialect configurations."""
66
114
 
115
+ __slots__ = DIALECT_CONFIG_SLOTS
116
+
117
+ def __init__(self) -> None:
118
+ """Initialize dialect configuration."""
119
+ self._name: Optional[str] = None
120
+ self._block_starters: Optional[set[str]] = None
121
+ self._block_enders: Optional[set[str]] = None
122
+ self._statement_terminators: Optional[set[str]] = None
123
+ self._batch_separators: Optional[set[str]] = None
124
+ self._special_terminators: Optional[dict[str, Callable[[list[Token], int], bool]]] = None
125
+ self._max_nesting_depth: Optional[int] = None
126
+
67
127
  @property
68
128
  @abstractmethod
69
129
  def name(self) -> str:
@@ -87,44 +147,44 @@ class DialectConfig(ABC):
87
147
  @property
88
148
  def batch_separators(self) -> set[str]:
89
149
  """Keywords that separate batches (e.g., GO for T-SQL)."""
90
- return set()
150
+ if self._batch_separators is None:
151
+ self._batch_separators = set()
152
+ return self._batch_separators
91
153
 
92
154
  @property
93
155
  def special_terminators(self) -> dict[str, Callable[[list[Token], int], bool]]:
94
156
  """Special terminators that need custom handling."""
95
- return {}
157
+ if self._special_terminators is None:
158
+ self._special_terminators = {}
159
+ return self._special_terminators
96
160
 
97
161
  @property
98
162
  def max_nesting_depth(self) -> int:
99
163
  """Maximum allowed nesting depth for blocks."""
100
- return 256
164
+ if self._max_nesting_depth is None:
165
+ self._max_nesting_depth = 256
166
+ return self._max_nesting_depth
101
167
 
102
168
  def get_all_token_patterns(self) -> list[tuple[TokenType, TokenPattern]]:
103
169
  """Assembles the complete, ordered list of token regex patterns."""
104
- # 1. Start with high-precedence patterns
105
170
  patterns: list[tuple[TokenType, TokenPattern]] = [
106
171
  (TokenType.COMMENT_LINE, r"--[^\n]*"),
107
172
  (TokenType.COMMENT_BLOCK, r"/\*[\s\S]*?\*/"),
108
173
  (TokenType.STRING_LITERAL, r"'(?:[^']|'')*'"),
109
- (TokenType.QUOTED_IDENTIFIER, r'"[^"]*"|\[[^\]]*\]'), # Standard and T-SQL
174
+ (TokenType.QUOTED_IDENTIFIER, r'"[^"]*"|\[[^\]]*\]'),
110
175
  ]
111
176
 
112
- # 2. Add dialect-specific patterns (can be overridden)
113
177
  patterns.extend(self._get_dialect_specific_patterns())
114
178
 
115
- # 3. Dynamically build and insert keyword/separator patterns
116
179
  all_keywords = self.block_starters | self.block_enders | self.batch_separators
117
180
  if all_keywords:
118
181
  sorted_keywords = sorted(all_keywords, key=len, reverse=True)
119
182
  patterns.append((TokenType.KEYWORD, r"\b(" + "|".join(re.escape(kw) for kw in sorted_keywords) + r")\b"))
120
183
 
121
- # 4. Add terminators
122
184
  all_terminators = self.statement_terminators | set(self.special_terminators.keys())
123
185
  if all_terminators:
124
- # Escape special regex characters
125
186
  patterns.append((TokenType.TERMINATOR, "|".join(re.escape(t) for t in all_terminators)))
126
187
 
127
- # 5. Add low-precedence patterns
128
188
  patterns.extend([(TokenType.WHITESPACE, r"\s+"), (TokenType.OTHER, r".")])
129
189
 
130
190
  return patterns
@@ -134,21 +194,12 @@ class DialectConfig(ABC):
134
194
  return []
135
195
 
136
196
  @staticmethod
137
- def is_real_block_ender(tokens: list[Token], current_pos: int) -> bool:
138
- """Check if this END keyword is actually a block ender.
139
-
140
- Override in dialect configs to handle cases like END IF, END LOOP, etc.
141
- that are not true block enders.
142
- """
143
- _ = tokens, current_pos # Default implementation doesn't use these
197
+ def is_real_block_ender(tokens: list[Token], current_pos: int) -> bool: # noqa: ARG004
198
+ """Check if this END keyword is actually a block ender."""
144
199
  return True
145
200
 
146
201
  def should_delay_semicolon_termination(self, tokens: list[Token], current_pos: int) -> bool:
147
- """Check if semicolon termination should be delayed.
148
-
149
- Override in dialect configs to handle special cases like Oracle END; /
150
- """
151
- _ = tokens, current_pos # Default implementation doesn't use these
202
+ """Check if semicolon termination should be delayed."""
152
203
  return False
153
204
 
154
205
 
@@ -157,30 +208,36 @@ class OracleDialectConfig(DialectConfig):
157
208
 
158
209
  @property
159
210
  def name(self) -> str:
160
- return "oracle"
211
+ if self._name is None:
212
+ self._name = "oracle"
213
+ return self._name
161
214
 
162
215
  @property
163
216
  def block_starters(self) -> set[str]:
164
- return {"BEGIN", "DECLARE", "CASE"}
217
+ if self._block_starters is None:
218
+ self._block_starters = {"BEGIN", "DECLARE", "CASE"}
219
+ return self._block_starters
165
220
 
166
221
  @property
167
222
  def block_enders(self) -> set[str]:
168
- return {"END"}
223
+ if self._block_enders is None:
224
+ self._block_enders = {"END"}
225
+ return self._block_enders
169
226
 
170
227
  @property
171
228
  def statement_terminators(self) -> set[str]:
172
- return {";"}
229
+ if self._statement_terminators is None:
230
+ self._statement_terminators = {";"}
231
+ return self._statement_terminators
173
232
 
174
233
  @property
175
234
  def special_terminators(self) -> dict[str, Callable[[list[Token], int], bool]]:
176
- return {"/": self._handle_slash_terminator}
235
+ if self._special_terminators is None:
236
+ self._special_terminators = {"/": self._handle_slash_terminator}
237
+ return self._special_terminators
177
238
 
178
239
  def should_delay_semicolon_termination(self, tokens: list[Token], current_pos: int) -> bool:
179
- """Check if we should delay semicolon termination to look for a slash.
180
-
181
- In Oracle, after END; we should check if there's a / coming up on its own line.
182
- """
183
- # Look backwards to see if we just processed an END token
240
+ """Check if we should delay semicolon termination to look for a slash."""
184
241
  pos = current_pos - 1
185
242
  while pos >= 0:
186
243
  token = tokens[pos]
@@ -188,10 +245,7 @@ class OracleDialectConfig(DialectConfig):
188
245
  pos -= 1
189
246
  continue
190
247
  if token.type == TokenType.KEYWORD and token.value.upper() == "END":
191
- # We found END just before this semicolon
192
- # Now look ahead to see if there's a / on its own line
193
248
  return self._has_upcoming_slash(tokens, current_pos)
194
- # Found something else, not an END
195
249
  break
196
250
 
197
251
  return False
@@ -209,25 +263,17 @@ class OracleDialectConfig(DialectConfig):
209
263
  pos += 1
210
264
  continue
211
265
  if token.type == TokenType.TERMINATOR and token.value == "/":
212
- # Found a /, check if it's valid (on its own line)
213
266
  return found_newline and self._handle_slash_terminator(tokens, pos)
214
267
  if token.type in {TokenType.COMMENT_LINE, TokenType.COMMENT_BLOCK}:
215
- # Skip comments
216
268
  pos += 1
217
269
  continue
218
- # Found non-whitespace, non-comment content
219
270
  break
220
271
 
221
272
  return False
222
273
 
223
274
  @staticmethod
224
275
  def is_real_block_ender(tokens: list[Token], current_pos: int) -> bool:
225
- """Check if this END keyword is actually a block ender.
226
-
227
- In Oracle PL/SQL, END followed by IF, LOOP, CASE etc. are not block enders
228
- for BEGIN blocks - they terminate control structures.
229
- """
230
- # Look ahead for the next non-whitespace token(s)
276
+ """Check if this END keyword is actually a block ender for Oracle PL/SQL."""
231
277
  pos = current_pos + 1
232
278
  while pos < len(tokens):
233
279
  next_token = tokens[pos]
@@ -236,7 +282,6 @@ class OracleDialectConfig(DialectConfig):
236
282
  pos += 1
237
283
  continue
238
284
  if next_token.type == TokenType.OTHER:
239
- # Collect consecutive OTHER tokens to form a word
240
285
  word_chars = []
241
286
  word_pos = pos
242
287
  while word_pos < len(tokens) and tokens[word_pos].type == TokenType.OTHER:
@@ -245,26 +290,23 @@ class OracleDialectConfig(DialectConfig):
245
290
 
246
291
  word = "".join(word_chars).upper()
247
292
  if word in {"IF", "LOOP", "CASE", "WHILE"}:
248
- return False # This is not a block ender
249
- # Found a non-whitespace token that's not one of our special cases
293
+ return False
250
294
  break
251
- return True # This is a real block ender
295
+ return True
252
296
 
253
297
  @staticmethod
254
298
  def _handle_slash_terminator(tokens: list[Token], current_pos: int) -> bool:
255
299
  """Oracle / must be on its own line after whitespace only."""
256
300
  if current_pos == 0:
257
- return True # / at start is valid
301
+ return True
258
302
 
259
- # Look backwards to find the start of the line
260
303
  pos = current_pos - 1
261
304
  while pos >= 0:
262
305
  token = tokens[pos]
263
306
  if "\n" in token.value:
264
- # Found newline, check if only whitespace between newline and /
265
307
  break
266
308
  if token.type not in {TokenType.WHITESPACE, TokenType.COMMENT_LINE}:
267
- return False # Non-whitespace before / on same line
309
+ return False
268
310
  pos -= 1
269
311
 
270
312
  return True
@@ -275,31 +317,33 @@ class TSQLDialectConfig(DialectConfig):
275
317
 
276
318
  @property
277
319
  def name(self) -> str:
278
- return "tsql"
320
+ if self._name is None:
321
+ self._name = "tsql"
322
+ return self._name
279
323
 
280
324
  @property
281
325
  def block_starters(self) -> set[str]:
282
- return {"BEGIN", "TRY"}
326
+ if self._block_starters is None:
327
+ self._block_starters = {"BEGIN", "TRY"}
328
+ return self._block_starters
283
329
 
284
330
  @property
285
331
  def block_enders(self) -> set[str]:
286
- return {"END", "CATCH"}
332
+ if self._block_enders is None:
333
+ self._block_enders = {"END", "CATCH"}
334
+ return self._block_enders
287
335
 
288
336
  @property
289
337
  def statement_terminators(self) -> set[str]:
290
- return {";"}
338
+ if self._statement_terminators is None:
339
+ self._statement_terminators = {";"}
340
+ return self._statement_terminators
291
341
 
292
342
  @property
293
343
  def batch_separators(self) -> set[str]:
294
- return {"GO"}
295
-
296
- @staticmethod
297
- def validate_batch_separator(tokens: list[Token], current_pos: int) -> bool:
298
- """GO must be the only keyword on its line."""
299
- # Look for non-whitespace tokens on the same line
300
- # Implementation similar to Oracle slash handler
301
- _ = tokens, current_pos # Simplified implementation
302
- return True # Simplified for now
344
+ if self._batch_separators is None:
345
+ self._batch_separators = {"GO"}
346
+ return self._batch_separators
303
347
 
304
348
 
305
349
  class PostgreSQLDialectConfig(DialectConfig):
@@ -307,19 +351,27 @@ class PostgreSQLDialectConfig(DialectConfig):
307
351
 
308
352
  @property
309
353
  def name(self) -> str:
310
- return "postgresql"
354
+ if self._name is None:
355
+ self._name = "postgresql"
356
+ return self._name
311
357
 
312
358
  @property
313
359
  def block_starters(self) -> set[str]:
314
- return {"BEGIN", "DECLARE", "CASE", "DO"}
360
+ if self._block_starters is None:
361
+ self._block_starters = {"BEGIN", "DECLARE", "CASE", "DO"}
362
+ return self._block_starters
315
363
 
316
364
  @property
317
365
  def block_enders(self) -> set[str]:
318
- return {"END"}
366
+ if self._block_enders is None:
367
+ self._block_enders = {"END"}
368
+ return self._block_enders
319
369
 
320
370
  @property
321
371
  def statement_terminators(self) -> set[str]:
322
- return {";"}
372
+ if self._statement_terminators is None:
373
+ self._statement_terminators = {";"}
374
+ return self._statement_terminators
323
375
 
324
376
  def _get_dialect_specific_patterns(self) -> list[tuple[TokenType, TokenPattern]]:
325
377
  """Add PostgreSQL-specific patterns like dollar-quoted strings."""
@@ -328,12 +380,11 @@ class PostgreSQLDialectConfig(DialectConfig):
328
380
  @staticmethod
329
381
  def _handle_dollar_quoted_string(text: str, position: int, line: int, column: int) -> Optional[Token]:
330
382
  """Handle PostgreSQL dollar-quoted strings like $tag$...$tag$."""
331
- # Match opening tag
332
383
  start_match = re.match(r"\$([a-zA-Z_][a-zA-Z0-9_]*)?\$", text[position:])
333
384
  if not start_match:
334
385
  return None
335
386
 
336
- tag = start_match.group(0) # The full opening tag, e.g., "$tag$"
387
+ tag = start_match.group(0)
337
388
  content_start = position + len(tag)
338
389
 
339
390
  try:
@@ -342,7 +393,6 @@ class PostgreSQLDialectConfig(DialectConfig):
342
393
 
343
394
  return Token(type=TokenType.STRING_LITERAL, value=full_value, line=line, column=column, position=position)
344
395
  except ValueError:
345
- # Closing tag not found
346
396
  return None
347
397
 
348
398
 
@@ -351,19 +401,27 @@ class GenericDialectConfig(DialectConfig):
351
401
 
352
402
  @property
353
403
  def name(self) -> str:
354
- return "generic"
404
+ if self._name is None:
405
+ self._name = "generic"
406
+ return self._name
355
407
 
356
408
  @property
357
409
  def block_starters(self) -> set[str]:
358
- return {"BEGIN", "DECLARE", "CASE"}
410
+ if self._block_starters is None:
411
+ self._block_starters = {"BEGIN", "DECLARE", "CASE"}
412
+ return self._block_starters
359
413
 
360
414
  @property
361
415
  def block_enders(self) -> set[str]:
362
- return {"END"}
416
+ if self._block_enders is None:
417
+ self._block_enders = {"END"}
418
+ return self._block_enders
363
419
 
364
420
  @property
365
421
  def statement_terminators(self) -> set[str]:
366
- return {";"}
422
+ if self._statement_terminators is None:
423
+ self._statement_terminators = {";"}
424
+ return self._statement_terminators
367
425
 
368
426
 
369
427
  class MySQLDialectConfig(DialectConfig):
@@ -371,24 +429,33 @@ class MySQLDialectConfig(DialectConfig):
371
429
 
372
430
  @property
373
431
  def name(self) -> str:
374
- return "mysql"
432
+ if self._name is None:
433
+ self._name = "mysql"
434
+ return self._name
375
435
 
376
436
  @property
377
437
  def block_starters(self) -> set[str]:
378
- return {"BEGIN", "DECLARE", "CASE"}
438
+ if self._block_starters is None:
439
+ self._block_starters = {"BEGIN", "DECLARE", "CASE"}
440
+ return self._block_starters
379
441
 
380
442
  @property
381
443
  def block_enders(self) -> set[str]:
382
- return {"END"}
444
+ if self._block_enders is None:
445
+ self._block_enders = {"END"}
446
+ return self._block_enders
383
447
 
384
448
  @property
385
449
  def statement_terminators(self) -> set[str]:
386
- return {";"}
450
+ if self._statement_terminators is None:
451
+ self._statement_terminators = {";"}
452
+ return self._statement_terminators
387
453
 
388
454
  @property
389
455
  def special_terminators(self) -> dict[str, Callable[[list[Token], int], bool]]:
390
- """MySQL supports DELIMITER command for changing terminators."""
391
- return {"\\g": lambda _tokens, _pos: True, "\\G": lambda _tokens, _pos: True}
456
+ if self._special_terminators is None:
457
+ self._special_terminators = {"\\g": lambda _tokens, _pos: True, "\\G": lambda _tokens, _pos: True}
458
+ return self._special_terminators
392
459
 
393
460
 
394
461
  class SQLiteDialectConfig(DialectConfig):
@@ -396,20 +463,27 @@ class SQLiteDialectConfig(DialectConfig):
396
463
 
397
464
  @property
398
465
  def name(self) -> str:
399
- return "sqlite"
466
+ if self._name is None:
467
+ self._name = "sqlite"
468
+ return self._name
400
469
 
401
470
  @property
402
471
  def block_starters(self) -> set[str]:
403
- # SQLite has limited block support
404
- return {"BEGIN", "CASE"}
472
+ if self._block_starters is None:
473
+ self._block_starters = {"BEGIN", "CASE"}
474
+ return self._block_starters
405
475
 
406
476
  @property
407
477
  def block_enders(self) -> set[str]:
408
- return {"END"}
478
+ if self._block_enders is None:
479
+ self._block_enders = {"END"}
480
+ return self._block_enders
409
481
 
410
482
  @property
411
483
  def statement_terminators(self) -> set[str]:
412
- return {";"}
484
+ if self._statement_terminators is None:
485
+ self._statement_terminators = {";"}
486
+ return self._statement_terminators
413
487
 
414
488
 
415
489
  class DuckDBDialectConfig(DialectConfig):
@@ -417,19 +491,27 @@ class DuckDBDialectConfig(DialectConfig):
417
491
 
418
492
  @property
419
493
  def name(self) -> str:
420
- return "duckdb"
494
+ if self._name is None:
495
+ self._name = "duckdb"
496
+ return self._name
421
497
 
422
498
  @property
423
499
  def block_starters(self) -> set[str]:
424
- return {"BEGIN", "CASE"}
500
+ if self._block_starters is None:
501
+ self._block_starters = {"BEGIN", "CASE"}
502
+ return self._block_starters
425
503
 
426
504
  @property
427
505
  def block_enders(self) -> set[str]:
428
- return {"END"}
506
+ if self._block_enders is None:
507
+ self._block_enders = {"END"}
508
+ return self._block_enders
429
509
 
430
510
  @property
431
511
  def statement_terminators(self) -> set[str]:
432
- return {";"}
512
+ if self._statement_terminators is None:
513
+ self._statement_terminators = {";"}
514
+ return self._statement_terminators
433
515
 
434
516
 
435
517
  class BigQueryDialectConfig(DialectConfig):
@@ -437,56 +519,97 @@ class BigQueryDialectConfig(DialectConfig):
437
519
 
438
520
  @property
439
521
  def name(self) -> str:
440
- return "bigquery"
522
+ if self._name is None:
523
+ self._name = "bigquery"
524
+ return self._name
441
525
 
442
526
  @property
443
527
  def block_starters(self) -> set[str]:
444
- return {"BEGIN", "CASE"}
528
+ if self._block_starters is None:
529
+ self._block_starters = {"BEGIN", "CASE"}
530
+ return self._block_starters
445
531
 
446
532
  @property
447
533
  def block_enders(self) -> set[str]:
448
- return {"END"}
534
+ if self._block_enders is None:
535
+ self._block_enders = {"END"}
536
+ return self._block_enders
449
537
 
450
538
  @property
451
539
  def statement_terminators(self) -> set[str]:
452
- return {";"}
540
+ if self._statement_terminators is None:
541
+ self._statement_terminators = {";"}
542
+ return self._statement_terminators
543
+
544
+
545
+ _pattern_cache: Optional[UnifiedCache[list[tuple[TokenType, CompiledTokenPattern]]]] = None
546
+ _result_cache: Optional[UnifiedCache[list[str]]] = None
547
+ _cache_lock = threading.Lock()
548
+
453
549
 
550
+ def _get_pattern_cache() -> UnifiedCache[list[tuple[TokenType, CompiledTokenPattern]]]:
551
+ """Get or create the pattern compilation cache."""
552
+ global _pattern_cache
553
+ if _pattern_cache is None:
554
+ with _cache_lock:
555
+ if _pattern_cache is None:
556
+ _pattern_cache = UnifiedCache[list[tuple[TokenType, CompiledTokenPattern]]](
557
+ max_size=DEFAULT_PATTERN_CACHE_SIZE, ttl_seconds=DEFAULT_CACHE_TTL
558
+ )
559
+ return _pattern_cache
454
560
 
561
+
562
+ def _get_result_cache() -> UnifiedCache[list[str]]:
563
+ """Get or create the result cache."""
564
+ global _result_cache
565
+ if _result_cache is None:
566
+ with _cache_lock:
567
+ if _result_cache is None:
568
+ _result_cache = UnifiedCache[list[str]](
569
+ max_size=DEFAULT_RESULT_CACHE_SIZE, ttl_seconds=DEFAULT_CACHE_TTL
570
+ )
571
+ return _result_cache
572
+
573
+
574
+ @mypyc_attr(allow_interpreted_subclasses=False)
455
575
  class StatementSplitter:
456
- """Splits SQL scripts into individual statements using a lexer-driven state machine."""
576
+ """SQL script splitter with caching and dialect support."""
577
+
578
+ __slots__ = SPLITTER_SLOTS
457
579
 
458
580
  def __init__(self, dialect: DialectConfig, strip_trailing_semicolon: bool = False) -> None:
459
- """Initialize the splitter with a specific dialect configuration.
460
-
461
- Args:
462
- dialect: The dialect configuration to use
463
- strip_trailing_semicolon: If True, remove trailing semicolons from statements
464
- """
465
- self.dialect = dialect
466
- self.strip_trailing_semicolon = strip_trailing_semicolon
467
- self.token_patterns = dialect.get_all_token_patterns()
468
- self._compiled_patterns = self._compile_patterns()
469
-
470
- def _compile_patterns(self) -> list[tuple[TokenType, CompiledTokenPattern]]:
471
- """Compile regex patterns for efficiency."""
581
+ """Initialize the splitter with caching and dialect support."""
582
+ self._dialect = dialect
583
+ self._strip_trailing_semicolon = strip_trailing_semicolon
584
+ self._token_patterns = dialect.get_all_token_patterns()
585
+
586
+ self._pattern_cache_key = f"{dialect.name}:{hash(tuple(str(p) for _, p in self._token_patterns))}"
587
+
588
+ self._pattern_cache = _get_pattern_cache()
589
+ self._result_cache = _get_result_cache()
590
+
591
+ self._compiled_patterns = self._get_or_compile_patterns()
592
+
593
+ def _get_or_compile_patterns(self) -> list[tuple[TokenType, CompiledTokenPattern]]:
594
+ """Get compiled patterns from cache or compile and cache them."""
595
+ cache_key = CacheKey(("pattern", self._pattern_cache_key))
596
+
597
+ cached_patterns = self._pattern_cache.get(cache_key)
598
+ if cached_patterns is not None:
599
+ return cached_patterns
600
+
472
601
  compiled: list[tuple[TokenType, CompiledTokenPattern]] = []
473
- for token_type, pattern in self.token_patterns:
602
+ for token_type, pattern in self._token_patterns:
474
603
  if isinstance(pattern, str):
475
604
  compiled.append((token_type, re.compile(pattern, re.IGNORECASE | re.DOTALL)))
476
605
  else:
477
- # It's a callable
478
606
  compiled.append((token_type, pattern))
607
+
608
+ self._pattern_cache.put(cache_key, compiled)
479
609
  return compiled
480
610
 
481
611
  def _tokenize(self, sql: str) -> Generator[Token, None, None]:
482
- """Tokenize the SQL script into a stream of tokens.
483
-
484
- sql: The SQL script to tokenize
485
-
486
- Yields:
487
- Token objects representing the recognized tokens in the script.
488
-
489
- """
612
+ """Tokenize SQL string."""
490
613
  pos = 0
491
614
  line = 1
492
615
  line_start = 0
@@ -496,7 +619,6 @@ class StatementSplitter:
496
619
 
497
620
  for token_type, pattern in self._compiled_patterns:
498
621
  if callable(pattern):
499
- # Call the handler function
500
622
  column = pos - line_start + 1
501
623
  token = pattern(sql, pos, line, column)
502
624
  if token:
@@ -511,7 +633,6 @@ class StatementSplitter:
511
633
  matched = True
512
634
  break
513
635
  else:
514
- # Use regex
515
636
  match = pattern.match(sql, pos)
516
637
  if match:
517
638
  value = match.group(0)
@@ -529,12 +650,25 @@ class StatementSplitter:
529
650
  break
530
651
 
531
652
  if not matched:
532
- # This should never happen with our catch-all OTHER pattern
533
653
  logger.error("Failed to tokenize at position %d: %s", pos, sql[pos : pos + 20])
534
- pos += 1 # Skip the problematic character
654
+ pos += 1
535
655
 
536
656
  def split(self, sql: str) -> list[str]:
537
- """Split the SQL script into individual statements."""
657
+ """Split SQL script with result caching."""
658
+ script_hash = hash(sql)
659
+ cache_key = CacheKey(("split", self._dialect.name, script_hash, self._strip_trailing_semicolon))
660
+
661
+ cached_result = self._result_cache.get(cache_key)
662
+ if cached_result is not None:
663
+ return cached_result
664
+
665
+ statements = self._do_split(sql)
666
+
667
+ self._result_cache.put(cache_key, statements)
668
+ return statements
669
+
670
+ def _do_split(self, sql: str) -> list[str]:
671
+ """Perform SQL script splitting."""
538
672
  statements = []
539
673
  current_statement_tokens = []
540
674
  current_statement_chars = []
@@ -543,10 +677,8 @@ class StatementSplitter:
543
677
  all_tokens = list(self._tokenize(sql))
544
678
 
545
679
  for token_idx, token in enumerate(all_tokens):
546
- # Always accumulate the original text
547
680
  current_statement_chars.append(token.value)
548
681
 
549
- # Skip whitespace and comments for logic (but keep in output)
550
682
  if token.type in {TokenType.WHITESPACE, TokenType.COMMENT_LINE, TokenType.COMMENT_BLOCK}:
551
683
  current_statement_tokens.append(token)
552
684
  continue
@@ -555,50 +687,41 @@ class StatementSplitter:
555
687
  token_upper = token.value.upper()
556
688
 
557
689
  if token.type == TokenType.KEYWORD:
558
- if token_upper in self.dialect.block_starters:
690
+ if token_upper in self._dialect.block_starters:
559
691
  block_stack.append(token_upper)
560
- if len(block_stack) > self.dialect.max_nesting_depth:
561
- msg = f"Maximum nesting depth ({self.dialect.max_nesting_depth}) exceeded"
692
+ if len(block_stack) > self._dialect.max_nesting_depth:
693
+ msg = f"Maximum nesting depth ({self._dialect.max_nesting_depth}) exceeded"
562
694
  raise ValueError(msg)
563
- elif token_upper in self.dialect.block_enders:
564
- if block_stack and self.dialect.is_real_block_ender(all_tokens, token_idx):
695
+ elif token_upper in self._dialect.block_enders:
696
+ if block_stack and self._dialect.is_real_block_ender(all_tokens, token_idx):
565
697
  block_stack.pop()
566
698
 
567
- # Check for statement termination
568
699
  is_terminator = False
569
- if not block_stack: # Only terminate when not inside a block
700
+ if not block_stack:
570
701
  if token.type == TokenType.TERMINATOR:
571
- if token.value in self.dialect.statement_terminators:
572
- should_delay = self.dialect.should_delay_semicolon_termination(all_tokens, token_idx)
702
+ if token.value in self._dialect.statement_terminators:
703
+ should_delay = self._dialect.should_delay_semicolon_termination(all_tokens, token_idx)
573
704
 
574
- # Also check if there's a batch separator coming up (for T-SQL GO)
575
- if not should_delay and token.value == ";" and self.dialect.batch_separators:
576
- # In dialects with batch separators, semicolons don't terminate
577
- # statements - only batch separators do
705
+ if not should_delay and token.value == ";" and self._dialect.batch_separators:
578
706
  should_delay = True
579
707
 
580
708
  if not should_delay:
581
709
  is_terminator = True
582
- elif token.value in self.dialect.special_terminators:
583
- # Call the handler to validate
584
- handler = self.dialect.special_terminators[token.value]
710
+ elif token.value in self._dialect.special_terminators:
711
+ handler = self._dialect.special_terminators[token.value]
585
712
  if handler(all_tokens, token_idx):
586
713
  is_terminator = True
587
714
 
588
- elif token.type == TokenType.KEYWORD and token_upper in self.dialect.batch_separators:
589
- # Batch separators like GO should be included with the preceding statement
715
+ elif token.type == TokenType.KEYWORD and token_upper in self._dialect.batch_separators:
590
716
  is_terminator = True
591
717
 
592
718
  if is_terminator:
593
- # Save the statement
594
719
  statement = "".join(current_statement_chars).strip()
595
720
 
596
721
  is_plsql_block = self._is_plsql_block(current_statement_tokens)
597
722
 
598
- # Optionally strip the trailing terminator
599
- # For PL/SQL blocks, never strip the semicolon as it's syntactically required
600
723
  if (
601
- self.strip_trailing_semicolon
724
+ self._strip_trailing_semicolon
602
725
  and token.type == TokenType.TERMINATOR
603
726
  and statement.endswith(token.value)
604
727
  and not is_plsql_block
@@ -619,29 +742,14 @@ class StatementSplitter:
619
742
 
620
743
  @staticmethod
621
744
  def _is_plsql_block(tokens: list[Token]) -> bool:
622
- """Check if the token list represents a PL/SQL block.
623
-
624
- Args:
625
- tokens: List of tokens for the current statement
626
-
627
- Returns:
628
- True if this is a PL/SQL block (BEGIN...END or DECLARE...END)
629
- """
745
+ """Check if the token list represents a PL/SQL block."""
630
746
  for token in tokens:
631
747
  if token.type == TokenType.KEYWORD:
632
748
  return token.value.upper() in {"BEGIN", "DECLARE"}
633
749
  return False
634
750
 
635
751
  def _contains_executable_content(self, statement: str) -> bool:
636
- """Check if a statement contains actual executable content (not just comments/whitespace).
637
-
638
- Args:
639
- statement: The statement string to check
640
-
641
- Returns:
642
- True if the statement contains executable SQL, False if it's only comments/whitespace
643
- """
644
- # Tokenize the statement to check its content
752
+ """Check if a statement contains actual executable content."""
645
753
  tokens = list(self._tokenize(statement))
646
754
 
647
755
  for token in tokens:
@@ -651,39 +759,61 @@ class StatementSplitter:
651
759
  return False
652
760
 
653
761
 
654
- def split_sql_script(script: str, dialect: str = "generic", strip_trailing_semicolon: bool = False) -> list[str]:
655
- """Split a SQL script into statements using the appropriate dialect.
762
+ def split_sql_script(script: str, dialect: Optional[str] = None, strip_trailing_terminator: bool = False) -> list[str]:
763
+ """Split SQL script into individual statements.
656
764
 
657
765
  Args:
658
766
  script: The SQL script to split
659
- dialect: The SQL dialect name ('oracle', 'tsql', 'postgresql', etc.)
660
- strip_trailing_semicolon: If True, remove trailing terminators from statements
767
+ dialect: The SQL dialect name
768
+ strip_trailing_terminator: If True, remove trailing terminators from statements
661
769
 
662
770
  Returns:
663
771
  List of individual SQL statements
664
772
  """
773
+ if dialect is None:
774
+ dialect = "generic"
775
+
665
776
  dialect_configs = {
666
- # Standard dialects
667
777
  "generic": GenericDialectConfig(),
668
- # Major databases
669
778
  "oracle": OracleDialectConfig(),
670
779
  "tsql": TSQLDialectConfig(),
671
- "mssql": TSQLDialectConfig(), # Alias for tsql
672
- "sqlserver": TSQLDialectConfig(), # Alias for tsql
780
+ "mssql": TSQLDialectConfig(),
781
+ "sqlserver": TSQLDialectConfig(),
673
782
  "postgresql": PostgreSQLDialectConfig(),
674
- "postgres": PostgreSQLDialectConfig(), # Common alias
783
+ "postgres": PostgreSQLDialectConfig(),
675
784
  "mysql": MySQLDialectConfig(),
676
785
  "sqlite": SQLiteDialectConfig(),
677
- # Modern analytical databases
678
786
  "duckdb": DuckDBDialectConfig(),
679
787
  "bigquery": BigQueryDialectConfig(),
680
788
  }
681
789
 
682
790
  config = dialect_configs.get(dialect.lower())
683
791
  if not config:
684
- # Fall back to generic config for unknown dialects
685
792
  logger.warning("Unknown dialect '%s', using generic SQL splitter", dialect)
686
793
  config = GenericDialectConfig()
687
794
 
688
- splitter = StatementSplitter(config, strip_trailing_semicolon=strip_trailing_semicolon)
795
+ splitter = StatementSplitter(config, strip_trailing_semicolon=strip_trailing_terminator)
689
796
  return splitter.split(script)
797
+
798
+
799
+ def clear_splitter_caches() -> None:
800
+ """Clear all splitter caches for memory management."""
801
+ pattern_cache = _get_pattern_cache()
802
+ result_cache = _get_result_cache()
803
+ pattern_cache.clear()
804
+ result_cache.clear()
805
+
806
+
807
+ def get_splitter_cache_stats() -> dict[str, Any]:
808
+ """Get statistics from splitter caches.
809
+
810
+ Returns:
811
+ Dictionary containing cache statistics
812
+ """
813
+ pattern_cache = _get_pattern_cache()
814
+ result_cache = _get_result_cache()
815
+
816
+ return {
817
+ "pattern_cache": {"size": pattern_cache.size(), "stats": pattern_cache.get_stats()},
818
+ "result_cache": {"size": result_cache.size(), "stats": result_cache.get_stats()},
819
+ }