sqlspec 0.16.1__cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sqlspec might be problematic.
- 51ff5a9eadfdefd49f98__mypyc.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/__init__.py +92 -0
- sqlspec/__main__.py +12 -0
- sqlspec/__metadata__.py +14 -0
- sqlspec/_serialization.py +77 -0
- sqlspec/_sql.py +1780 -0
- sqlspec/_typing.py +680 -0
- sqlspec/adapters/__init__.py +0 -0
- sqlspec/adapters/adbc/__init__.py +5 -0
- sqlspec/adapters/adbc/_types.py +12 -0
- sqlspec/adapters/adbc/config.py +361 -0
- sqlspec/adapters/adbc/driver.py +512 -0
- sqlspec/adapters/aiosqlite/__init__.py +19 -0
- sqlspec/adapters/aiosqlite/_types.py +13 -0
- sqlspec/adapters/aiosqlite/config.py +253 -0
- sqlspec/adapters/aiosqlite/driver.py +248 -0
- sqlspec/adapters/asyncmy/__init__.py +19 -0
- sqlspec/adapters/asyncmy/_types.py +12 -0
- sqlspec/adapters/asyncmy/config.py +180 -0
- sqlspec/adapters/asyncmy/driver.py +274 -0
- sqlspec/adapters/asyncpg/__init__.py +21 -0
- sqlspec/adapters/asyncpg/_types.py +17 -0
- sqlspec/adapters/asyncpg/config.py +229 -0
- sqlspec/adapters/asyncpg/driver.py +344 -0
- sqlspec/adapters/bigquery/__init__.py +18 -0
- sqlspec/adapters/bigquery/_types.py +12 -0
- sqlspec/adapters/bigquery/config.py +298 -0
- sqlspec/adapters/bigquery/driver.py +558 -0
- sqlspec/adapters/duckdb/__init__.py +22 -0
- sqlspec/adapters/duckdb/_types.py +12 -0
- sqlspec/adapters/duckdb/config.py +504 -0
- sqlspec/adapters/duckdb/driver.py +368 -0
- sqlspec/adapters/oracledb/__init__.py +32 -0
- sqlspec/adapters/oracledb/_types.py +14 -0
- sqlspec/adapters/oracledb/config.py +317 -0
- sqlspec/adapters/oracledb/driver.py +538 -0
- sqlspec/adapters/psqlpy/__init__.py +16 -0
- sqlspec/adapters/psqlpy/_types.py +11 -0
- sqlspec/adapters/psqlpy/config.py +214 -0
- sqlspec/adapters/psqlpy/driver.py +530 -0
- sqlspec/adapters/psycopg/__init__.py +32 -0
- sqlspec/adapters/psycopg/_types.py +17 -0
- sqlspec/adapters/psycopg/config.py +426 -0
- sqlspec/adapters/psycopg/driver.py +796 -0
- sqlspec/adapters/sqlite/__init__.py +15 -0
- sqlspec/adapters/sqlite/_types.py +11 -0
- sqlspec/adapters/sqlite/config.py +240 -0
- sqlspec/adapters/sqlite/driver.py +294 -0
- sqlspec/base.py +571 -0
- sqlspec/builder/__init__.py +62 -0
- sqlspec/builder/_base.py +473 -0
- sqlspec/builder/_column.py +320 -0
- sqlspec/builder/_ddl.py +1346 -0
- sqlspec/builder/_ddl_utils.py +103 -0
- sqlspec/builder/_delete.py +76 -0
- sqlspec/builder/_insert.py +256 -0
- sqlspec/builder/_merge.py +71 -0
- sqlspec/builder/_parsing_utils.py +140 -0
- sqlspec/builder/_select.py +170 -0
- sqlspec/builder/_update.py +188 -0
- sqlspec/builder/mixins/__init__.py +55 -0
- sqlspec/builder/mixins/_cte_and_set_ops.py +222 -0
- sqlspec/builder/mixins/_delete_operations.py +41 -0
- sqlspec/builder/mixins/_insert_operations.py +244 -0
- sqlspec/builder/mixins/_join_operations.py +122 -0
- sqlspec/builder/mixins/_merge_operations.py +476 -0
- sqlspec/builder/mixins/_order_limit_operations.py +135 -0
- sqlspec/builder/mixins/_pivot_operations.py +153 -0
- sqlspec/builder/mixins/_select_operations.py +603 -0
- sqlspec/builder/mixins/_update_operations.py +187 -0
- sqlspec/builder/mixins/_where_clause.py +621 -0
- sqlspec/cli.py +247 -0
- sqlspec/config.py +395 -0
- sqlspec/core/__init__.py +63 -0
- sqlspec/core/cache.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/cache.py +871 -0
- sqlspec/core/compiler.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/compiler.py +417 -0
- sqlspec/core/filters.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/filters.py +830 -0
- sqlspec/core/hashing.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/hashing.py +310 -0
- sqlspec/core/parameters.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/parameters.py +1237 -0
- sqlspec/core/result.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/result.py +677 -0
- sqlspec/core/splitter.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/splitter.py +819 -0
- sqlspec/core/statement.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/statement.py +676 -0
- sqlspec/driver/__init__.py +19 -0
- sqlspec/driver/_async.py +502 -0
- sqlspec/driver/_common.py +631 -0
- sqlspec/driver/_sync.py +503 -0
- sqlspec/driver/mixins/__init__.py +6 -0
- sqlspec/driver/mixins/_result_tools.py +193 -0
- sqlspec/driver/mixins/_sql_translator.py +86 -0
- sqlspec/exceptions.py +193 -0
- sqlspec/extensions/__init__.py +0 -0
- sqlspec/extensions/aiosql/__init__.py +10 -0
- sqlspec/extensions/aiosql/adapter.py +461 -0
- sqlspec/extensions/litestar/__init__.py +6 -0
- sqlspec/extensions/litestar/_utils.py +52 -0
- sqlspec/extensions/litestar/cli.py +48 -0
- sqlspec/extensions/litestar/config.py +92 -0
- sqlspec/extensions/litestar/handlers.py +260 -0
- sqlspec/extensions/litestar/plugin.py +145 -0
- sqlspec/extensions/litestar/providers.py +454 -0
- sqlspec/loader.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/loader.py +760 -0
- sqlspec/migrations/__init__.py +35 -0
- sqlspec/migrations/base.py +414 -0
- sqlspec/migrations/commands.py +443 -0
- sqlspec/migrations/loaders.py +402 -0
- sqlspec/migrations/runner.py +213 -0
- sqlspec/migrations/tracker.py +140 -0
- sqlspec/migrations/utils.py +129 -0
- sqlspec/protocols.py +407 -0
- sqlspec/py.typed +0 -0
- sqlspec/storage/__init__.py +23 -0
- sqlspec/storage/backends/__init__.py +0 -0
- sqlspec/storage/backends/base.py +163 -0
- sqlspec/storage/backends/fsspec.py +386 -0
- sqlspec/storage/backends/obstore.py +459 -0
- sqlspec/storage/capabilities.py +102 -0
- sqlspec/storage/registry.py +239 -0
- sqlspec/typing.py +299 -0
- sqlspec/utils/__init__.py +3 -0
- sqlspec/utils/correlation.py +150 -0
- sqlspec/utils/deprecation.py +106 -0
- sqlspec/utils/fixtures.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/fixtures.py +58 -0
- sqlspec/utils/logging.py +127 -0
- sqlspec/utils/module_loader.py +89 -0
- sqlspec/utils/serializers.py +4 -0
- sqlspec/utils/singleton.py +32 -0
- sqlspec/utils/sync_tools.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/sync_tools.py +237 -0
- sqlspec/utils/text.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/text.py +96 -0
- sqlspec/utils/type_guards.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/type_guards.py +1139 -0
- sqlspec-0.16.1.dist-info/METADATA +365 -0
- sqlspec-0.16.1.dist-info/RECORD +148 -0
- sqlspec-0.16.1.dist-info/WHEEL +7 -0
- sqlspec-0.16.1.dist-info/entry_points.txt +2 -0
- sqlspec-0.16.1.dist-info/licenses/LICENSE +21 -0
- sqlspec-0.16.1.dist-info/licenses/NOTICE +29 -0
sqlspec/core/splitter.py
ADDED
@@ -0,0 +1,819 @@
"""SQL statement splitter with caching and dialect support.

This module provides a SQL script statement splitter with caching and
multiple dialect support.

Components:
- StatementSplitter: SQL splitter with caching
- DialectConfig: Dialect configuration system
- Token/TokenType: Tokenization system
- Caching: LRU caching for split results
- Pattern compilation caching

Features:
- Support for multiple SQL dialects (Oracle, T-SQL, PostgreSQL, MySQL, SQLite, DuckDB, BigQuery)
- Cached pattern compilation
- LRU caching for split results
- Optimized tokenization
- Complete preservation of split_sql_script function
"""

import re
import threading
from abc import ABC, abstractmethod
from collections.abc import Generator
from enum import Enum
from re import Pattern
from typing import Any, Callable, Optional, Union

from mypy_extensions import mypyc_attr
from typing_extensions import TypeAlias

from sqlspec.core.cache import CacheKey, UnifiedCache
from sqlspec.utils.logging import get_logger

__all__ = (
    "DialectConfig",
    "OracleDialectConfig",
    "PostgreSQLDialectConfig",
    "StatementSplitter",
    "TSQLDialectConfig",
    "Token",
    "TokenType",
    "split_sql_script",
)

logger = get_logger("sqlspec.core.splitter")

DEFAULT_PATTERN_CACHE_SIZE = 1000  # Compiled regex patterns
DEFAULT_RESULT_CACHE_SIZE = 5000  # Split results
DEFAULT_CACHE_TTL = 3600  # 1 hour TTL

DIALECT_CONFIG_SLOTS = (
    "_block_starters",
    "_block_enders",
    "_statement_terminators",
    "_batch_separators",
    "_special_terminators",
    "_max_nesting_depth",
    "_name",
)

TOKEN_SLOTS = ("type", "value", "line", "column", "position")

SPLITTER_SLOTS = (
    "_dialect",
    "_strip_trailing_semicolon",
    "_token_patterns",
    "_compiled_patterns",
    "_pattern_cache_key",
    "_result_cache",
    "_pattern_cache",
)


class TokenType(Enum):
    """Types of tokens recognized by the SQL lexer."""

    COMMENT_LINE = "COMMENT_LINE"
    COMMENT_BLOCK = "COMMENT_BLOCK"
    STRING_LITERAL = "STRING_LITERAL"
    QUOTED_IDENTIFIER = "QUOTED_IDENTIFIER"
    KEYWORD = "KEYWORD"
    TERMINATOR = "TERMINATOR"
    BATCH_SEPARATOR = "BATCH_SEPARATOR"
    WHITESPACE = "WHITESPACE"
    OTHER = "OTHER"


@mypyc_attr(allow_interpreted_subclasses=True)
class Token:
    """SQL token with metadata."""

    __slots__ = TOKEN_SLOTS

    def __init__(self, type: TokenType, value: str, line: int, column: int, position: int) -> None:
        self.type = type
        self.value = value
        self.line = line
        self.column = column
        self.position = position

    def __repr__(self) -> str:
        return f"Token({self.type.value}, {self.value!r}, {self.line}:{self.column})"


TokenHandler: TypeAlias = Callable[[str, int, int, int], Optional[Token]]
TokenPattern: TypeAlias = Union[str, TokenHandler]
CompiledTokenPattern: TypeAlias = Union[Pattern[str], TokenHandler]


@mypyc_attr(allow_interpreted_subclasses=True)
class DialectConfig(ABC):
    """Abstract base class for SQL dialect configurations."""

    __slots__ = DIALECT_CONFIG_SLOTS

    def __init__(self) -> None:
        """Initialize dialect configuration."""
        self._name: Optional[str] = None
        self._block_starters: Optional[set[str]] = None
        self._block_enders: Optional[set[str]] = None
        self._statement_terminators: Optional[set[str]] = None
        self._batch_separators: Optional[set[str]] = None
        self._special_terminators: Optional[dict[str, Callable[[list[Token], int], bool]]] = None
        self._max_nesting_depth: Optional[int] = None

    @property
    @abstractmethod
    def name(self) -> str:
        """Name of the dialect (e.g., 'oracle', 'tsql')."""

    @property
    @abstractmethod
    def block_starters(self) -> set[str]:
        """Keywords that start a block (e.g., BEGIN, DECLARE)."""

    @property
    @abstractmethod
    def block_enders(self) -> set[str]:
        """Keywords that end a block (e.g., END)."""

    @property
    @abstractmethod
    def statement_terminators(self) -> set[str]:
        """Characters that terminate statements (e.g., ;)."""

    @property
    def batch_separators(self) -> set[str]:
        """Keywords that separate batches (e.g., GO for T-SQL)."""
        if self._batch_separators is None:
            self._batch_separators = set()
        return self._batch_separators

    @property
    def special_terminators(self) -> dict[str, Callable[[list[Token], int], bool]]:
        """Special terminators that need custom handling."""
        if self._special_terminators is None:
            self._special_terminators = {}
        return self._special_terminators

    @property
    def max_nesting_depth(self) -> int:
        """Maximum allowed nesting depth for blocks."""
        if self._max_nesting_depth is None:
            self._max_nesting_depth = 256
        return self._max_nesting_depth

    def get_all_token_patterns(self) -> list[tuple[TokenType, TokenPattern]]:
        """Assembles the complete, ordered list of token regex patterns."""
        patterns: list[tuple[TokenType, TokenPattern]] = [
            (TokenType.COMMENT_LINE, r"--[^\n]*"),
            (TokenType.COMMENT_BLOCK, r"/\*[\s\S]*?\*/"),
            (TokenType.STRING_LITERAL, r"'(?:[^']|'')*'"),
            (TokenType.QUOTED_IDENTIFIER, r'"[^"]*"|\[[^\]]*\]'),
        ]

        patterns.extend(self._get_dialect_specific_patterns())

        all_keywords = self.block_starters | self.block_enders | self.batch_separators
        if all_keywords:
            sorted_keywords = sorted(all_keywords, key=len, reverse=True)
            patterns.append((TokenType.KEYWORD, r"\b(" + "|".join(re.escape(kw) for kw in sorted_keywords) + r")\b"))

        all_terminators = self.statement_terminators | set(self.special_terminators.keys())
        if all_terminators:
            patterns.append((TokenType.TERMINATOR, "|".join(re.escape(t) for t in all_terminators)))

        patterns.extend([(TokenType.WHITESPACE, r"\s+"), (TokenType.OTHER, r".")])

        return patterns

    def _get_dialect_specific_patterns(self) -> list[tuple[TokenType, TokenPattern]]:
        """Override to add dialect-specific token patterns."""
        return []

    @staticmethod
    def is_real_block_ender(tokens: list[Token], current_pos: int) -> bool:  # noqa: ARG004
        """Check if this END keyword is actually a block ender."""
        return True

    def should_delay_semicolon_termination(self, tokens: list[Token], current_pos: int) -> bool:
        """Check if semicolon termination should be delayed."""
        return False


class OracleDialectConfig(DialectConfig):
    """Configuration for Oracle PL/SQL dialect."""

    @property
    def name(self) -> str:
        if self._name is None:
            self._name = "oracle"
        return self._name

    @property
    def block_starters(self) -> set[str]:
        if self._block_starters is None:
            self._block_starters = {"BEGIN", "DECLARE", "CASE"}
        return self._block_starters

    @property
    def block_enders(self) -> set[str]:
        if self._block_enders is None:
            self._block_enders = {"END"}
        return self._block_enders

    @property
    def statement_terminators(self) -> set[str]:
        if self._statement_terminators is None:
            self._statement_terminators = {";"}
        return self._statement_terminators

    @property
    def special_terminators(self) -> dict[str, Callable[[list[Token], int], bool]]:
        if self._special_terminators is None:
            self._special_terminators = {"/": self._handle_slash_terminator}
        return self._special_terminators

    def should_delay_semicolon_termination(self, tokens: list[Token], current_pos: int) -> bool:
        """Check if we should delay semicolon termination to look for a slash."""
        pos = current_pos - 1
        while pos >= 0:
            token = tokens[pos]
            if token.type == TokenType.WHITESPACE:
                pos -= 1
                continue
            if token.type == TokenType.KEYWORD and token.value.upper() == "END":
                return self._has_upcoming_slash(tokens, current_pos)
            break

        return False

    def _has_upcoming_slash(self, tokens: list[Token], current_pos: int) -> bool:
        """Check if there's a / terminator coming up on its own line."""
        pos = current_pos + 1
        found_newline = False

        while pos < len(tokens):
            token = tokens[pos]
            if token.type == TokenType.WHITESPACE:
                if "\n" in token.value:
                    found_newline = True
                pos += 1
                continue
            if token.type == TokenType.TERMINATOR and token.value == "/":
                return found_newline and self._handle_slash_terminator(tokens, pos)
            if token.type in {TokenType.COMMENT_LINE, TokenType.COMMENT_BLOCK}:
                pos += 1
                continue
            break

        return False

    @staticmethod
    def is_real_block_ender(tokens: list[Token], current_pos: int) -> bool:
        """Check if this END keyword is actually a block ender for Oracle PL/SQL."""
        pos = current_pos + 1
        while pos < len(tokens):
            next_token = tokens[pos]

            if next_token.type == TokenType.WHITESPACE:
                pos += 1
                continue
            if next_token.type == TokenType.OTHER:
                word_chars = []
                word_pos = pos
                while word_pos < len(tokens) and tokens[word_pos].type == TokenType.OTHER:
                    word_chars.append(tokens[word_pos].value)
                    word_pos += 1

                word = "".join(word_chars).upper()
                if word in {"IF", "LOOP", "CASE", "WHILE"}:
                    return False
            break
        return True

    @staticmethod
    def _handle_slash_terminator(tokens: list[Token], current_pos: int) -> bool:
        """Oracle / must be on its own line after whitespace only."""
        if current_pos == 0:
            return True

        pos = current_pos - 1
        while pos >= 0:
            token = tokens[pos]
            if "\n" in token.value:
                break
            if token.type not in {TokenType.WHITESPACE, TokenType.COMMENT_LINE}:
                return False
            pos -= 1

        return True


class TSQLDialectConfig(DialectConfig):
    """Configuration for T-SQL (SQL Server) dialect."""

    @property
    def name(self) -> str:
        if self._name is None:
            self._name = "tsql"
        return self._name

    @property
    def block_starters(self) -> set[str]:
        if self._block_starters is None:
            self._block_starters = {"BEGIN", "TRY"}
        return self._block_starters

    @property
    def block_enders(self) -> set[str]:
        if self._block_enders is None:
            self._block_enders = {"END", "CATCH"}
        return self._block_enders

    @property
    def statement_terminators(self) -> set[str]:
        if self._statement_terminators is None:
            self._statement_terminators = {";"}
        return self._statement_terminators

    @property
    def batch_separators(self) -> set[str]:
        if self._batch_separators is None:
            self._batch_separators = {"GO"}
        return self._batch_separators


class PostgreSQLDialectConfig(DialectConfig):
    """Configuration for PostgreSQL dialect with dollar-quoted strings."""

    @property
    def name(self) -> str:
        if self._name is None:
            self._name = "postgresql"
        return self._name

    @property
    def block_starters(self) -> set[str]:
        if self._block_starters is None:
            self._block_starters = {"BEGIN", "DECLARE", "CASE", "DO"}
        return self._block_starters

    @property
    def block_enders(self) -> set[str]:
        if self._block_enders is None:
            self._block_enders = {"END"}
        return self._block_enders

    @property
    def statement_terminators(self) -> set[str]:
        if self._statement_terminators is None:
            self._statement_terminators = {";"}
        return self._statement_terminators

    def _get_dialect_specific_patterns(self) -> list[tuple[TokenType, TokenPattern]]:
        """Add PostgreSQL-specific patterns like dollar-quoted strings."""
        return [(TokenType.STRING_LITERAL, self._handle_dollar_quoted_string)]

    @staticmethod
    def _handle_dollar_quoted_string(text: str, position: int, line: int, column: int) -> Optional[Token]:
        """Handle PostgreSQL dollar-quoted strings like $tag$...$tag$."""
        start_match = re.match(r"\$([a-zA-Z_][a-zA-Z0-9_]*)?\$", text[position:])
        if not start_match:
            return None

        tag = start_match.group(0)
        content_start = position + len(tag)

        try:
            content_end = text.index(tag, content_start)
            full_value = text[position : content_end + len(tag)]

            return Token(type=TokenType.STRING_LITERAL, value=full_value, line=line, column=column, position=position)
        except ValueError:
            return None


class GenericDialectConfig(DialectConfig):
    """Generic SQL dialect configuration for standard SQL."""

    @property
    def name(self) -> str:
        if self._name is None:
            self._name = "generic"
        return self._name

    @property
    def block_starters(self) -> set[str]:
        if self._block_starters is None:
            self._block_starters = {"BEGIN", "DECLARE", "CASE"}
        return self._block_starters

    @property
    def block_enders(self) -> set[str]:
        if self._block_enders is None:
            self._block_enders = {"END"}
        return self._block_enders

    @property
    def statement_terminators(self) -> set[str]:
        if self._statement_terminators is None:
            self._statement_terminators = {";"}
        return self._statement_terminators


class MySQLDialectConfig(DialectConfig):
    """Configuration for MySQL dialect."""

    @property
    def name(self) -> str:
        if self._name is None:
            self._name = "mysql"
        return self._name

    @property
    def block_starters(self) -> set[str]:
        if self._block_starters is None:
            self._block_starters = {"BEGIN", "DECLARE", "CASE"}
        return self._block_starters

    @property
    def block_enders(self) -> set[str]:
        if self._block_enders is None:
            self._block_enders = {"END"}
        return self._block_enders

    @property
    def statement_terminators(self) -> set[str]:
        if self._statement_terminators is None:
            self._statement_terminators = {";"}
        return self._statement_terminators

    @property
    def special_terminators(self) -> dict[str, Callable[[list[Token], int], bool]]:
        if self._special_terminators is None:
            self._special_terminators = {"\\g": lambda _tokens, _pos: True, "\\G": lambda _tokens, _pos: True}
        return self._special_terminators


class SQLiteDialectConfig(DialectConfig):
    """Configuration for SQLite dialect."""

    @property
    def name(self) -> str:
        if self._name is None:
            self._name = "sqlite"
        return self._name

    @property
    def block_starters(self) -> set[str]:
        if self._block_starters is None:
            self._block_starters = {"BEGIN", "CASE"}
        return self._block_starters

    @property
    def block_enders(self) -> set[str]:
        if self._block_enders is None:
            self._block_enders = {"END"}
        return self._block_enders

    @property
    def statement_terminators(self) -> set[str]:
        if self._statement_terminators is None:
            self._statement_terminators = {";"}
        return self._statement_terminators


class DuckDBDialectConfig(DialectConfig):
    """Configuration for DuckDB dialect."""

    @property
    def name(self) -> str:
        if self._name is None:
            self._name = "duckdb"
        return self._name

    @property
    def block_starters(self) -> set[str]:
        if self._block_starters is None:
            self._block_starters = {"BEGIN", "CASE"}
        return self._block_starters

    @property
    def block_enders(self) -> set[str]:
        if self._block_enders is None:
            self._block_enders = {"END"}
        return self._block_enders

    @property
    def statement_terminators(self) -> set[str]:
        if self._statement_terminators is None:
            self._statement_terminators = {";"}
        return self._statement_terminators


class BigQueryDialectConfig(DialectConfig):
    """Configuration for BigQuery dialect."""

    @property
    def name(self) -> str:
        if self._name is None:
            self._name = "bigquery"
        return self._name

    @property
    def block_starters(self) -> set[str]:
        if self._block_starters is None:
            self._block_starters = {"BEGIN", "CASE"}
        return self._block_starters

    @property
    def block_enders(self) -> set[str]:
        if self._block_enders is None:
            self._block_enders = {"END"}
        return self._block_enders

    @property
    def statement_terminators(self) -> set[str]:
        if self._statement_terminators is None:
            self._statement_terminators = {";"}
        return self._statement_terminators


_pattern_cache: Optional[UnifiedCache[list[tuple[TokenType, CompiledTokenPattern]]]] = None
_result_cache: Optional[UnifiedCache[list[str]]] = None
_cache_lock = threading.Lock()


def _get_pattern_cache() -> UnifiedCache[list[tuple[TokenType, CompiledTokenPattern]]]:
    """Get or create the pattern compilation cache."""
    global _pattern_cache
    if _pattern_cache is None:
        with _cache_lock:
            if _pattern_cache is None:
                _pattern_cache = UnifiedCache[list[tuple[TokenType, CompiledTokenPattern]]](
                    max_size=DEFAULT_PATTERN_CACHE_SIZE, ttl_seconds=DEFAULT_CACHE_TTL
                )
    return _pattern_cache


def _get_result_cache() -> UnifiedCache[list[str]]:
    """Get or create the result cache."""
    global _result_cache
    if _result_cache is None:
        with _cache_lock:
            if _result_cache is None:
                _result_cache = UnifiedCache[list[str]](
                    max_size=DEFAULT_RESULT_CACHE_SIZE, ttl_seconds=DEFAULT_CACHE_TTL
                )
    return _result_cache


@mypyc_attr(allow_interpreted_subclasses=False)
class StatementSplitter:
    """SQL script splitter with caching and dialect support."""

    __slots__ = SPLITTER_SLOTS

    def __init__(self, dialect: DialectConfig, strip_trailing_semicolon: bool = False) -> None:
        """Initialize the splitter with caching and dialect support."""
        self._dialect = dialect
        self._strip_trailing_semicolon = strip_trailing_semicolon
        self._token_patterns = dialect.get_all_token_patterns()

        self._pattern_cache_key = f"{dialect.name}:{hash(tuple(str(p) for _, p in self._token_patterns))}"

        self._pattern_cache = _get_pattern_cache()
        self._result_cache = _get_result_cache()

        self._compiled_patterns = self._get_or_compile_patterns()

    def _get_or_compile_patterns(self) -> list[tuple[TokenType, CompiledTokenPattern]]:
        """Get compiled patterns from cache or compile and cache them."""
        cache_key = CacheKey(("pattern", self._pattern_cache_key))

        cached_patterns = self._pattern_cache.get(cache_key)
        if cached_patterns is not None:
            return cached_patterns

        compiled: list[tuple[TokenType, CompiledTokenPattern]] = []
        for token_type, pattern in self._token_patterns:
            if isinstance(pattern, str):
                compiled.append((token_type, re.compile(pattern, re.IGNORECASE | re.DOTALL)))
            else:
                compiled.append((token_type, pattern))

        self._pattern_cache.put(cache_key, compiled)
        return compiled

    def _tokenize(self, sql: str) -> Generator[Token, None, None]:
        """Tokenize SQL string."""
        pos = 0
        line = 1
        line_start = 0

        while pos < len(sql):
            matched = False

            for token_type, pattern in self._compiled_patterns:
                if callable(pattern):
                    column = pos - line_start + 1
                    token = pattern(sql, pos, line, column)
                    if token:
                        newlines = token.value.count("\n")
                        if newlines > 0:
                            line += newlines
                            last_newline = token.value.rfind("\n")
                            line_start = pos + last_newline + 1

                        yield token
                        pos += len(token.value)
                        matched = True
                        break
                else:
                    match = pattern.match(sql, pos)
                    if match:
                        value = match.group(0)
                        column = pos - line_start + 1

                        newlines = value.count("\n")
                        if newlines > 0:
                            line += newlines
                            last_newline = value.rfind("\n")
                            line_start = pos + last_newline + 1

                        yield Token(type=token_type, value=value, line=line, column=column, position=pos)
                        pos = match.end()
                        matched = True
                        break

            if not matched:
                logger.error("Failed to tokenize at position %d: %s", pos, sql[pos : pos + 20])
                pos += 1

    def split(self, sql: str) -> list[str]:
        """Split SQL script with result caching."""
        script_hash = hash(sql)
        cache_key = CacheKey(("split", self._dialect.name, script_hash, self._strip_trailing_semicolon))

        cached_result = self._result_cache.get(cache_key)
        if cached_result is not None:
            return cached_result

        statements = self._do_split(sql)

        self._result_cache.put(cache_key, statements)
        return statements

    def _do_split(self, sql: str) -> list[str]:
        """Perform SQL script splitting."""
        statements = []
        current_statement_tokens = []
        current_statement_chars = []
        block_stack = []

        all_tokens = list(self._tokenize(sql))

        for token_idx, token in enumerate(all_tokens):
            current_statement_chars.append(token.value)

            if token.type in {TokenType.WHITESPACE, TokenType.COMMENT_LINE, TokenType.COMMENT_BLOCK}:
                current_statement_tokens.append(token)
                continue

            current_statement_tokens.append(token)
            token_upper = token.value.upper()

            if token.type == TokenType.KEYWORD:
                if token_upper in self._dialect.block_starters:
                    block_stack.append(token_upper)
                    if len(block_stack) > self._dialect.max_nesting_depth:
                        msg = f"Maximum nesting depth ({self._dialect.max_nesting_depth}) exceeded"
                        raise ValueError(msg)
                elif token_upper in self._dialect.block_enders:
                    if block_stack and self._dialect.is_real_block_ender(all_tokens, token_idx):
                        block_stack.pop()

            is_terminator = False
            if not block_stack:
                if token.type == TokenType.TERMINATOR:
                    if token.value in self._dialect.statement_terminators:
                        should_delay = self._dialect.should_delay_semicolon_termination(all_tokens, token_idx)

                        if not should_delay and token.value == ";" and self._dialect.batch_separators:
                            should_delay = True

                        if not should_delay:
                            is_terminator = True
                    elif token.value in self._dialect.special_terminators:
                        handler = self._dialect.special_terminators[token.value]
                        if handler(all_tokens, token_idx):
                            is_terminator = True

                elif token.type == TokenType.KEYWORD and token_upper in self._dialect.batch_separators:
                    is_terminator = True

            if is_terminator:
                statement = "".join(current_statement_chars).strip()

                is_plsql_block = self._is_plsql_block(current_statement_tokens)

                if (
                    self._strip_trailing_semicolon
                    and token.type == TokenType.TERMINATOR
                    and statement.endswith(token.value)
                    and not is_plsql_block
                ):
                    statement = statement[: -len(token.value)].rstrip()

                if statement and self._contains_executable_content(statement):
                    statements.append(statement)
                current_statement_tokens = []
                current_statement_chars = []

        if current_statement_chars:
            statement = "".join(current_statement_chars).strip()
            if statement and self._contains_executable_content(statement):
                statements.append(statement)

        return statements

    @staticmethod
    def _is_plsql_block(tokens: list[Token]) -> bool:
        """Check if the token list represents a PL/SQL block."""
        for token in tokens:
            if token.type == TokenType.KEYWORD:
                return token.value.upper() in {"BEGIN", "DECLARE"}
        return False

    def _contains_executable_content(self, statement: str) -> bool:
        """Check if a statement contains actual executable content."""
        tokens = list(self._tokenize(statement))

        for token in tokens:
            if token.type not in {TokenType.WHITESPACE, TokenType.COMMENT_LINE, TokenType.COMMENT_BLOCK}:
                return True

        return False


def split_sql_script(script: str, dialect: Optional[str] = None, strip_trailing_terminator: bool = False) -> list[str]:
    """Split SQL script into individual statements.

    Args:
        script: The SQL script to split
        dialect: The SQL dialect name
        strip_trailing_terminator: If True, remove trailing terminators from statements

    Returns:
        List of individual SQL statements
    """
    if dialect is None:
        dialect = "generic"

    dialect_configs = {
        "generic": GenericDialectConfig(),
        "oracle": OracleDialectConfig(),
        "tsql": TSQLDialectConfig(),
        "mssql": TSQLDialectConfig(),
        "sqlserver": TSQLDialectConfig(),
        "postgresql": PostgreSQLDialectConfig(),
        "postgres": PostgreSQLDialectConfig(),
        "mysql": MySQLDialectConfig(),
        "sqlite": SQLiteDialectConfig(),
        "duckdb": DuckDBDialectConfig(),
        "bigquery": BigQueryDialectConfig(),
    }

    config = dialect_configs.get(dialect.lower())
    if not config:
        logger.warning("Unknown dialect '%s', using generic SQL splitter", dialect)
        config = GenericDialectConfig()

    splitter = StatementSplitter(config, strip_trailing_semicolon=strip_trailing_terminator)
    return splitter.split(script)


def clear_splitter_caches() -> None:
    """Clear all splitter caches for memory management."""
    pattern_cache = _get_pattern_cache()
    result_cache = _get_result_cache()
    pattern_cache.clear()
    result_cache.clear()


def get_splitter_cache_stats() -> dict[str, Any]:
    """Get statistics from splitter caches.

    Returns:
        Dictionary containing cache statistics
    """
    pattern_cache = _get_pattern_cache()
    result_cache = _get_result_cache()

    return {
        "pattern_cache": {"size": pattern_cache.size(), "stats": pattern_cache.get_stats()},
        "result_cache": {"size": result_cache.size(), "stats": result_cache.get_stats()},
    }