sqlspec 0.16.1__cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sqlspec has been flagged as potentially problematic; consult the registry's advisory page for details.
- 51ff5a9eadfdefd49f98__mypyc.cpython-310-aarch64-linux-gnu.so +0 -0
- sqlspec/__init__.py +92 -0
- sqlspec/__main__.py +12 -0
- sqlspec/__metadata__.py +14 -0
- sqlspec/_serialization.py +77 -0
- sqlspec/_sql.py +1780 -0
- sqlspec/_typing.py +680 -0
- sqlspec/adapters/__init__.py +0 -0
- sqlspec/adapters/adbc/__init__.py +5 -0
- sqlspec/adapters/adbc/_types.py +12 -0
- sqlspec/adapters/adbc/config.py +361 -0
- sqlspec/adapters/adbc/driver.py +512 -0
- sqlspec/adapters/aiosqlite/__init__.py +19 -0
- sqlspec/adapters/aiosqlite/_types.py +13 -0
- sqlspec/adapters/aiosqlite/config.py +253 -0
- sqlspec/adapters/aiosqlite/driver.py +248 -0
- sqlspec/adapters/asyncmy/__init__.py +19 -0
- sqlspec/adapters/asyncmy/_types.py +12 -0
- sqlspec/adapters/asyncmy/config.py +180 -0
- sqlspec/adapters/asyncmy/driver.py +274 -0
- sqlspec/adapters/asyncpg/__init__.py +21 -0
- sqlspec/adapters/asyncpg/_types.py +17 -0
- sqlspec/adapters/asyncpg/config.py +229 -0
- sqlspec/adapters/asyncpg/driver.py +344 -0
- sqlspec/adapters/bigquery/__init__.py +18 -0
- sqlspec/adapters/bigquery/_types.py +12 -0
- sqlspec/adapters/bigquery/config.py +298 -0
- sqlspec/adapters/bigquery/driver.py +558 -0
- sqlspec/adapters/duckdb/__init__.py +22 -0
- sqlspec/adapters/duckdb/_types.py +12 -0
- sqlspec/adapters/duckdb/config.py +504 -0
- sqlspec/adapters/duckdb/driver.py +368 -0
- sqlspec/adapters/oracledb/__init__.py +32 -0
- sqlspec/adapters/oracledb/_types.py +14 -0
- sqlspec/adapters/oracledb/config.py +317 -0
- sqlspec/adapters/oracledb/driver.py +538 -0
- sqlspec/adapters/psqlpy/__init__.py +16 -0
- sqlspec/adapters/psqlpy/_types.py +11 -0
- sqlspec/adapters/psqlpy/config.py +214 -0
- sqlspec/adapters/psqlpy/driver.py +530 -0
- sqlspec/adapters/psycopg/__init__.py +32 -0
- sqlspec/adapters/psycopg/_types.py +17 -0
- sqlspec/adapters/psycopg/config.py +426 -0
- sqlspec/adapters/psycopg/driver.py +796 -0
- sqlspec/adapters/sqlite/__init__.py +15 -0
- sqlspec/adapters/sqlite/_types.py +11 -0
- sqlspec/adapters/sqlite/config.py +240 -0
- sqlspec/adapters/sqlite/driver.py +294 -0
- sqlspec/base.py +571 -0
- sqlspec/builder/__init__.py +62 -0
- sqlspec/builder/_base.py +473 -0
- sqlspec/builder/_column.py +320 -0
- sqlspec/builder/_ddl.py +1346 -0
- sqlspec/builder/_ddl_utils.py +103 -0
- sqlspec/builder/_delete.py +76 -0
- sqlspec/builder/_insert.py +256 -0
- sqlspec/builder/_merge.py +71 -0
- sqlspec/builder/_parsing_utils.py +140 -0
- sqlspec/builder/_select.py +170 -0
- sqlspec/builder/_update.py +188 -0
- sqlspec/builder/mixins/__init__.py +55 -0
- sqlspec/builder/mixins/_cte_and_set_ops.py +222 -0
- sqlspec/builder/mixins/_delete_operations.py +41 -0
- sqlspec/builder/mixins/_insert_operations.py +244 -0
- sqlspec/builder/mixins/_join_operations.py +122 -0
- sqlspec/builder/mixins/_merge_operations.py +476 -0
- sqlspec/builder/mixins/_order_limit_operations.py +135 -0
- sqlspec/builder/mixins/_pivot_operations.py +153 -0
- sqlspec/builder/mixins/_select_operations.py +603 -0
- sqlspec/builder/mixins/_update_operations.py +187 -0
- sqlspec/builder/mixins/_where_clause.py +621 -0
- sqlspec/cli.py +247 -0
- sqlspec/config.py +395 -0
- sqlspec/core/__init__.py +63 -0
- sqlspec/core/cache.cpython-310-aarch64-linux-gnu.so +0 -0
- sqlspec/core/cache.py +871 -0
- sqlspec/core/compiler.cpython-310-aarch64-linux-gnu.so +0 -0
- sqlspec/core/compiler.py +417 -0
- sqlspec/core/filters.cpython-310-aarch64-linux-gnu.so +0 -0
- sqlspec/core/filters.py +830 -0
- sqlspec/core/hashing.cpython-310-aarch64-linux-gnu.so +0 -0
- sqlspec/core/hashing.py +310 -0
- sqlspec/core/parameters.cpython-310-aarch64-linux-gnu.so +0 -0
- sqlspec/core/parameters.py +1237 -0
- sqlspec/core/result.cpython-310-aarch64-linux-gnu.so +0 -0
- sqlspec/core/result.py +677 -0
- sqlspec/core/splitter.cpython-310-aarch64-linux-gnu.so +0 -0
- sqlspec/core/splitter.py +819 -0
- sqlspec/core/statement.cpython-310-aarch64-linux-gnu.so +0 -0
- sqlspec/core/statement.py +676 -0
- sqlspec/driver/__init__.py +19 -0
- sqlspec/driver/_async.py +502 -0
- sqlspec/driver/_common.py +631 -0
- sqlspec/driver/_sync.py +503 -0
- sqlspec/driver/mixins/__init__.py +6 -0
- sqlspec/driver/mixins/_result_tools.py +193 -0
- sqlspec/driver/mixins/_sql_translator.py +86 -0
- sqlspec/exceptions.py +193 -0
- sqlspec/extensions/__init__.py +0 -0
- sqlspec/extensions/aiosql/__init__.py +10 -0
- sqlspec/extensions/aiosql/adapter.py +461 -0
- sqlspec/extensions/litestar/__init__.py +6 -0
- sqlspec/extensions/litestar/_utils.py +52 -0
- sqlspec/extensions/litestar/cli.py +48 -0
- sqlspec/extensions/litestar/config.py +92 -0
- sqlspec/extensions/litestar/handlers.py +260 -0
- sqlspec/extensions/litestar/plugin.py +145 -0
- sqlspec/extensions/litestar/providers.py +454 -0
- sqlspec/loader.cpython-310-aarch64-linux-gnu.so +0 -0
- sqlspec/loader.py +760 -0
- sqlspec/migrations/__init__.py +35 -0
- sqlspec/migrations/base.py +414 -0
- sqlspec/migrations/commands.py +443 -0
- sqlspec/migrations/loaders.py +402 -0
- sqlspec/migrations/runner.py +213 -0
- sqlspec/migrations/tracker.py +140 -0
- sqlspec/migrations/utils.py +129 -0
- sqlspec/protocols.py +407 -0
- sqlspec/py.typed +0 -0
- sqlspec/storage/__init__.py +23 -0
- sqlspec/storage/backends/__init__.py +0 -0
- sqlspec/storage/backends/base.py +163 -0
- sqlspec/storage/backends/fsspec.py +386 -0
- sqlspec/storage/backends/obstore.py +459 -0
- sqlspec/storage/capabilities.py +102 -0
- sqlspec/storage/registry.py +239 -0
- sqlspec/typing.py +299 -0
- sqlspec/utils/__init__.py +3 -0
- sqlspec/utils/correlation.py +150 -0
- sqlspec/utils/deprecation.py +106 -0
- sqlspec/utils/fixtures.cpython-310-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/fixtures.py +58 -0
- sqlspec/utils/logging.py +127 -0
- sqlspec/utils/module_loader.py +89 -0
- sqlspec/utils/serializers.py +4 -0
- sqlspec/utils/singleton.py +32 -0
- sqlspec/utils/sync_tools.cpython-310-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/sync_tools.py +237 -0
- sqlspec/utils/text.cpython-310-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/text.py +96 -0
- sqlspec/utils/type_guards.cpython-310-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/type_guards.py +1139 -0
- sqlspec-0.16.1.dist-info/METADATA +365 -0
- sqlspec-0.16.1.dist-info/RECORD +148 -0
- sqlspec-0.16.1.dist-info/WHEEL +7 -0
- sqlspec-0.16.1.dist-info/entry_points.txt +2 -0
- sqlspec-0.16.1.dist-info/licenses/LICENSE +21 -0
- sqlspec-0.16.1.dist-info/licenses/NOTICE +29 -0
sqlspec/loader.py
ADDED
|
@@ -0,0 +1,760 @@
|
|
|
1
|
+
"""SQL file loader module for managing SQL statements from files.
|
|
2
|
+
|
|
3
|
+
This module provides functionality to load, cache, and manage SQL statements
|
|
4
|
+
from files using aiosql-style named queries.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import hashlib
|
|
8
|
+
import re
|
|
9
|
+
import time
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
|
+
from difflib import get_close_matches
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Optional, Union
|
|
15
|
+
|
|
16
|
+
from sqlspec.core.cache import CacheKey, get_cache_config, get_default_cache
|
|
17
|
+
from sqlspec.core.parameters import ParameterStyleConfig, ParameterValidator
|
|
18
|
+
from sqlspec.core.statement import SQL, StatementConfig
|
|
19
|
+
from sqlspec.exceptions import SQLFileNotFoundError, SQLFileParseError, StorageOperationFailedError
|
|
20
|
+
from sqlspec.storage import storage_registry
|
|
21
|
+
from sqlspec.storage.registry import StorageRegistry
|
|
22
|
+
from sqlspec.utils.correlation import CorrelationContext
|
|
23
|
+
from sqlspec.utils.logging import get_logger
|
|
24
|
+
|
|
25
|
+
# Public API of this module.
__all__ = ("CachedSQLFile", "NamedStatement", "SQLFile", "SQLFileLoader")

logger = get_logger("loader")

# Matches: -- name: query_name (supports hyphens and special suffixes)
# We capture the name plus any trailing special characters
QUERY_NAME_PATTERN = re.compile(r"^\s*--\s*name\s*:\s*([\w-]+[^\w\s]*)\s*$", re.MULTILINE | re.IGNORECASE)
# Strips every character that is not a word character or a hyphen; used by
# _normalize_query_name to turn raw names into valid Python identifiers.
TRIM_SPECIAL_CHARS = re.compile(r"[^\w-]")

# Matches: -- dialect: dialect_name (optional dialect specification)
DIALECT_PATTERN = re.compile(r"^\s*--\s*dialect\s*:\s*(?P<dialect>[a-zA-Z0-9_]+)\s*$", re.IGNORECASE | re.MULTILINE)

# Supported SQL dialects (based on SQLGlot's available dialects)
SUPPORTED_DIALECTS = {
    # Core databases
    "sqlite",
    "postgresql",
    "postgres",
    "mysql",
    "oracle",
    "mssql",
    "tsql",
    # Cloud platforms
    "bigquery",
    "snowflake",
    "redshift",
    "athena",
    "fabric",
    # Analytics engines
    "clickhouse",
    "duckdb",
    "databricks",
    "spark",
    "spark2",
    "trino",
    "presto",
    # Specialized
    "hive",
    "drill",
    "druid",
    "materialize",
    "teradata",
    "dremio",
    "doris",
    "risingwave",
    "singlestore",
    "starrocks",
    "tableau",
    "exasol",
    "dune",
}

# Dialect aliases for common variants; keys are user-facing spellings,
# values are the canonical names used internally.
DIALECT_ALIASES = {
    "postgresql": "postgres",
    "pg": "postgres",
    "pgplsql": "postgres",
    "plsql": "oracle",
    "oracledb": "oracle",
    "tsql": "mssql",
}

# Minimum number of parts expected when splitting a query definition.
MIN_QUERY_PARTS = 3
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _normalize_query_name(name: str) -> str:
    """Turn a raw query name into a valid Python identifier.

    Drops every character that is neither a word character nor a hyphen,
    then converts the remaining hyphens to underscores.

    Args:
        name: Raw query name as it appears in the SQL file.

    Returns:
        The normalized name, usable as a Python identifier.
    """
    cleaned = TRIM_SPECIAL_CHARS.sub("", name)
    return cleaned.replace("-", "_")
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _normalize_dialect(dialect: str) -> str:
    """Map a raw dialect name onto its canonical form.

    Lowercases and trims the input, then resolves known aliases
    (e.g. ``pg`` -> ``postgres``).

    Args:
        dialect: Raw dialect name from a SQL file.

    Returns:
        The canonical dialect name.
    """
    key = dialect.lower().strip()
    return DIALECT_ALIASES.get(key, key)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _normalize_dialect_for_sqlglot(dialect: str) -> str:
    """Normalize a dialect name for SQLGlot compatibility.

    Currently this applies exactly the same lowercase/trim/alias
    normalization as ``_normalize_dialect``; it is kept as a separate
    entry point so SQLGlot-specific mappings can diverge later without
    touching callers. Delegating removes the previous copy-pasted body.

    Args:
        dialect: Dialect name from SQL file or parameter.

    Returns:
        SQLGlot-compatible dialect name.
    """
    return _normalize_dialect(dialect)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _get_dialect_suggestions(invalid_dialect: str) -> "list[str]":
    """Suggest close dialect names for an unrecognized dialect.

    Uses fuzzy matching against the set of supported dialects.

    Args:
        invalid_dialect: The unrecognized dialect name.

    Returns:
        Up to three close matches from SUPPORTED_DIALECTS.
    """
    matches = get_close_matches(invalid_dialect, SUPPORTED_DIALECTS, n=3, cutoff=0.6)
    return matches
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class NamedStatement:
    """A single named SQL statement parsed from a file.

    Holds the normalized statement name, the SQL text, an optional
    per-statement dialect, and the (zero-based) line where the statement
    begins, used for error reporting.
    """

    __slots__ = ("dialect", "name", "sql", "start_line")

    def __init__(self, name: str, sql: str, dialect: "Optional[str]" = None, start_line: int = 0) -> None:
        # Plain attribute assignments; __slots__ keeps instances compact.
        self.start_line = start_line
        self.dialect = dialect
        self.sql = sql
        self.name = name
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@dataclass
class SQLFile:
    """A loaded SQL file together with its metadata.

    Tracks the raw content, the source path, arbitrary metadata, an MD5
    content checksum used for cache invalidation, and the load timestamp.
    """

    # Raw SQL content read from the file.
    content: str
    # Path (or URI) the file was loaded from.
    path: str
    # Optional caller-supplied metadata.
    metadata: "dict[str, Any]" = field(default_factory=dict)
    # Derived in __post_init__; never passed by callers.
    checksum: str = field(init=False)
    # Timezone-aware load timestamp.
    loaded_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    def __post_init__(self) -> None:
        """Derive the MD5 content checksum once the instance is built."""
        # usedforsecurity=False: this is a cache key, not a security hash.
        digest = hashlib.md5(self.content.encode(), usedforsecurity=False)
        self.checksum = digest.hexdigest()
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class CachedSQLFile:
    """Parsed SQL file entry stored in the statement cache.

    Keeps the original file alongside its parsed statements so unchanged
    files can be reused without re-parsing.
    """

    __slots__ = ("parsed_statements", "sql_file", "statement_names")

    def __init__(self, sql_file: SQLFile, parsed_statements: "dict[str, NamedStatement]") -> None:
        """Initialize the cache entry.

        Args:
            sql_file: The original SQLFile with content and metadata.
            parsed_statements: Named statements parsed from the file.
        """
        self.sql_file = sql_file
        self.parsed_statements = parsed_statements
        # Snapshot of statement names in insertion order.
        self.statement_names = [statement_name for statement_name in parsed_statements]
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class SQLFileLoader:
|
|
208
|
+
"""Loads and parses SQL files with aiosql-style named queries.
|
|
209
|
+
|
|
210
|
+
Provides functionality to load SQL files containing named queries
|
|
211
|
+
(using -- name: syntax) and retrieve them by name.
|
|
212
|
+
"""
|
|
213
|
+
|
|
214
|
+
def __init__(self, *, encoding: str = "utf-8", storage_registry: StorageRegistry = storage_registry) -> None:
    """Initialize the SQL file loader.

    Args:
        encoding: Text encoding used when reading SQL files.
        storage_registry: Storage registry that resolves file URIs.
    """
    # Internal indexes: statement name -> statement, path -> file,
    # statement name -> owning path.
    self._queries: dict[str, NamedStatement] = {}
    self._files: dict[str, SQLFile] = {}
    self._query_to_file: dict[str, str] = {}
    self.encoding = encoding
    self.storage_registry = storage_registry
|
|
226
|
+
|
|
227
|
+
def _raise_file_not_found(self, path: str) -> None:
    """Unconditionally raise SQLFileNotFoundError for *path*.

    Exists so callers can keep their branching flat while satisfying
    linters that dislike raise statements in expression positions.

    Args:
        path: File path that was not found.

    Raises:
        SQLFileNotFoundError: Always raised.
    """
    raise SQLFileNotFoundError(path)
|
|
237
|
+
|
|
238
|
+
def _generate_file_cache_key(self, path: Union[str, Path]) -> str:
    """Build a short, stable cache key for a file path.

    Args:
        path: File path to generate a key for.

    Returns:
        Cache key of the form ``file:<first 16 hex chars of md5(path)>``.
    """
    # usedforsecurity=False: the hash only namespaces cache entries.
    digest = hashlib.md5(str(path).encode(), usedforsecurity=False).hexdigest()
    return "file:" + digest[:16]
|
|
250
|
+
|
|
251
|
+
def _calculate_file_checksum(self, path: Union[str, Path]) -> str:
    """Compute the MD5 checksum of a file's content.

    Args:
        path: File path to read.

    Returns:
        Hex MD5 digest of the file content.

    Raises:
        SQLFileParseError: If the file cannot be read.
    """
    try:
        text = self._read_file_content(path)
        return hashlib.md5(text.encode(), usedforsecurity=False).hexdigest()
    except Exception as exc:
        raise SQLFileParseError(str(path), str(path), exc) from exc
|
|
268
|
+
|
|
269
|
+
def _is_file_unchanged(self, path: Union[str, Path], cached_file: CachedSQLFile) -> bool:
    """Report whether *path* still matches the cached checksum.

    Args:
        path: File path to check.
        cached_file: Previously cached file data.

    Returns:
        True if the current content checksum equals the cached one;
        False if it differs or the file cannot be read.
    """
    try:
        checksum_now = self._calculate_file_checksum(path)
    except Exception:
        # Unreadable file counts as changed, forcing a reload attempt.
        return False
    return checksum_now == cached_file.sql_file.checksum
|
|
285
|
+
|
|
286
|
+
def _read_file_content(self, path: Union[str, Path]) -> str:
    """Read file content through the configured storage backend.

    Args:
        path: File path (local path or URI).

    Returns:
        File content decoded with the loader's encoding.

    Raises:
        SQLFileNotFoundError: If the file does not exist.
        SQLFileParseError: If the file cannot be read.
    """
    path_str = str(path)

    try:
        backend = self.storage_registry.get(path)
        return backend.read_text(path_str, encoding=self.encoding)
    except KeyError as exc:
        # No backend could resolve the path: treat as missing file.
        raise SQLFileNotFoundError(path_str) from exc
    except StorageOperationFailedError as exc:
        # Distinguish "missing file" failures from genuine read errors
        # by inspecting the backend's error message.
        message = str(exc).lower()
        if "not found" in message or "no such file" in message:
            raise SQLFileNotFoundError(path_str) from exc
        raise SQLFileParseError(path_str, path_str, exc) from exc
    except Exception as exc:
        raise SQLFileParseError(path_str, path_str, exc) from exc
|
|
313
|
+
|
|
314
|
+
@staticmethod
def _strip_leading_comments(sql_text: str) -> str:
    """Remove leading ``--`` comment lines from a SQL string.

    Returns the text from the first non-blank, non-comment line onward,
    or an empty string when every line is blank or a comment.
    """
    lines = sql_text.strip().split("\n")
    for idx, line in enumerate(lines):
        candidate = line.strip()
        if candidate and not candidate.startswith("--"):
            return "\n".join(lines[idx:]).strip()
    return ""
|
|
326
|
+
|
|
327
|
+
@staticmethod
def _parse_sql_content(content: str, file_path: str) -> "dict[str, NamedStatement]":
    """Parse SQL content and extract named statements with dialect specifications.

    Fix: removed a leftover ``content.splitlines()`` expression whose
    result was discarded (dead code).

    Args:
        content: Raw SQL file content to parse.
        file_path: File path for error reporting.

    Returns:
        Dictionary mapping normalized statement names to NamedStatement objects.

    Raises:
        SQLFileParseError: If no named statements are found, duplicate
            names exist, or nothing valid remains after parsing.
    """
    statements: dict[str, NamedStatement] = {}

    name_matches = list(QUERY_NAME_PATTERN.finditer(content))
    if not name_matches:
        raise SQLFileParseError(
            file_path, file_path, ValueError("No named SQL statements found (-- name: statement_name)")
        )

    for i, match in enumerate(name_matches):
        raw_statement_name = match.group(1).strip()
        # Zero-based line number of the "-- name:" marker; +1 when shown to users.
        statement_start_line = content[: match.start()].count("\n")

        # The statement body runs from this marker to the next one (or EOF).
        start_pos = match.end()
        end_pos = name_matches[i + 1].start() if i + 1 < len(name_matches) else len(content)

        statement_section = content[start_pos:end_pos].strip()
        if not raw_statement_name or not statement_section:
            continue

        dialect = None
        statement_sql = statement_section

        # An optional "-- dialect: xxx" line may appear as the first
        # non-blank line of the statement section.
        section_lines = [line.strip() for line in statement_section.split("\n") if line.strip()]
        if section_lines:
            first_line = section_lines[0]
            dialect_match = DIALECT_PATTERN.match(first_line)
            if dialect_match:
                declared_dialect = dialect_match.group("dialect").lower()

                normalized_dialect = _normalize_dialect(declared_dialect)

                if normalized_dialect not in SUPPORTED_DIALECTS:
                    # Unknown dialects are warned about but kept as-is so
                    # loading does not fail on unrecognized names.
                    suggestions = _get_dialect_suggestions(normalized_dialect)
                    warning_msg = f"Unknown dialect '{declared_dialect}' at line {statement_start_line + 1}"
                    if suggestions:
                        warning_msg += f". Did you mean: {', '.join(suggestions)}?"
                    warning_msg += (
                        f". Supported dialects: {', '.join(sorted(SUPPORTED_DIALECTS))}. Using dialect as-is."
                    )
                    logger.warning(warning_msg)
                    dialect = declared_dialect.lower()
                else:
                    dialect = normalized_dialect
                remaining_lines = section_lines[1:]
                statement_sql = "\n".join(remaining_lines)

        clean_sql = SQLFileLoader._strip_leading_comments(statement_sql)
        if clean_sql:
            normalized_name = _normalize_query_name(raw_statement_name)
            if normalized_name in statements:
                raise SQLFileParseError(
                    file_path, file_path, ValueError(f"Duplicate statement name: {raw_statement_name}")
                )

            statements[normalized_name] = NamedStatement(
                name=normalized_name, sql=clean_sql, dialect=dialect, start_line=statement_start_line
            )

    if not statements:
        raise SQLFileParseError(file_path, file_path, ValueError("No valid SQL statements found after parsing"))

    return statements
|
|
405
|
+
|
|
406
|
+
def load_sql(self, *paths: Union[str, Path]) -> None:
    """Load SQL files and parse named queries.

    Accepts local file paths, directories (searched recursively for
    ``*.sql``), and URI-style paths containing ``://`` which are handed
    to the storage backend. Loading is timed and logged; failures are
    logged and re-raised.

    Args:
        *paths: One or more file paths or directory paths to load.
    """
    correlation_id = CorrelationContext.get()
    start_time = time.perf_counter()

    logger.info("Loading SQL files", extra={"file_count": len(paths), "correlation_id": correlation_id})

    loaded_count = 0
    query_count_before = len(self._queries)

    try:
        for path in paths:
            path_str = str(path)
            if "://" in path_str:
                # URI: resolved by the storage backend, not the local FS.
                self._load_single_file(path, None)
                loaded_count += 1
            else:
                path_obj = Path(path)
                if path_obj.is_dir():
                    loaded_count += self._load_directory(path_obj)
                elif path_obj.exists():
                    self._load_single_file(path_obj, None)
                    loaded_count += 1
                elif path_obj.suffix:
                    # A missing path that looks like a file (has a suffix)
                    # is an error; a missing suffix-less path is skipped
                    # silently here.
                    self._raise_file_not_found(str(path))

        duration = time.perf_counter() - start_time
        new_queries = len(self._queries) - query_count_before

        logger.info(
            "Loaded %d SQL files with %d new queries in %.3fms",
            loaded_count,
            new_queries,
            duration * 1000,
            extra={
                "files_loaded": loaded_count,
                "new_queries": new_queries,
                "duration_ms": duration * 1000,
                "correlation_id": correlation_id,
            },
        )

    except Exception as e:
        duration = time.perf_counter() - start_time
        logger.exception(
            "Failed to load SQL files after %.3fms",
            duration * 1000,
            extra={
                "error_type": type(e).__name__,
                "duration_ms": duration * 1000,
                "correlation_id": correlation_id,
            },
        )
        raise
|
|
464
|
+
|
|
465
|
+
def _load_directory(self, dir_path: Path) -> int:
    """Recursively load every ``*.sql`` file under a directory.

    Subdirectory names become dot-separated namespaces for the queries
    defined in the files they contain.

    Args:
        dir_path: Directory to scan.

    Returns:
        Number of SQL files loaded.
    """
    sql_files = list(dir_path.rglob("*.sql"))
    if not sql_files:
        return 0

    for sql_path in sql_files:
        parts = sql_path.relative_to(dir_path).parent.parts
        namespace = ".".join(parts) if parts else None
        self._load_single_file(sql_path, namespace)
    return len(sql_files)
|
|
477
|
+
|
|
478
|
+
def _load_single_file(self, file_path: Union[str, Path], namespace: Optional[str]) -> None:
    """Load a single SQL file with optional namespace and caching.

    Consults the unified cache first: when a cached entry exists and the
    file checksum is unchanged, the cached statements are registered
    directly. Otherwise the file is parsed fresh and the result is
    written back into the cache.

    Args:
        file_path: Path to the SQL file.
        namespace: Optional namespace prefix for queries.
    """
    path_str = str(file_path)

    # Already loaded by this loader instance; nothing to do.
    if path_str in self._files:
        return

    cache_config = get_cache_config()
    if not cache_config.compiled_cache_enabled:
        self._load_file_without_cache(file_path, namespace)
        return

    cache_key_str = self._generate_file_cache_key(file_path)
    cache_key = CacheKey((cache_key_str,))
    unified_cache = get_default_cache()
    cached_file = unified_cache.get(cache_key)

    if (
        cached_file is not None
        and isinstance(cached_file, CachedSQLFile)
        and self._is_file_unchanged(file_path, cached_file)
    ):
        # Cache hit: register the previously parsed statements.
        self._files[path_str] = cached_file.sql_file
        for name, statement in cached_file.parsed_statements.items():
            namespaced_name = f"{namespace}.{name}" if namespace else name
            if namespaced_name in self._queries:
                existing_file = self._query_to_file.get(namespaced_name, "unknown")
                # Re-registering from the same file is allowed; a clash
                # with a different file is an error.
                if existing_file != path_str:
                    raise SQLFileParseError(
                        path_str,
                        path_str,
                        ValueError(f"Query name '{namespaced_name}' already exists in file: {existing_file}"),
                    )
            self._queries[namespaced_name] = statement
            self._query_to_file[namespaced_name] = path_str
        return

    # Cache miss or stale entry: parse the file fresh.
    self._load_file_without_cache(file_path, namespace)

    if path_str in self._files:
        sql_file = self._files[path_str]
        file_statements: dict[str, NamedStatement] = {}
        # Collect this file's statements, stripping the namespace prefix
        # so the cached entry is namespace-independent.
        for query_name, query_path in self._query_to_file.items():
            if query_path == path_str:
                stored_name = query_name
                if namespace and query_name.startswith(f"{namespace}."):
                    stored_name = query_name[len(namespace) + 1 :]
                file_statements[stored_name] = self._queries[query_name]

        cached_file_data = CachedSQLFile(sql_file=sql_file, parsed_statements=file_statements)
        unified_cache.put(cache_key, cached_file_data)
|
|
534
|
+
|
|
535
|
+
def _load_file_without_cache(self, file_path: Union[str, Path], namespace: Optional[str]) -> None:
    """Read, parse, and register a SQL file, bypassing the cache.

    Args:
        file_path: Path to the SQL file.
        namespace: Optional namespace prefix for queries.
    """
    path_str = str(file_path)

    content = self._read_file_content(file_path)
    self._files[path_str] = SQLFile(content=content, path=path_str)

    for name, statement in self._parse_sql_content(content, path_str).items():
        namespaced_name = f"{namespace}.{name}" if namespace else name
        if namespaced_name in self._queries:
            existing_file = self._query_to_file.get(namespaced_name, "unknown")
            # Re-registering the same file is fine; a clash with a
            # different file is an error.
            if existing_file != path_str:
                raise SQLFileParseError(
                    path_str,
                    path_str,
                    ValueError(f"Query name '{namespaced_name}' already exists in file: {existing_file}"),
                )
        self._queries[namespaced_name] = statement
        self._query_to_file[namespaced_name] = path_str
|
|
561
|
+
|
|
562
|
+
def add_named_sql(self, name: str, sql: str, dialect: "Optional[str]" = None) -> None:
    """Register a named SQL query directly, without a backing file.

    Args:
        name: Name for the SQL query.
        sql: Raw SQL content.
        dialect: Optional dialect for the SQL statement.

    Raises:
        ValueError: If the query name is already registered.
    """
    if name in self._queries:
        existing_source = self._query_to_file.get(name, "<directly added>")
        msg = f"Query name '{name}' already exists (source: {existing_source})"
        raise ValueError(msg)

    if dialect is not None:
        normalized_dialect = _normalize_dialect(dialect)
        if normalized_dialect in SUPPORTED_DIALECTS:
            dialect = normalized_dialect
        else:
            # Warn but keep the dialect (lowercased) so registration
            # still succeeds with the caller's spelling.
            suggestions = _get_dialect_suggestions(normalized_dialect)
            warning_msg = f"Unknown dialect '{dialect}'"
            if suggestions:
                warning_msg += f". Did you mean: {', '.join(suggestions)}?"
            warning_msg += f". Supported dialects: {', '.join(sorted(SUPPORTED_DIALECTS))}. Using dialect as-is."
            logger.warning(warning_msg)
            dialect = dialect.lower()

    self._queries[name] = NamedStatement(name=name, sql=sql.strip(), dialect=dialect, start_line=0)
    self._query_to_file[name] = "<directly added>"
|
|
594
|
+
|
|
595
|
+
def get_sql(
    self, name: str, parameters: "Optional[Any]" = None, dialect: "Optional[str]" = None, **kwargs: "Any"
) -> "SQL":
    """Build an executable SQL object for a named statement.

    Args:
        name: Name of the statement (from -- name: in SQL file).
            Hyphens in names are converted to underscores.
        parameters: Parameters for the SQL statement.
        dialect: Optional dialect override.
        **kwargs: Additional parameters to pass to the SQL object.

    Returns:
        SQL object ready for execution.

    Raises:
        SQLFileNotFoundError: If statement name not found.
    """
    correlation_id = CorrelationContext.get()
    safe_name = _normalize_query_name(name)

    if safe_name not in self._queries:
        available = ", ".join(sorted(self._queries.keys())) if self._queries else "none"
        logger.error(
            "Statement not found: %s",
            name,
            extra={
                "statement_name": name,
                "safe_name": safe_name,
                "available_statements": len(self._queries),
                "correlation_id": correlation_id,
            },
        )
        raise SQLFileNotFoundError(name, path=f"Statement '{name}' not found. Available statements: {available}")

    stmt = self._queries[safe_name]

    effective_dialect = dialect or stmt.dialect
    if dialect is not None:
        normalized = _normalize_dialect(dialect)
        if normalized in SUPPORTED_DIALECTS:
            effective_dialect = normalized
        else:
            # Unrecognized override: warn (with fuzzy suggestions when
            # available) and fall back to a lowercased copy of the input.
            hints = _get_dialect_suggestions(normalized)
            warning = f"Unknown dialect '{dialect}'"
            if hints:
                warning += f". Did you mean: {', '.join(hints)}?"
            warning += f". Supported dialects: {', '.join(sorted(SUPPORTED_DIALECTS))}. Using dialect as-is."
            logger.warning(warning)
            effective_dialect = dialect.lower()

    sql_kwargs = dict(kwargs)
    if parameters is not None:
        sql_kwargs["parameters"] = parameters

    sqlglot_dialect = _normalize_dialect_for_sqlglot(effective_dialect) if effective_dialect else None

    # Without any dialect we cannot infer the placeholder convention from a
    # target engine, so detect it from the statement text itself (unless the
    # caller supplied an explicit statement_config).
    if not effective_dialect and "statement_config" not in sql_kwargs:
        param_info = ParameterValidator().extract_parameters(stmt.sql)
        if param_info:
            styles = {p.style for p in param_info}
            if styles:
                sql_kwargs["statement_config"] = StatementConfig(
                    parameter_config=ParameterStyleConfig(
                        default_parameter_style=next(iter(styles)),
                        supported_parameter_styles=styles,
                        preserve_parameter_format=True,
                    )
                )

    return SQL(stmt.sql, dialect=sqlglot_dialect, **sql_kwargs)
|
|
672
|
+
|
|
673
|
+
def get_file(self, path: Union[str, Path]) -> "Optional[SQLFile]":
    """Look up a previously loaded SQLFile by its path.

    Args:
        path: Path of the file.

    Returns:
        SQLFile object if loaded, None otherwise.
    """
    key = str(path)
    return self._files.get(key)
|
|
683
|
+
|
|
684
|
+
def get_file_for_query(self, name: str) -> "Optional[SQLFile]":
    """Resolve the SQLFile that defines a given query.

    Args:
        name: Query name (hyphens are converted to underscores).

    Returns:
        SQLFile object if query exists, None otherwise.
    """
    safe_name = _normalize_query_name(name)
    source_path = self._query_to_file.get(safe_name)
    if source_path is None:
        return None
    return self._files.get(source_path)
|
|
698
|
+
|
|
699
|
+
def list_queries(self) -> "list[str]":
    """Return every registered query name in sorted order.

    Returns:
        Sorted list of query names.
    """
    return sorted(self._queries)
|
|
706
|
+
|
|
707
|
+
def list_files(self) -> "list[str]":
    """Return every loaded file path in sorted order.

    Returns:
        Sorted list of file paths.
    """
    return sorted(self._files)
|
|
714
|
+
|
|
715
|
+
def has_query(self, name: str) -> bool:
    """Report whether a query with the given name is registered.

    Args:
        name: Query name to check.

    Returns:
        True if query exists.
    """
    return _normalize_query_name(name) in self._queries
|
|
726
|
+
|
|
727
|
+
def clear_cache(self) -> None:
    """Drop every cached file, query, and query-to-file mapping."""
    for registry in (self._files, self._queries, self._query_to_file):
        registry.clear()

    # Also flush the process-wide compiled-statement cache when enabled.
    if get_cache_config().compiled_cache_enabled:
        get_default_cache().clear()
|
|
737
|
+
|
|
738
|
+
def clear_file_cache(self) -> None:
    """Flush the compiled-statement cache, keeping loaded queries.

    NOTE(review): despite the method name, this does not remove entries
    from ``self._files`` — it only clears the shared compiled cache when
    ``compiled_cache_enabled`` is set. Confirm whether a
    ``self._files.clear()`` call was intended here.
    """
    cache_config = get_cache_config()
    if cache_config.compiled_cache_enabled:
        unified_cache = get_default_cache()
        unified_cache.clear()
|
|
744
|
+
|
|
745
|
+
def get_query_text(self, name: str) -> str:
    """Return the raw SQL text registered under a query name.

    Args:
        name: Query name.

    Returns:
        Raw SQL text.

    Raises:
        SQLFileNotFoundError: If query not found.
    """
    safe_name = _normalize_query_name(name)
    statement = self._queries.get(safe_name)
    if statement is None:
        raise SQLFileNotFoundError(name)
    return statement.sql
|