sqlspec 0.16.2__cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlspec might be problematic; see the registry's advisory page for details.

Files changed (148)
  1. 51ff5a9eadfdefd49f98__mypyc.cpython-39-aarch64-linux-gnu.so +0 -0
  2. sqlspec/__init__.py +92 -0
  3. sqlspec/__main__.py +12 -0
  4. sqlspec/__metadata__.py +14 -0
  5. sqlspec/_serialization.py +77 -0
  6. sqlspec/_sql.py +1782 -0
  7. sqlspec/_typing.py +680 -0
  8. sqlspec/adapters/__init__.py +0 -0
  9. sqlspec/adapters/adbc/__init__.py +5 -0
  10. sqlspec/adapters/adbc/_types.py +12 -0
  11. sqlspec/adapters/adbc/config.py +361 -0
  12. sqlspec/adapters/adbc/driver.py +512 -0
  13. sqlspec/adapters/aiosqlite/__init__.py +19 -0
  14. sqlspec/adapters/aiosqlite/_types.py +13 -0
  15. sqlspec/adapters/aiosqlite/config.py +253 -0
  16. sqlspec/adapters/aiosqlite/driver.py +248 -0
  17. sqlspec/adapters/asyncmy/__init__.py +19 -0
  18. sqlspec/adapters/asyncmy/_types.py +12 -0
  19. sqlspec/adapters/asyncmy/config.py +180 -0
  20. sqlspec/adapters/asyncmy/driver.py +274 -0
  21. sqlspec/adapters/asyncpg/__init__.py +21 -0
  22. sqlspec/adapters/asyncpg/_types.py +17 -0
  23. sqlspec/adapters/asyncpg/config.py +229 -0
  24. sqlspec/adapters/asyncpg/driver.py +344 -0
  25. sqlspec/adapters/bigquery/__init__.py +18 -0
  26. sqlspec/adapters/bigquery/_types.py +12 -0
  27. sqlspec/adapters/bigquery/config.py +298 -0
  28. sqlspec/adapters/bigquery/driver.py +558 -0
  29. sqlspec/adapters/duckdb/__init__.py +22 -0
  30. sqlspec/adapters/duckdb/_types.py +12 -0
  31. sqlspec/adapters/duckdb/config.py +504 -0
  32. sqlspec/adapters/duckdb/driver.py +368 -0
  33. sqlspec/adapters/oracledb/__init__.py +32 -0
  34. sqlspec/adapters/oracledb/_types.py +14 -0
  35. sqlspec/adapters/oracledb/config.py +317 -0
  36. sqlspec/adapters/oracledb/driver.py +538 -0
  37. sqlspec/adapters/psqlpy/__init__.py +16 -0
  38. sqlspec/adapters/psqlpy/_types.py +11 -0
  39. sqlspec/adapters/psqlpy/config.py +214 -0
  40. sqlspec/adapters/psqlpy/driver.py +530 -0
  41. sqlspec/adapters/psycopg/__init__.py +32 -0
  42. sqlspec/adapters/psycopg/_types.py +17 -0
  43. sqlspec/adapters/psycopg/config.py +426 -0
  44. sqlspec/adapters/psycopg/driver.py +796 -0
  45. sqlspec/adapters/sqlite/__init__.py +15 -0
  46. sqlspec/adapters/sqlite/_types.py +11 -0
  47. sqlspec/adapters/sqlite/config.py +240 -0
  48. sqlspec/adapters/sqlite/driver.py +294 -0
  49. sqlspec/base.py +571 -0
  50. sqlspec/builder/__init__.py +62 -0
  51. sqlspec/builder/_base.py +473 -0
  52. sqlspec/builder/_column.py +320 -0
  53. sqlspec/builder/_ddl.py +1346 -0
  54. sqlspec/builder/_ddl_utils.py +103 -0
  55. sqlspec/builder/_delete.py +76 -0
  56. sqlspec/builder/_insert.py +421 -0
  57. sqlspec/builder/_merge.py +71 -0
  58. sqlspec/builder/_parsing_utils.py +164 -0
  59. sqlspec/builder/_select.py +170 -0
  60. sqlspec/builder/_update.py +188 -0
  61. sqlspec/builder/mixins/__init__.py +55 -0
  62. sqlspec/builder/mixins/_cte_and_set_ops.py +222 -0
  63. sqlspec/builder/mixins/_delete_operations.py +41 -0
  64. sqlspec/builder/mixins/_insert_operations.py +244 -0
  65. sqlspec/builder/mixins/_join_operations.py +149 -0
  66. sqlspec/builder/mixins/_merge_operations.py +562 -0
  67. sqlspec/builder/mixins/_order_limit_operations.py +135 -0
  68. sqlspec/builder/mixins/_pivot_operations.py +153 -0
  69. sqlspec/builder/mixins/_select_operations.py +604 -0
  70. sqlspec/builder/mixins/_update_operations.py +202 -0
  71. sqlspec/builder/mixins/_where_clause.py +644 -0
  72. sqlspec/cli.py +247 -0
  73. sqlspec/config.py +395 -0
  74. sqlspec/core/__init__.py +63 -0
  75. sqlspec/core/cache.cpython-39-aarch64-linux-gnu.so +0 -0
  76. sqlspec/core/cache.py +871 -0
  77. sqlspec/core/compiler.cpython-39-aarch64-linux-gnu.so +0 -0
  78. sqlspec/core/compiler.py +417 -0
  79. sqlspec/core/filters.cpython-39-aarch64-linux-gnu.so +0 -0
  80. sqlspec/core/filters.py +830 -0
  81. sqlspec/core/hashing.cpython-39-aarch64-linux-gnu.so +0 -0
  82. sqlspec/core/hashing.py +310 -0
  83. sqlspec/core/parameters.cpython-39-aarch64-linux-gnu.so +0 -0
  84. sqlspec/core/parameters.py +1237 -0
  85. sqlspec/core/result.cpython-39-aarch64-linux-gnu.so +0 -0
  86. sqlspec/core/result.py +677 -0
  87. sqlspec/core/splitter.cpython-39-aarch64-linux-gnu.so +0 -0
  88. sqlspec/core/splitter.py +819 -0
  89. sqlspec/core/statement.cpython-39-aarch64-linux-gnu.so +0 -0
  90. sqlspec/core/statement.py +676 -0
  91. sqlspec/driver/__init__.py +19 -0
  92. sqlspec/driver/_async.py +502 -0
  93. sqlspec/driver/_common.py +631 -0
  94. sqlspec/driver/_sync.py +503 -0
  95. sqlspec/driver/mixins/__init__.py +6 -0
  96. sqlspec/driver/mixins/_result_tools.py +193 -0
  97. sqlspec/driver/mixins/_sql_translator.py +86 -0
  98. sqlspec/exceptions.py +193 -0
  99. sqlspec/extensions/__init__.py +0 -0
  100. sqlspec/extensions/aiosql/__init__.py +10 -0
  101. sqlspec/extensions/aiosql/adapter.py +461 -0
  102. sqlspec/extensions/litestar/__init__.py +6 -0
  103. sqlspec/extensions/litestar/_utils.py +52 -0
  104. sqlspec/extensions/litestar/cli.py +48 -0
  105. sqlspec/extensions/litestar/config.py +92 -0
  106. sqlspec/extensions/litestar/handlers.py +260 -0
  107. sqlspec/extensions/litestar/plugin.py +145 -0
  108. sqlspec/extensions/litestar/providers.py +454 -0
  109. sqlspec/loader.cpython-39-aarch64-linux-gnu.so +0 -0
  110. sqlspec/loader.py +760 -0
  111. sqlspec/migrations/__init__.py +35 -0
  112. sqlspec/migrations/base.py +414 -0
  113. sqlspec/migrations/commands.py +443 -0
  114. sqlspec/migrations/loaders.py +402 -0
  115. sqlspec/migrations/runner.py +213 -0
  116. sqlspec/migrations/tracker.py +140 -0
  117. sqlspec/migrations/utils.py +129 -0
  118. sqlspec/protocols.py +407 -0
  119. sqlspec/py.typed +0 -0
  120. sqlspec/storage/__init__.py +23 -0
  121. sqlspec/storage/backends/__init__.py +0 -0
  122. sqlspec/storage/backends/base.py +163 -0
  123. sqlspec/storage/backends/fsspec.py +386 -0
  124. sqlspec/storage/backends/obstore.py +459 -0
  125. sqlspec/storage/capabilities.py +102 -0
  126. sqlspec/storage/registry.py +239 -0
  127. sqlspec/typing.py +299 -0
  128. sqlspec/utils/__init__.py +3 -0
  129. sqlspec/utils/correlation.py +150 -0
  130. sqlspec/utils/deprecation.py +106 -0
  131. sqlspec/utils/fixtures.cpython-39-aarch64-linux-gnu.so +0 -0
  132. sqlspec/utils/fixtures.py +58 -0
  133. sqlspec/utils/logging.py +127 -0
  134. sqlspec/utils/module_loader.py +89 -0
  135. sqlspec/utils/serializers.py +4 -0
  136. sqlspec/utils/singleton.py +32 -0
  137. sqlspec/utils/sync_tools.cpython-39-aarch64-linux-gnu.so +0 -0
  138. sqlspec/utils/sync_tools.py +237 -0
  139. sqlspec/utils/text.cpython-39-aarch64-linux-gnu.so +0 -0
  140. sqlspec/utils/text.py +96 -0
  141. sqlspec/utils/type_guards.cpython-39-aarch64-linux-gnu.so +0 -0
  142. sqlspec/utils/type_guards.py +1139 -0
  143. sqlspec-0.16.2.dist-info/METADATA +365 -0
  144. sqlspec-0.16.2.dist-info/RECORD +148 -0
  145. sqlspec-0.16.2.dist-info/WHEEL +7 -0
  146. sqlspec-0.16.2.dist-info/entry_points.txt +2 -0
  147. sqlspec-0.16.2.dist-info/licenses/LICENSE +21 -0
  148. sqlspec-0.16.2.dist-info/licenses/NOTICE +29 -0
sqlspec/loader.py ADDED
@@ -0,0 +1,760 @@
1
+ """SQL file loader module for managing SQL statements from files.
2
+
3
+ This module provides functionality to load, cache, and manage SQL statements
4
+ from files using aiosql-style named queries.
5
+ """
6
+
7
+ import hashlib
8
+ import re
9
+ import time
10
+ from dataclasses import dataclass, field
11
+ from datetime import datetime, timezone
12
+ from difflib import get_close_matches
13
+ from pathlib import Path
14
+ from typing import Any, Optional, Union
15
+
16
+ from sqlspec.core.cache import CacheKey, get_cache_config, get_default_cache
17
+ from sqlspec.core.parameters import ParameterStyleConfig, ParameterValidator
18
+ from sqlspec.core.statement import SQL, StatementConfig
19
+ from sqlspec.exceptions import SQLFileNotFoundError, SQLFileParseError, StorageOperationFailedError
20
+ from sqlspec.storage import storage_registry
21
+ from sqlspec.storage.registry import StorageRegistry
22
+ from sqlspec.utils.correlation import CorrelationContext
23
+ from sqlspec.utils.logging import get_logger
24
+
25
+ __all__ = ("CachedSQLFile", "NamedStatement", "SQLFile", "SQLFileLoader")
26
+
27
+ logger = get_logger("loader")
28
+
29
+ # Matches: -- name: query_name (supports hyphens and special suffixes)
30
+ # We capture the name plus any trailing special characters
31
+ QUERY_NAME_PATTERN = re.compile(r"^\s*--\s*name\s*:\s*([\w-]+[^\w\s]*)\s*$", re.MULTILINE | re.IGNORECASE)
32
+ TRIM_SPECIAL_CHARS = re.compile(r"[^\w-]")
33
+
34
+ # Matches: -- dialect: dialect_name (optional dialect specification)
35
+ DIALECT_PATTERN = re.compile(r"^\s*--\s*dialect\s*:\s*(?P<dialect>[a-zA-Z0-9_]+)\s*$", re.IGNORECASE | re.MULTILINE)
36
+
37
+ # Supported SQL dialects (based on SQLGlot's available dialects)
38
+ SUPPORTED_DIALECTS = {
39
+ # Core databases
40
+ "sqlite",
41
+ "postgresql",
42
+ "postgres",
43
+ "mysql",
44
+ "oracle",
45
+ "mssql",
46
+ "tsql",
47
+ # Cloud platforms
48
+ "bigquery",
49
+ "snowflake",
50
+ "redshift",
51
+ "athena",
52
+ "fabric",
53
+ # Analytics engines
54
+ "clickhouse",
55
+ "duckdb",
56
+ "databricks",
57
+ "spark",
58
+ "spark2",
59
+ "trino",
60
+ "presto",
61
+ # Specialized
62
+ "hive",
63
+ "drill",
64
+ "druid",
65
+ "materialize",
66
+ "teradata",
67
+ "dremio",
68
+ "doris",
69
+ "risingwave",
70
+ "singlestore",
71
+ "starrocks",
72
+ "tableau",
73
+ "exasol",
74
+ "dune",
75
+ }
76
+
77
+ # Dialect aliases for common variants
78
+ DIALECT_ALIASES = {
79
+ "postgresql": "postgres",
80
+ "pg": "postgres",
81
+ "pgplsql": "postgres",
82
+ "plsql": "oracle",
83
+ "oracledb": "oracle",
84
+ "tsql": "mssql",
85
+ }
86
+
87
+ MIN_QUERY_PARTS = 3
88
+
89
+
90
+ def _normalize_query_name(name: str) -> str:
91
+ """Normalize query name to be a valid Python identifier.
92
+
93
+ Args:
94
+ name: Raw query name from SQL file
95
+
96
+ Returns:
97
+ Normalized query name suitable as Python identifier
98
+ """
99
+ return TRIM_SPECIAL_CHARS.sub("", name).replace("-", "_")
100
+
101
+
102
+ def _normalize_dialect(dialect: str) -> str:
103
+ """Normalize dialect name with aliases.
104
+
105
+ Args:
106
+ dialect: Raw dialect name from SQL file
107
+
108
+ Returns:
109
+ Normalized dialect name
110
+ """
111
+ normalized = dialect.lower().strip()
112
+ return DIALECT_ALIASES.get(normalized, normalized)
113
+
114
+
115
+ def _normalize_dialect_for_sqlglot(dialect: str) -> str:
116
+ """Normalize dialect name for SQLGlot compatibility.
117
+
118
+ Args:
119
+ dialect: Dialect name from SQL file or parameter
120
+
121
+ Returns:
122
+ SQLGlot-compatible dialect name
123
+ """
124
+ normalized = dialect.lower().strip()
125
+ return DIALECT_ALIASES.get(normalized, normalized)
126
+
127
+
128
+ def _get_dialect_suggestions(invalid_dialect: str) -> "list[str]":
129
+ """Get dialect suggestions using fuzzy matching.
130
+
131
+ Args:
132
+ invalid_dialect: Invalid dialect name that was provided
133
+
134
+ Returns:
135
+ List of suggested dialect names (up to 3 suggestions)
136
+ """
137
+
138
+ return get_close_matches(invalid_dialect, SUPPORTED_DIALECTS, n=3, cutoff=0.6)
139
+
140
+
141
+ class NamedStatement:
142
+ """Represents a parsed SQL statement with metadata.
143
+
144
+ Contains individual SQL statements extracted from files with their
145
+ normalized names, SQL content, optional dialect specifications,
146
+ and line position for error reporting.
147
+ """
148
+
149
+ __slots__ = ("dialect", "name", "sql", "start_line")
150
+
151
+ def __init__(self, name: str, sql: str, dialect: "Optional[str]" = None, start_line: int = 0) -> None:
152
+ self.name = name
153
+ self.sql = sql
154
+ self.dialect = dialect
155
+ self.start_line = start_line
156
+
157
+
158
+ @dataclass
159
+ class SQLFile:
160
+ """Represents a loaded SQL file with metadata.
161
+
162
+ Contains SQL content and associated metadata including file location,
163
+ timestamps, and content hash.
164
+ """
165
+
166
+ content: str
167
+ """The raw SQL content from the file."""
168
+
169
+ path: str
170
+ """Path where the SQL file was loaded from."""
171
+
172
+ metadata: "dict[str, Any]" = field(default_factory=dict)
173
+ """Optional metadata associated with the SQL file."""
174
+
175
+ checksum: str = field(init=False)
176
+ """MD5 checksum of the SQL content for cache invalidation."""
177
+
178
+ loaded_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
179
+ """Timestamp when the file was loaded."""
180
+
181
+ def __post_init__(self) -> None:
182
+ """Calculate checksum after initialization."""
183
+ self.checksum = hashlib.md5(self.content.encode(), usedforsecurity=False).hexdigest()
184
+
185
+
186
+ class CachedSQLFile:
187
+ """Cached SQL file with parsed statements for efficient reloading.
188
+
189
+ Stored in the file cache to avoid re-parsing SQL files when their
190
+ content hasn't changed.
191
+ """
192
+
193
+ __slots__ = ("parsed_statements", "sql_file", "statement_names")
194
+
195
+ def __init__(self, sql_file: SQLFile, parsed_statements: "dict[str, NamedStatement]") -> None:
196
+ """Initialize cached SQL file.
197
+
198
+ Args:
199
+ sql_file: The original SQLFile with content and metadata.
200
+ parsed_statements: Named statements from the file.
201
+ """
202
+ self.sql_file = sql_file
203
+ self.parsed_statements = parsed_statements
204
+ self.statement_names = list(parsed_statements.keys())
205
+
206
+
207
+ class SQLFileLoader:
208
+ """Loads and parses SQL files with aiosql-style named queries.
209
+
210
+ Provides functionality to load SQL files containing named queries
211
+ (using -- name: syntax) and retrieve them by name.
212
+ """
213
+
214
+ def __init__(self, *, encoding: str = "utf-8", storage_registry: StorageRegistry = storage_registry) -> None:
215
+ """Initialize the SQL file loader.
216
+
217
+ Args:
218
+ encoding: Text encoding for reading SQL files.
219
+ storage_registry: Storage registry for handling file URIs.
220
+ """
221
+ self.encoding = encoding
222
+ self.storage_registry = storage_registry
223
+ self._queries: dict[str, NamedStatement] = {}
224
+ self._files: dict[str, SQLFile] = {}
225
+ self._query_to_file: dict[str, str] = {}
226
+
227
+ def _raise_file_not_found(self, path: str) -> None:
228
+ """Raise SQLFileNotFoundError for nonexistent file.
229
+
230
+ Args:
231
+ path: File path that was not found.
232
+
233
+ Raises:
234
+ SQLFileNotFoundError: Always raised.
235
+ """
236
+ raise SQLFileNotFoundError(path)
237
+
238
+ def _generate_file_cache_key(self, path: Union[str, Path]) -> str:
239
+ """Generate cache key for a file path.
240
+
241
+ Args:
242
+ path: File path to generate key for.
243
+
244
+ Returns:
245
+ Cache key string for the file.
246
+ """
247
+ path_str = str(path)
248
+ path_hash = hashlib.md5(path_str.encode(), usedforsecurity=False).hexdigest()
249
+ return f"file:{path_hash[:16]}"
250
+
251
+ def _calculate_file_checksum(self, path: Union[str, Path]) -> str:
252
+ """Calculate checksum for file content validation.
253
+
254
+ Args:
255
+ path: File path to calculate checksum for.
256
+
257
+ Returns:
258
+ MD5 checksum of file content.
259
+
260
+ Raises:
261
+ SQLFileParseError: If file cannot be read.
262
+ """
263
+ try:
264
+ content = self._read_file_content(path)
265
+ return hashlib.md5(content.encode(), usedforsecurity=False).hexdigest()
266
+ except Exception as e:
267
+ raise SQLFileParseError(str(path), str(path), e) from e
268
+
269
+ def _is_file_unchanged(self, path: Union[str, Path], cached_file: CachedSQLFile) -> bool:
270
+ """Check if file has changed since caching.
271
+
272
+ Args:
273
+ path: File path to check.
274
+ cached_file: Cached file data.
275
+
276
+ Returns:
277
+ True if file is unchanged, False otherwise.
278
+ """
279
+ try:
280
+ current_checksum = self._calculate_file_checksum(path)
281
+ except Exception:
282
+ return False
283
+ else:
284
+ return current_checksum == cached_file.sql_file.checksum
285
+
286
+ def _read_file_content(self, path: Union[str, Path]) -> str:
287
+ """Read file content using storage backend.
288
+
289
+ Args:
290
+ path: File path (can be local path or URI).
291
+
292
+ Returns:
293
+ File content as string.
294
+
295
+ Raises:
296
+ SQLFileNotFoundError: If file does not exist.
297
+ SQLFileParseError: If file cannot be read or parsed.
298
+ """
299
+
300
+ path_str = str(path)
301
+
302
+ try:
303
+ backend = self.storage_registry.get(path)
304
+ return backend.read_text(path_str, encoding=self.encoding)
305
+ except KeyError as e:
306
+ raise SQLFileNotFoundError(path_str) from e
307
+ except StorageOperationFailedError as e:
308
+ if "not found" in str(e).lower() or "no such file" in str(e).lower():
309
+ raise SQLFileNotFoundError(path_str) from e
310
+ raise SQLFileParseError(path_str, path_str, e) from e
311
+ except Exception as e:
312
+ raise SQLFileParseError(path_str, path_str, e) from e
313
+
314
+ @staticmethod
315
+ def _strip_leading_comments(sql_text: str) -> str:
316
+ """Remove leading comment lines from a SQL string."""
317
+ lines = sql_text.strip().split("\n")
318
+ first_sql_line_index = -1
319
+ for i, line in enumerate(lines):
320
+ if line.strip() and not line.strip().startswith("--"):
321
+ first_sql_line_index = i
322
+ break
323
+ if first_sql_line_index == -1:
324
+ return ""
325
+ return "\n".join(lines[first_sql_line_index:]).strip()
326
+
327
+ @staticmethod
328
+ def _parse_sql_content(content: str, file_path: str) -> "dict[str, NamedStatement]":
329
+ """Parse SQL content and extract named statements with dialect specifications.
330
+
331
+ Args:
332
+ content: Raw SQL file content to parse
333
+ file_path: File path for error reporting
334
+
335
+ Returns:
336
+ Dictionary mapping normalized statement names to NamedStatement objects
337
+
338
+ Raises:
339
+ SQLFileParseError: If no named statements found, duplicate names exist,
340
+ or invalid dialect names are specified
341
+ """
342
+ statements: dict[str, NamedStatement] = {}
343
+ content.splitlines()
344
+
345
+ name_matches = list(QUERY_NAME_PATTERN.finditer(content))
346
+ if not name_matches:
347
+ raise SQLFileParseError(
348
+ file_path, file_path, ValueError("No named SQL statements found (-- name: statement_name)")
349
+ )
350
+
351
+ for i, match in enumerate(name_matches):
352
+ raw_statement_name = match.group(1).strip()
353
+ statement_start_line = content[: match.start()].count("\n")
354
+
355
+ start_pos = match.end()
356
+ end_pos = name_matches[i + 1].start() if i + 1 < len(name_matches) else len(content)
357
+
358
+ statement_section = content[start_pos:end_pos].strip()
359
+ if not raw_statement_name or not statement_section:
360
+ continue
361
+
362
+ dialect = None
363
+ statement_sql = statement_section
364
+
365
+ section_lines = [line.strip() for line in statement_section.split("\n") if line.strip()]
366
+ if section_lines:
367
+ first_line = section_lines[0]
368
+ dialect_match = DIALECT_PATTERN.match(first_line)
369
+ if dialect_match:
370
+ declared_dialect = dialect_match.group("dialect").lower()
371
+
372
+ normalized_dialect = _normalize_dialect(declared_dialect)
373
+
374
+ if normalized_dialect not in SUPPORTED_DIALECTS:
375
+ suggestions = _get_dialect_suggestions(normalized_dialect)
376
+ warning_msg = f"Unknown dialect '{declared_dialect}' at line {statement_start_line + 1}"
377
+ if suggestions:
378
+ warning_msg += f". Did you mean: {', '.join(suggestions)}?"
379
+ warning_msg += (
380
+ f". Supported dialects: {', '.join(sorted(SUPPORTED_DIALECTS))}. Using dialect as-is."
381
+ )
382
+ logger.warning(warning_msg)
383
+ dialect = declared_dialect.lower()
384
+ else:
385
+ dialect = normalized_dialect
386
+ remaining_lines = section_lines[1:]
387
+ statement_sql = "\n".join(remaining_lines)
388
+
389
+ clean_sql = SQLFileLoader._strip_leading_comments(statement_sql)
390
+ if clean_sql:
391
+ normalized_name = _normalize_query_name(raw_statement_name)
392
+ if normalized_name in statements:
393
+ raise SQLFileParseError(
394
+ file_path, file_path, ValueError(f"Duplicate statement name: {raw_statement_name}")
395
+ )
396
+
397
+ statements[normalized_name] = NamedStatement(
398
+ name=normalized_name, sql=clean_sql, dialect=dialect, start_line=statement_start_line
399
+ )
400
+
401
+ if not statements:
402
+ raise SQLFileParseError(file_path, file_path, ValueError("No valid SQL statements found after parsing"))
403
+
404
+ return statements
405
+
406
+ def load_sql(self, *paths: Union[str, Path]) -> None:
407
+ """Load SQL files and parse named queries.
408
+
409
+ Args:
410
+ *paths: One or more file paths or directory paths to load.
411
+ """
412
+ correlation_id = CorrelationContext.get()
413
+ start_time = time.perf_counter()
414
+
415
+ logger.info("Loading SQL files", extra={"file_count": len(paths), "correlation_id": correlation_id})
416
+
417
+ loaded_count = 0
418
+ query_count_before = len(self._queries)
419
+
420
+ try:
421
+ for path in paths:
422
+ path_str = str(path)
423
+ if "://" in path_str:
424
+ self._load_single_file(path, None)
425
+ loaded_count += 1
426
+ else:
427
+ path_obj = Path(path)
428
+ if path_obj.is_dir():
429
+ loaded_count += self._load_directory(path_obj)
430
+ elif path_obj.exists():
431
+ self._load_single_file(path_obj, None)
432
+ loaded_count += 1
433
+ elif path_obj.suffix:
434
+ self._raise_file_not_found(str(path))
435
+
436
+ duration = time.perf_counter() - start_time
437
+ new_queries = len(self._queries) - query_count_before
438
+
439
+ logger.info(
440
+ "Loaded %d SQL files with %d new queries in %.3fms",
441
+ loaded_count,
442
+ new_queries,
443
+ duration * 1000,
444
+ extra={
445
+ "files_loaded": loaded_count,
446
+ "new_queries": new_queries,
447
+ "duration_ms": duration * 1000,
448
+ "correlation_id": correlation_id,
449
+ },
450
+ )
451
+
452
+ except Exception as e:
453
+ duration = time.perf_counter() - start_time
454
+ logger.exception(
455
+ "Failed to load SQL files after %.3fms",
456
+ duration * 1000,
457
+ extra={
458
+ "error_type": type(e).__name__,
459
+ "duration_ms": duration * 1000,
460
+ "correlation_id": correlation_id,
461
+ },
462
+ )
463
+ raise
464
+
465
+ def _load_directory(self, dir_path: Path) -> int:
466
+ """Load all SQL files from a directory with namespacing."""
467
+ sql_files = list(dir_path.rglob("*.sql"))
468
+ if not sql_files:
469
+ return 0
470
+
471
+ for file_path in sql_files:
472
+ relative_path = file_path.relative_to(dir_path)
473
+ namespace_parts = relative_path.parent.parts
474
+ namespace = ".".join(namespace_parts) if namespace_parts else None
475
+ self._load_single_file(file_path, namespace)
476
+ return len(sql_files)
477
+
478
+ def _load_single_file(self, file_path: Union[str, Path], namespace: Optional[str]) -> None:
479
+ """Load a single SQL file with optional namespace and caching.
480
+
481
+ Args:
482
+ file_path: Path to the SQL file.
483
+ namespace: Optional namespace prefix for queries.
484
+ """
485
+ path_str = str(file_path)
486
+
487
+ if path_str in self._files:
488
+ return
489
+
490
+ cache_config = get_cache_config()
491
+ if not cache_config.compiled_cache_enabled:
492
+ self._load_file_without_cache(file_path, namespace)
493
+ return
494
+
495
+ cache_key_str = self._generate_file_cache_key(file_path)
496
+ cache_key = CacheKey((cache_key_str,))
497
+ unified_cache = get_default_cache()
498
+ cached_file = unified_cache.get(cache_key)
499
+
500
+ if (
501
+ cached_file is not None
502
+ and isinstance(cached_file, CachedSQLFile)
503
+ and self._is_file_unchanged(file_path, cached_file)
504
+ ):
505
+ self._files[path_str] = cached_file.sql_file
506
+ for name, statement in cached_file.parsed_statements.items():
507
+ namespaced_name = f"{namespace}.{name}" if namespace else name
508
+ if namespaced_name in self._queries:
509
+ existing_file = self._query_to_file.get(namespaced_name, "unknown")
510
+ if existing_file != path_str:
511
+ raise SQLFileParseError(
512
+ path_str,
513
+ path_str,
514
+ ValueError(f"Query name '{namespaced_name}' already exists in file: {existing_file}"),
515
+ )
516
+ self._queries[namespaced_name] = statement
517
+ self._query_to_file[namespaced_name] = path_str
518
+ return
519
+
520
+ self._load_file_without_cache(file_path, namespace)
521
+
522
+ if path_str in self._files:
523
+ sql_file = self._files[path_str]
524
+ file_statements: dict[str, NamedStatement] = {}
525
+ for query_name, query_path in self._query_to_file.items():
526
+ if query_path == path_str:
527
+ stored_name = query_name
528
+ if namespace and query_name.startswith(f"{namespace}."):
529
+ stored_name = query_name[len(namespace) + 1 :]
530
+ file_statements[stored_name] = self._queries[query_name]
531
+
532
+ cached_file_data = CachedSQLFile(sql_file=sql_file, parsed_statements=file_statements)
533
+ unified_cache.put(cache_key, cached_file_data)
534
+
535
+ def _load_file_without_cache(self, file_path: Union[str, Path], namespace: Optional[str]) -> None:
536
+ """Load a single SQL file without caching.
537
+
538
+ Args:
539
+ file_path: Path to the SQL file.
540
+ namespace: Optional namespace prefix for queries.
541
+ """
542
+ path_str = str(file_path)
543
+
544
+ content = self._read_file_content(file_path)
545
+ sql_file = SQLFile(content=content, path=path_str)
546
+ self._files[path_str] = sql_file
547
+
548
+ statements = self._parse_sql_content(content, path_str)
549
+ for name, statement in statements.items():
550
+ namespaced_name = f"{namespace}.{name}" if namespace else name
551
+ if namespaced_name in self._queries:
552
+ existing_file = self._query_to_file.get(namespaced_name, "unknown")
553
+ if existing_file != path_str:
554
+ raise SQLFileParseError(
555
+ path_str,
556
+ path_str,
557
+ ValueError(f"Query name '{namespaced_name}' already exists in file: {existing_file}"),
558
+ )
559
+ self._queries[namespaced_name] = statement
560
+ self._query_to_file[namespaced_name] = path_str
561
+
562
+ def add_named_sql(self, name: str, sql: str, dialect: "Optional[str]" = None) -> None:
563
+ """Add a named SQL query directly without loading from a file.
564
+
565
+ Args:
566
+ name: Name for the SQL query.
567
+ sql: Raw SQL content.
568
+ dialect: Optional dialect for the SQL statement.
569
+
570
+ Raises:
571
+ ValueError: If query name already exists.
572
+ """
573
+ if name in self._queries:
574
+ existing_source = self._query_to_file.get(name, "<directly added>")
575
+ msg = f"Query name '{name}' already exists (source: {existing_source})"
576
+ raise ValueError(msg)
577
+
578
+ if dialect is not None:
579
+ normalized_dialect = _normalize_dialect(dialect)
580
+ if normalized_dialect not in SUPPORTED_DIALECTS:
581
+ suggestions = _get_dialect_suggestions(normalized_dialect)
582
+ warning_msg = f"Unknown dialect '{dialect}'"
583
+ if suggestions:
584
+ warning_msg += f". Did you mean: {', '.join(suggestions)}?"
585
+ warning_msg += f". Supported dialects: {', '.join(sorted(SUPPORTED_DIALECTS))}. Using dialect as-is."
586
+ logger.warning(warning_msg)
587
+ dialect = dialect.lower()
588
+ else:
589
+ dialect = normalized_dialect
590
+
591
+ statement = NamedStatement(name=name, sql=sql.strip(), dialect=dialect, start_line=0)
592
+ self._queries[name] = statement
593
+ self._query_to_file[name] = "<directly added>"
594
+
595
+ def get_sql(
596
+ self, name: str, parameters: "Optional[Any]" = None, dialect: "Optional[str]" = None, **kwargs: "Any"
597
+ ) -> "SQL":
598
+ """Get a SQL object by statement name with dialect support.
599
+
600
+ Args:
601
+ name: Name of the statement (from -- name: in SQL file).
602
+ Hyphens in names are converted to underscores.
603
+ parameters: Parameters for the SQL statement.
604
+ dialect: Optional dialect override.
605
+ **kwargs: Additional parameters to pass to the SQL object.
606
+
607
+ Returns:
608
+ SQL object ready for execution.
609
+
610
+ Raises:
611
+ SQLFileNotFoundError: If statement name not found.
612
+ """
613
+ correlation_id = CorrelationContext.get()
614
+
615
+ safe_name = _normalize_query_name(name)
616
+
617
+ if safe_name not in self._queries:
618
+ available = ", ".join(sorted(self._queries.keys())) if self._queries else "none"
619
+ logger.error(
620
+ "Statement not found: %s",
621
+ name,
622
+ extra={
623
+ "statement_name": name,
624
+ "safe_name": safe_name,
625
+ "available_statements": len(self._queries),
626
+ "correlation_id": correlation_id,
627
+ },
628
+ )
629
+ raise SQLFileNotFoundError(name, path=f"Statement '{name}' not found. Available statements: {available}")
630
+
631
+ parsed_statement = self._queries[safe_name]
632
+
633
+ effective_dialect = dialect or parsed_statement.dialect
634
+
635
+ if dialect is not None:
636
+ normalized_dialect = _normalize_dialect(dialect)
637
+ if normalized_dialect not in SUPPORTED_DIALECTS:
638
+ suggestions = _get_dialect_suggestions(normalized_dialect)
639
+ warning_msg = f"Unknown dialect '{dialect}'"
640
+ if suggestions:
641
+ warning_msg += f". Did you mean: {', '.join(suggestions)}?"
642
+ warning_msg += f". Supported dialects: {', '.join(sorted(SUPPORTED_DIALECTS))}. Using dialect as-is."
643
+ logger.warning(warning_msg)
644
+ effective_dialect = dialect.lower()
645
+ else:
646
+ effective_dialect = normalized_dialect
647
+
648
+ sql_kwargs = dict(kwargs)
649
+ if parameters is not None:
650
+ sql_kwargs["parameters"] = parameters
651
+
652
+ sqlglot_dialect = None
653
+ if effective_dialect:
654
+ sqlglot_dialect = _normalize_dialect_for_sqlglot(effective_dialect)
655
+
656
+ if not effective_dialect and "statement_config" not in sql_kwargs:
657
+ validator = ParameterValidator()
658
+ param_info = validator.extract_parameters(parsed_statement.sql)
659
+ if param_info:
660
+ styles = {p.style for p in param_info}
661
+ if styles:
662
+ detected_style = next(iter(styles))
663
+ sql_kwargs["statement_config"] = StatementConfig(
664
+ parameter_config=ParameterStyleConfig(
665
+ default_parameter_style=detected_style,
666
+ supported_parameter_styles=styles,
667
+ preserve_parameter_format=True,
668
+ )
669
+ )
670
+
671
+ return SQL(parsed_statement.sql, dialect=sqlglot_dialect, **sql_kwargs)
672
+
673
+ def get_file(self, path: Union[str, Path]) -> "Optional[SQLFile]":
674
+ """Get a loaded SQLFile object by path.
675
+
676
+ Args:
677
+ path: Path of the file.
678
+
679
+ Returns:
680
+ SQLFile object if loaded, None otherwise.
681
+ """
682
+ return self._files.get(str(path))
683
+
684
+ def get_file_for_query(self, name: str) -> "Optional[SQLFile]":
685
+ """Get the SQLFile object containing a query.
686
+
687
+ Args:
688
+ name: Query name (hyphens are converted to underscores).
689
+
690
+ Returns:
691
+ SQLFile object if query exists, None otherwise.
692
+ """
693
+ safe_name = _normalize_query_name(name)
694
+ if safe_name in self._query_to_file:
695
+ file_path = self._query_to_file[safe_name]
696
+ return self._files.get(file_path)
697
+ return None
698
+
699
+ def list_queries(self) -> "list[str]":
700
+ """List all available query names.
701
+
702
+ Returns:
703
+ Sorted list of query names.
704
+ """
705
+ return sorted(self._queries.keys())
706
+
707
+ def list_files(self) -> "list[str]":
708
+ """List all loaded file paths.
709
+
710
+ Returns:
711
+ Sorted list of file paths.
712
+ """
713
+ return sorted(self._files.keys())
714
+
715
+ def has_query(self, name: str) -> bool:
716
+ """Check if a query exists.
717
+
718
+ Args:
719
+ name: Query name to check.
720
+
721
+ Returns:
722
+ True if query exists.
723
+ """
724
+ safe_name = _normalize_query_name(name)
725
+ return safe_name in self._queries
726
+
727
+ def clear_cache(self) -> None:
728
+ """Clear all cached files and queries."""
729
+ self._files.clear()
730
+ self._queries.clear()
731
+ self._query_to_file.clear()
732
+
733
+ cache_config = get_cache_config()
734
+ if cache_config.compiled_cache_enabled:
735
+ unified_cache = get_default_cache()
736
+ unified_cache.clear()
737
+
738
+ def clear_file_cache(self) -> None:
739
+ """Clear the file cache only, keeping loaded queries."""
740
+ cache_config = get_cache_config()
741
+ if cache_config.compiled_cache_enabled:
742
+ unified_cache = get_default_cache()
743
+ unified_cache.clear()
744
+
745
+ def get_query_text(self, name: str) -> str:
746
+ """Get raw SQL text for a query.
747
+
748
+ Args:
749
+ name: Query name.
750
+
751
+ Returns:
752
+ Raw SQL text.
753
+
754
+ Raises:
755
+ SQLFileNotFoundError: If query not found.
756
+ """
757
+ safe_name = _normalize_query_name(name)
758
+ if safe_name not in self._queries:
759
+ raise SQLFileNotFoundError(name)
760
+ return self._queries[safe_name].sql