tree_sitter_analyzer-1.9.17.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. tree_sitter_analyzer/__init__.py +132 -0
  2. tree_sitter_analyzer/__main__.py +11 -0
  3. tree_sitter_analyzer/api.py +853 -0
  4. tree_sitter_analyzer/cli/__init__.py +39 -0
  5. tree_sitter_analyzer/cli/__main__.py +12 -0
  6. tree_sitter_analyzer/cli/argument_validator.py +89 -0
  7. tree_sitter_analyzer/cli/commands/__init__.py +26 -0
  8. tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
  9. tree_sitter_analyzer/cli/commands/base_command.py +181 -0
  10. tree_sitter_analyzer/cli/commands/default_command.py +18 -0
  11. tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
  12. tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
  13. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
  14. tree_sitter_analyzer/cli/commands/query_command.py +109 -0
  15. tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
  16. tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
  17. tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
  18. tree_sitter_analyzer/cli/commands/table_command.py +414 -0
  19. tree_sitter_analyzer/cli/info_commands.py +124 -0
  20. tree_sitter_analyzer/cli_main.py +472 -0
  21. tree_sitter_analyzer/constants.py +85 -0
  22. tree_sitter_analyzer/core/__init__.py +15 -0
  23. tree_sitter_analyzer/core/analysis_engine.py +580 -0
  24. tree_sitter_analyzer/core/cache_service.py +333 -0
  25. tree_sitter_analyzer/core/engine.py +585 -0
  26. tree_sitter_analyzer/core/parser.py +293 -0
  27. tree_sitter_analyzer/core/query.py +605 -0
  28. tree_sitter_analyzer/core/query_filter.py +200 -0
  29. tree_sitter_analyzer/core/query_service.py +340 -0
  30. tree_sitter_analyzer/encoding_utils.py +530 -0
  31. tree_sitter_analyzer/exceptions.py +747 -0
  32. tree_sitter_analyzer/file_handler.py +246 -0
  33. tree_sitter_analyzer/formatters/__init__.py +1 -0
  34. tree_sitter_analyzer/formatters/base_formatter.py +201 -0
  35. tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
  36. tree_sitter_analyzer/formatters/formatter_config.py +197 -0
  37. tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
  38. tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
  39. tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
  40. tree_sitter_analyzer/formatters/go_formatter.py +368 -0
  41. tree_sitter_analyzer/formatters/html_formatter.py +498 -0
  42. tree_sitter_analyzer/formatters/java_formatter.py +423 -0
  43. tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
  44. tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
  45. tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
  46. tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
  47. tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
  48. tree_sitter_analyzer/formatters/php_formatter.py +301 -0
  49. tree_sitter_analyzer/formatters/python_formatter.py +830 -0
  50. tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
  51. tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
  52. tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
  53. tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
  54. tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
  55. tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
  56. tree_sitter_analyzer/interfaces/__init__.py +9 -0
  57. tree_sitter_analyzer/interfaces/cli.py +535 -0
  58. tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
  59. tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
  60. tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
  61. tree_sitter_analyzer/language_detector.py +553 -0
  62. tree_sitter_analyzer/language_loader.py +271 -0
  63. tree_sitter_analyzer/languages/__init__.py +10 -0
  64. tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
  65. tree_sitter_analyzer/languages/css_plugin.py +449 -0
  66. tree_sitter_analyzer/languages/go_plugin.py +836 -0
  67. tree_sitter_analyzer/languages/html_plugin.py +496 -0
  68. tree_sitter_analyzer/languages/java_plugin.py +1299 -0
  69. tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
  70. tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
  71. tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
  72. tree_sitter_analyzer/languages/php_plugin.py +862 -0
  73. tree_sitter_analyzer/languages/python_plugin.py +1636 -0
  74. tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
  75. tree_sitter_analyzer/languages/rust_plugin.py +673 -0
  76. tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
  77. tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
  78. tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
  79. tree_sitter_analyzer/legacy_table_formatter.py +860 -0
  80. tree_sitter_analyzer/mcp/__init__.py +34 -0
  81. tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
  82. tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
  83. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
  84. tree_sitter_analyzer/mcp/server.py +869 -0
  85. tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
  86. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
  87. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
  88. tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
  89. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
  90. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
  91. tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
  92. tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
  93. tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
  94. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
  95. tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
  96. tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
  97. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
  98. tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
  99. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
  100. tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
  101. tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
  102. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
  103. tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
  104. tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
  105. tree_sitter_analyzer/models.py +840 -0
  106. tree_sitter_analyzer/mypy_current_errors.txt +2 -0
  107. tree_sitter_analyzer/output_manager.py +255 -0
  108. tree_sitter_analyzer/platform_compat/__init__.py +3 -0
  109. tree_sitter_analyzer/platform_compat/adapter.py +324 -0
  110. tree_sitter_analyzer/platform_compat/compare.py +224 -0
  111. tree_sitter_analyzer/platform_compat/detector.py +67 -0
  112. tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
  113. tree_sitter_analyzer/platform_compat/profiles.py +217 -0
  114. tree_sitter_analyzer/platform_compat/record.py +55 -0
  115. tree_sitter_analyzer/platform_compat/recorder.py +155 -0
  116. tree_sitter_analyzer/platform_compat/report.py +92 -0
  117. tree_sitter_analyzer/plugins/__init__.py +280 -0
  118. tree_sitter_analyzer/plugins/base.py +647 -0
  119. tree_sitter_analyzer/plugins/manager.py +384 -0
  120. tree_sitter_analyzer/project_detector.py +328 -0
  121. tree_sitter_analyzer/queries/__init__.py +27 -0
  122. tree_sitter_analyzer/queries/csharp.py +216 -0
  123. tree_sitter_analyzer/queries/css.py +615 -0
  124. tree_sitter_analyzer/queries/go.py +275 -0
  125. tree_sitter_analyzer/queries/html.py +543 -0
  126. tree_sitter_analyzer/queries/java.py +402 -0
  127. tree_sitter_analyzer/queries/javascript.py +724 -0
  128. tree_sitter_analyzer/queries/kotlin.py +192 -0
  129. tree_sitter_analyzer/queries/markdown.py +258 -0
  130. tree_sitter_analyzer/queries/php.py +95 -0
  131. tree_sitter_analyzer/queries/python.py +859 -0
  132. tree_sitter_analyzer/queries/ruby.py +92 -0
  133. tree_sitter_analyzer/queries/rust.py +223 -0
  134. tree_sitter_analyzer/queries/sql.py +555 -0
  135. tree_sitter_analyzer/queries/typescript.py +871 -0
  136. tree_sitter_analyzer/queries/yaml.py +236 -0
  137. tree_sitter_analyzer/query_loader.py +272 -0
  138. tree_sitter_analyzer/security/__init__.py +22 -0
  139. tree_sitter_analyzer/security/boundary_manager.py +277 -0
  140. tree_sitter_analyzer/security/regex_checker.py +297 -0
  141. tree_sitter_analyzer/security/validator.py +599 -0
  142. tree_sitter_analyzer/table_formatter.py +782 -0
  143. tree_sitter_analyzer/utils/__init__.py +53 -0
  144. tree_sitter_analyzer/utils/logging.py +433 -0
  145. tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
  146. tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
  147. tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
  148. tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
  149. tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
@@ -0,0 +1,2444 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ SQL Language Plugin
4
+
5
+ Provides SQL-specific parsing and element extraction functionality.
6
+ Supports extraction of tables, views, stored procedures, functions, triggers, and indexes.
7
+ """
8
+
9
+ from collections.abc import Iterator
10
+ from typing import TYPE_CHECKING, Any
11
+
12
+ if TYPE_CHECKING:
13
+ import tree_sitter
14
+
15
+ from ..core.analysis_engine import AnalysisRequest
16
+ from ..models import AnalysisResult
17
+
18
+ try:
19
+ import tree_sitter
20
+
21
+ TREE_SITTER_AVAILABLE = True
22
+ except ImportError:
23
+ TREE_SITTER_AVAILABLE = False
24
+
25
+ from ..encoding_utils import extract_text_slice, safe_encode
26
+ from ..models import (
27
+ Class,
28
+ Function,
29
+ Import,
30
+ SQLColumn,
31
+ SQLConstraint,
32
+ SQLElement,
33
+ SQLFunction,
34
+ SQLIndex,
35
+ SQLParameter,
36
+ SQLProcedure,
37
+ SQLTable,
38
+ SQLTrigger,
39
+ SQLView,
40
+ Variable,
41
+ )
42
+ from ..platform_compat.adapter import CompatibilityAdapter
43
+ from ..platform_compat.detector import PlatformDetector
44
+ from ..platform_compat.profiles import BehaviorProfile
45
+ from ..plugins.base import ElementExtractor, LanguagePlugin
46
+ from ..utils import log_debug, log_error
47
+
48
+
49
+ class SQLElementExtractor(ElementExtractor):
50
+ """
51
+ SQL-specific element extractor.
52
+
53
+ This extractor parses SQL AST and extracts database elements, mapping them
54
+ to the unified element model:
55
+ - Tables and Views → Class elements
56
+ - Stored Procedures, Functions, Triggers → Function elements
57
+ - Indexes → Variable elements
58
+ - Schema references → Import elements
59
+
60
+ The extractor handles standard SQL (ANSI SQL) syntax and supports
61
+ CREATE TABLE, CREATE VIEW, CREATE PROCEDURE, CREATE FUNCTION,
62
+ CREATE TRIGGER, and CREATE INDEX statements.
63
+ """
64
+
65
+ def __init__(self, diagnostic_mode: bool = False) -> None:
66
+ """
67
+ Initialize the SQL element extractor.
68
+
69
+ Sets up internal state for source code processing and performance caches
70
+ for node text extraction; diagnostic_mode enables extra debug logging.
71
+ """
72
+ super().__init__()
73
+ self.source_code: str = ""
74
+ self.content_lines: list[str] = []
75
+ self.diagnostic_mode = diagnostic_mode
76
+ self.platform_info = None
77
+
78
+ # Performance optimization caches
79
+ # Cache node text to avoid repeated extraction
80
+ self._node_text_cache: dict[int, str] = {}
81
+ # Track processed nodes to avoid duplicate processing
82
+ self._processed_nodes: set[int] = set()
83
+ # File encoding for safe text extraction
84
+ self._file_encoding: str | None = None
85
+
86
+ # Platform compatibility
87
+ self.adapter: CompatibilityAdapter | None = None
88
+
89
+ def set_adapter(self, adapter: CompatibilityAdapter) -> None:
90
+ """Set the compatibility adapter."""
91
+ self.adapter = adapter
92
+
93
+ def extract_sql_elements(
94
+ self, tree: "tree_sitter.Tree", source_code: str
95
+ ) -> list[SQLElement]:
96
+ """
97
+ Extract all SQL elements with enhanced metadata.
98
+
99
+ This is the new enhanced extraction method that returns SQL-specific
100
+ element types with detailed metadata including columns, constraints,
101
+ parameters, and dependencies.
102
+
103
+ Args:
104
+ tree: Tree-sitter AST tree parsed from SQL source
105
+ source_code: Original SQL source code as string
106
+
107
+ Returns:
108
+ List of SQLElement objects with detailed metadata
109
+ """
110
+ self.source_code = source_code or ""
111
+ self.content_lines = self.source_code.split("\n")
112
+ self._reset_caches()
113
+
114
+ sql_elements: list[SQLElement] = []
115
+
116
+ if tree is not None and tree.root_node is not None:
117
+ try:
118
+ # Extract all SQL element types with enhanced metadata
119
+ self._extract_sql_tables(tree.root_node, sql_elements)
120
+ self._extract_sql_views(tree.root_node, sql_elements)
121
+ self._extract_sql_procedures(tree.root_node, sql_elements)
122
+ self._extract_sql_functions_enhanced(tree.root_node, sql_elements)
123
+ self._extract_sql_triggers(tree.root_node, sql_elements)
124
+ self._extract_sql_indexes(tree.root_node, sql_elements)
125
+
126
+ # Apply platform compatibility adapter if available
127
+ if self.adapter:
128
+ if self.diagnostic_mode:
129
+ log_debug(
130
+ f"Diagnostic: Before adaptation: {[e.name for e in sql_elements]}"
131
+ )
132
+
133
+ sql_elements = self.adapter.adapt_elements(
134
+ sql_elements, self.source_code
135
+ )
136
+
137
+ if self.diagnostic_mode:
138
+ log_debug(
139
+ f"Diagnostic: After adaptation: {[e.name for e in sql_elements]}"
140
+ )
141
+
142
+ # Post-process to fix platform-specific parsing errors
143
+ sql_elements = self._validate_and_fix_elements(sql_elements)
144
+
145
+ log_debug(f"Extracted {len(sql_elements)} SQL elements with metadata")
146
+ except Exception as e:
147
+ log_error(
148
+ f"Error during enhanced SQL extraction on {self.platform_info}: {e}"
149
+ )
150
+ log_error(
151
+ "Suggestion: Check platform compatibility documentation or enable diagnostic mode for more details."
152
+ )
153
+ # Return empty list or partial results to allow other languages to continue
154
+ if not sql_elements:
155
+ sql_elements = []
156
+
157
+ return sql_elements
158
+
159
+ def _validate_and_fix_elements(
160
+ self, elements: list[SQLElement]
161
+ ) -> list[SQLElement]:
162
+ """
163
+ Post-process elements to fix parsing errors caused by platform-specific
164
+ tree-sitter behavior (e.g. ERROR nodes misidentifying triggers).
165
+ """
166
+ import re
167
+
168
+ validated = []
169
+ seen_names = set()
170
+
171
+ for elem in elements:
172
+ elem_type = getattr(elem, "sql_element_type", None)
173
+
174
+ # 1. Check for Phantom Elements (Mismatch between Type and Content)
175
+ if elem_type and elem.raw_text:
176
+ raw_text_stripped = elem.raw_text.strip()
177
+ is_valid = True
178
+
179
+ # Fix Ubuntu 3.12 phantom trigger issue (Trigger type but Function content)
180
+ if elem_type.value == "trigger":
181
+ # Raw text must actually contain CREATE TRIGGER (leading comments/whitespace allowed)
182
+ if not re.search(
183
+ r"CREATE\s+TRIGGER", raw_text_stripped, re.IGNORECASE
184
+ ):
185
+ log_debug(
186
+ f"Removing phantom trigger: {elem.name} (content mismatch)"
187
+ )
188
+ is_valid = False
189
+
190
+ # Fix phantom functions
191
+ elif elem_type.value == "function":
192
+ if not re.search(
193
+ r"CREATE\s+FUNCTION", raw_text_stripped, re.IGNORECASE
194
+ ):
195
+ log_debug(
196
+ f"Removing phantom function: {elem.name} (content mismatch)"
197
+ )
198
+ is_valid = False
199
+
200
+ if not is_valid:
201
+ continue
202
+
203
+ # 2. Fix Names
204
+ if elem_type and elem.raw_text:
205
+ # Fix Trigger name issues (e.g. macOS "description" bug)
206
+ if elem_type.value == "trigger":
207
+ match = re.search(
208
+ r"CREATE\s+TRIGGER\s+([a-zA-Z_][a-zA-Z0-9_]*)",
209
+ elem.raw_text,
210
+ re.IGNORECASE,
211
+ )
212
+ if match:
213
+ correct_name = match.group(1)
214
+ if elem.name != correct_name and self._is_valid_identifier(
215
+ correct_name
216
+ ):
217
+ log_debug(
218
+ f"Fixing trigger name: {elem.name} -> {correct_name}"
219
+ )
220
+ elem.name = correct_name
221
+
222
+ # Fix Function name issues (e.g. Windows/Ubuntu "AUTO_INCREMENT" bug)
223
+ elif elem_type.value == "function":
224
+ # Filter out obvious garbage names if they match keywords
225
+ if elem.name and elem.name.upper() in (
226
+ "AUTO_INCREMENT",
227
+ "KEY",
228
+ "PRIMARY",
229
+ "FOREIGN",
230
+ ):
231
+ # Try to recover correct name
232
+ match = re.search(
233
+ r"CREATE\s+FUNCTION\s+([a-zA-Z_][a-zA-Z0-9_]*)",
234
+ elem.raw_text,
235
+ re.IGNORECASE,
236
+ )
237
+ if match:
238
+ correct_name = match.group(1)
239
+ log_debug(
240
+ f"Fixing garbage function name: {elem.name} -> {correct_name}"
241
+ )
242
+ elem.name = correct_name
243
+ else:
244
+ log_debug(f"Removing garbage function name: {elem.name}")
245
+ continue
246
+
247
+ # General name verification
248
+ match = re.search(
249
+ r"CREATE\s+FUNCTION\s+([a-zA-Z_][a-zA-Z0-9_]*)",
250
+ elem.raw_text,
251
+ re.IGNORECASE,
252
+ )
253
+ if match:
254
+ correct_name = match.group(1)
255
+ if elem.name != correct_name and self._is_valid_identifier(
256
+ correct_name
257
+ ):
258
+ log_debug(
259
+ f"Fixing function name: {elem.name} -> {correct_name}"
260
+ )
261
+ elem.name = correct_name
262
+
263
+ # Deduplication
264
+ key = (getattr(elem, "sql_element_type", None), elem.name, elem.start_line)
265
+ if key in seen_names:
266
+ continue
267
+ seen_names.add(key)
268
+
269
+ validated.append(elem)
270
+
271
+ # Recover missing Views (often missed in ERROR nodes on some platforms)
272
+ # This is a fallback scan of the entire source code
273
+ if self.source_code:
274
+ existing_views = {
275
+ e.name
276
+ for e in validated
277
+ if hasattr(e, "sql_element_type") and e.sql_element_type.value == "view"
278
+ }
279
+
280
+ view_matches = re.finditer(
281
+ r"^\s*CREATE\s+VIEW\s+(?:IF\s+NOT\s+EXISTS\s+)?(\w+)\s+AS",
282
+ self.source_code,
283
+ re.IGNORECASE | re.MULTILINE,
284
+ )
285
+
286
+ for match in view_matches:
287
+ view_name = match.group(1)
288
+ if view_name not in existing_views and self._is_valid_identifier(
289
+ view_name
290
+ ):
291
+ log_debug(f"Recovering missing view: {view_name}")
292
+
293
+ # Calculate approximate line numbers
294
+ start_pos = match.start()
295
+ # Count newlines before start_pos
296
+ start_line = self.source_code.count("\n", 0, start_pos) + 1
297
+
298
+ # Estimate end line (until next semicolon or empty line)
299
+ view_context = self.source_code[start_pos:]
300
+ semicolon_match = re.search(r";", view_context)
301
+ if semicolon_match:
302
+ end_pos = start_pos + semicolon_match.end()
303
+ end_line = self.source_code.count("\n", 0, end_pos) + 1
304
+ else:
305
+ end_line = start_line + 5 # Fallback estimate
306
+
307
+ # Extract source tables roughly
308
+ source_tables = []
309
+ table_matches = re.findall(
310
+ r"(?:FROM|JOIN)\s+([a-zA-Z_][a-zA-Z0-9_]*)",
311
+ view_context[
312
+ : semicolon_match.end() if semicolon_match else 500
313
+ ],
314
+ re.IGNORECASE,
315
+ )
316
+ source_tables.extend(table_matches)
317
+
318
+ view = SQLView(
319
+ name=view_name,
320
+ start_line=start_line,
321
+ end_line=end_line,
322
+ raw_text=f"CREATE VIEW {view_name} ...",
323
+ language="sql",
324
+ source_tables=sorted(set(source_tables)),
325
+ dependencies=sorted(set(source_tables)),
326
+ )
327
+ validated.append(view)
328
+ existing_views.add(view_name)
329
+
330
+ return validated
331
+
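
To make the cleanup above concrete, here is a minimal standalone sketch of the phantom-element check and name repair, using only the regexes shown; FakeElement and validate are illustrative stand-ins, not the package's classes.

    import re

    class FakeElement:  # illustration only; not the package's model class
        def __init__(self, kind, name, raw_text):
            self.kind, self.name, self.raw_text = kind, name, raw_text

    def validate(elements):
        kept = []
        for e in elements:
            text = e.raw_text.strip()
            # Drop "triggers" whose text is not actually a CREATE TRIGGER statement.
            if e.kind == "trigger" and not re.search(r"CREATE\s+TRIGGER", text, re.IGNORECASE):
                continue
            # Repair a trigger's name from its raw text when they disagree.
            m = re.search(r"CREATE\s+TRIGGER\s+([a-zA-Z_][a-zA-Z0-9_]*)", text, re.IGNORECASE)
            if e.kind == "trigger" and m and e.name != m.group(1):
                e.name = m.group(1)
            kept.append(e)
        return kept

    elems = [
        FakeElement("trigger", "description", "CREATE TRIGGER update_stock AFTER INSERT ON orders FOR EACH ROW ..."),
        FakeElement("trigger", "calc_total", "CREATE FUNCTION calc_total(p INT) RETURNS INT ..."),
    ]
    print([(e.kind, e.name) for e in validate(elems)])
    # [('trigger', 'update_stock')] -- the name is repaired; the phantom trigger is dropped
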
332
+ def extract_functions(
333
+ self, tree: "tree_sitter.Tree", source_code: str
334
+ ) -> list[Function]:
335
+ """
336
+ Extract stored procedures, functions, and triggers from SQL code.
337
+
338
+ Maps SQL executable units to Function elements:
339
+ - CREATE PROCEDURE statements → Function
340
+ - CREATE FUNCTION statements → Function
341
+ - CREATE TRIGGER statements → Function
342
+
343
+ Args:
344
+ tree: Tree-sitter AST tree parsed from SQL source
345
+ source_code: Original SQL source code as string
346
+
347
+ Returns:
348
+ List of Function elements representing procedures, functions, and triggers
349
+ """
350
+ self.source_code = source_code or ""
351
+ self.content_lines = self.source_code.split("\n")
352
+ self._reset_caches()
353
+
354
+ functions: list[Function] = []
355
+
356
+ if tree is not None and tree.root_node is not None:
357
+ try:
358
+ # Extract procedures, functions, and triggers
359
+ self._extract_procedures(tree.root_node, functions)
360
+ self._extract_sql_functions(tree.root_node, functions)
361
+ self._extract_triggers(tree.root_node, functions)
362
+ log_debug(
363
+ f"Extracted {len(functions)} SQL functions/procedures/triggers"
364
+ )
365
+ except Exception as e:
366
+ log_debug(f"Error during function extraction: {e}")
367
+
368
+ return functions
369
+
370
+ def extract_classes(
371
+ self, tree: "tree_sitter.Tree", source_code: str
372
+ ) -> list[Class]:
373
+ """
374
+ Extract tables and views from SQL code.
375
+
376
+ Maps SQL structural definitions to Class elements:
377
+ - CREATE TABLE statements → Class
378
+ - CREATE VIEW statements → Class
379
+
380
+ Args:
381
+ tree: Tree-sitter AST tree parsed from SQL source
382
+ source_code: Original SQL source code as string
383
+
384
+ Returns:
385
+ List of Class elements representing tables and views
386
+ """
387
+ self.source_code = source_code or ""
388
+ self.content_lines = self.source_code.split("\n")
389
+ self._reset_caches()
390
+
391
+ classes: list[Class] = []
392
+
393
+ if tree is not None and tree.root_node is not None:
394
+ try:
395
+ # Extract tables and views
396
+ self._extract_tables(tree.root_node, classes)
397
+ self._extract_views(tree.root_node, classes)
398
+ log_debug(f"Extracted {len(classes)} SQL tables/views")
399
+ except Exception as e:
400
+ log_debug(f"Error during class extraction: {e}")
401
+
402
+ return classes
403
+
404
+ def extract_variables(
405
+ self, tree: "tree_sitter.Tree", source_code: str
406
+ ) -> list[Variable]:
407
+ """
408
+ Extract indexes from SQL code.
409
+
410
+ Maps SQL metadata definitions to Variable elements:
411
+ - CREATE INDEX statements → Variable
412
+
413
+ Args:
414
+ tree: Tree-sitter AST tree parsed from SQL source
415
+ source_code: Original SQL source code as string
416
+
417
+ Returns:
418
+ List of Variable elements representing indexes
419
+ """
420
+ self.source_code = source_code or ""
421
+ self.content_lines = self.source_code.split("\n")
422
+ self._reset_caches()
423
+
424
+ variables: list[Variable] = []
425
+
426
+ if tree is not None and tree.root_node is not None:
427
+ try:
428
+ # Extract indexes
429
+ self._extract_indexes(tree.root_node, variables)
430
+ log_debug(f"Extracted {len(variables)} SQL indexes")
431
+ except Exception as e:
432
+ log_debug(f"Error during variable extraction: {e}")
433
+
434
+ return variables
435
+
436
+ def extract_imports(
437
+ self, tree: "tree_sitter.Tree", source_code: str
438
+ ) -> list[Import]:
439
+ """
440
+ Extract schema references and dependencies from SQL code.
441
+
442
+ Extracts qualified names (schema.table) that represent cross-schema
443
+ dependencies, mapping them to Import elements.
444
+
445
+ Args:
446
+ tree: Tree-sitter AST tree parsed from SQL source
447
+ source_code: Original SQL source code as string
448
+
449
+ Returns:
450
+ List of Import elements representing schema references
451
+ """
452
+ self.source_code = source_code or ""
453
+ self.content_lines = self.source_code.split("\n")
454
+ self._reset_caches()
455
+
456
+ imports: list[Import] = []
457
+
458
+ if tree is not None and tree.root_node is not None:
459
+ try:
460
+ # Extract schema references (e.g., FROM schema.table)
461
+ self._extract_schema_references(tree.root_node, imports)
462
+ log_debug(f"Extracted {len(imports)} SQL schema references")
463
+ except Exception as e:
464
+ log_debug(f"Error during import extraction: {e}")
465
+
466
+ return imports
467
+
468
+ def _reset_caches(self) -> None:
469
+ """Reset performance caches."""
470
+ self._node_text_cache.clear()
471
+ self._processed_nodes.clear()
472
+
473
+ def _get_node_text(self, node: "tree_sitter.Node") -> str:
474
+ """
475
+ Get text content from a tree-sitter node with caching.
476
+
477
+ Uses byte-based extraction first, falls back to line-based extraction
478
+ if byte extraction fails. Results are cached for performance.
479
+
480
+ Args:
481
+ node: Tree-sitter node to extract text from
482
+
483
+ Returns:
484
+ Text content of the node, or empty string if extraction fails
485
+ """
486
+ node_id = id(node)
487
+
488
+ if node_id in self._node_text_cache:
489
+ return self._node_text_cache[node_id]
490
+
491
+ try:
492
+ start_byte = node.start_byte
493
+ end_byte = node.end_byte
494
+ encoding = self._file_encoding or "utf-8"
495
+ content_bytes = safe_encode("\n".join(self.content_lines), encoding)
496
+ text = extract_text_slice(content_bytes, start_byte, end_byte, encoding)
497
+
498
+ if text:
499
+ self._node_text_cache[node_id] = text
500
+ return text
501
+ except Exception as e:
502
+ log_debug(f"Error in _get_node_text: {e}")
503
+
504
+ # Fallback to line-based extraction
505
+ try:
506
+ start_point = node.start_point
507
+ end_point = node.end_point
508
+
509
+ if start_point[0] < 0 or start_point[0] >= len(self.content_lines):
510
+ return ""
511
+
512
+ if end_point[0] < 0 or end_point[0] >= len(self.content_lines):
513
+ return ""
514
+
515
+ if start_point[0] == end_point[0]:
516
+ line = self.content_lines[start_point[0]]
517
+ start_col = max(0, min(start_point[1], len(line)))
518
+ end_col = max(start_col, min(end_point[1], len(line)))
519
+ result: str = line[start_col:end_col]
520
+ self._node_text_cache[node_id] = result
521
+ return result
522
+ else:
523
+ lines = []
524
+ for i in range(
525
+ start_point[0], min(end_point[0] + 1, len(self.content_lines))
526
+ ):
527
+ if i < len(self.content_lines):
528
+ line = self.content_lines[i]
529
+ if i == start_point[0] and i == end_point[0]:
530
+ start_col = max(0, min(start_point[1], len(line)))
531
+ end_col = max(start_col, min(end_point[1], len(line)))
532
+ lines.append(line[start_col:end_col])
533
+ elif i == start_point[0]:
534
+ start_col = max(0, min(start_point[1], len(line)))
535
+ lines.append(line[start_col:])
536
+ elif i == end_point[0]:
537
+ end_col = max(0, min(end_point[1], len(line)))
538
+ lines.append(line[:end_col])
539
+ else:
540
+ lines.append(line)
541
+ result = "\n".join(lines)
542
+ self._node_text_cache[node_id] = result
543
+ return result
544
+ except Exception as fallback_error:
545
+ log_debug(f"Fallback text extraction also failed: {fallback_error}")
546
+ return ""
547
+
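
The line-based fallback is essentially slicing by tree-sitter style (row, column) points. A self-contained sketch of that slicing, with the same clamping idea, on a plain list of lines (slice_by_points is an illustrative name):

    def slice_by_points(lines, start_point, end_point):
        """Return the text between two (row, column) points."""
        (sr, sc), (er, ec) = start_point, end_point
        if not (0 <= sr < len(lines) and 0 <= er < len(lines)):
            return ""
        if sr == er:  # single-line node
            line = lines[sr]
            sc = max(0, min(sc, len(line)))
            return line[sc:max(sc, min(ec, len(line)))]
        parts = [lines[sr][sc:]]        # tail of the first line
        parts += lines[sr + 1:er]       # full middle lines
        parts.append(lines[er][:ec])    # head of the last line
        return "\n".join(parts)

    lines = ["CREATE TABLE users (", "    id INT PRIMARY KEY,", "    name VARCHAR(100)", ");"]
    print(slice_by_points(lines, (0, 7), (0, 12)))   # 'TABLE'
    print(slice_by_points(lines, (1, 4), (2, 8)))    # 'id INT PRIMARY KEY,\n    name'
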
548
+ def _traverse_nodes(self, node: "tree_sitter.Node") -> Iterator["tree_sitter.Node"]:
549
+ """
550
+ Traverse tree nodes recursively in depth-first order.
551
+
552
+ Args:
553
+ node: Root node to start traversal from
554
+
555
+ Yields:
556
+ Each node in the tree, starting with the root node
557
+ """
558
+ yield node
559
+ if hasattr(node, "children"):
560
+ for child in node.children:
561
+ yield from self._traverse_nodes(child)
562
+
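
The traversal itself is a plain pre-order depth-first generator. The same pattern on a minimal stand-in node type (purely illustrative; the real method walks tree_sitter.Node objects):

    from dataclasses import dataclass, field

    @dataclass
    class Node:                        # stand-in for tree_sitter.Node
        type: str
        children: list = field(default_factory=list)

    def traverse(node):
        yield node                     # visit the node itself first...
        for child in node.children:    # ...then each subtree, left to right
            yield from traverse(child)

    root = Node("program", [Node("create_table", [Node("object_reference", [Node("identifier")])])])
    print([n.type for n in traverse(root)])
    # ['program', 'create_table', 'object_reference', 'identifier']
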
563
+ def _is_valid_identifier(self, name: str) -> bool:
564
+ """
565
+ Validate that a name is a valid SQL identifier.
566
+
567
+ This prevents accepting multi-line text or SQL statements as identifiers.
568
+ Also rejects common column names and SQL reserved keywords.
569
+
570
+ Args:
571
+ name: The identifier to validate
572
+
573
+ Returns:
574
+ True if the name is a valid identifier, False otherwise
575
+ """
576
+ if not name:
577
+ return False
578
+
579
+ # Reject if contains newlines or other control characters
580
+ if "\n" in name or "\r" in name or "\t" in name:
581
+ return False
582
+
583
+ # Reject if matches SQL statement patterns (keyword followed by space)
584
+ # This catches "CREATE TABLE" but allows "create_table" as an identifier
585
+ name_upper = name.upper()
586
+ sql_statement_patterns = [
587
+ "CREATE ",
588
+ "SELECT ",
589
+ "INSERT ",
590
+ "UPDATE ",
591
+ "DELETE ",
592
+ "DROP ",
593
+ "ALTER ",
594
+ "TABLE ",
595
+ "VIEW ",
596
+ "PROCEDURE ",
597
+ "FUNCTION ",
598
+ "TRIGGER ",
599
+ ]
600
+ if any(name_upper.startswith(pattern) for pattern in sql_statement_patterns):
601
+ return False
602
+
603
+ # Reject common column names that should never be function names
604
+ # These are typical column names that might appear in SELECT statements
605
+ common_column_names = {
606
+ "PRICE",
607
+ "QUANTITY",
608
+ "TOTAL",
609
+ "AMOUNT",
610
+ "COUNT",
611
+ "SUM",
612
+ "CREATED_AT",
613
+ "UPDATED_AT",
614
+ "ID",
615
+ "NAME",
616
+ "EMAIL",
617
+ "STATUS",
618
+ "VALUE",
619
+ "DATE",
620
+ "TIME",
621
+ "TIMESTAMP",
622
+ "USER_ID",
623
+ "ORDER_ID",
624
+ "PRODUCT_ID",
625
+ }
626
+ if name_upper in common_column_names:
627
+ return False
628
+
629
+ # Reject common SQL keywords that should never be identifiers
630
+ sql_keywords = {
631
+ "SELECT",
632
+ "FROM",
633
+ "WHERE",
634
+ "AS",
635
+ "IF",
636
+ "NOT",
637
+ "EXISTS",
638
+ "NULL",
639
+ "CURRENT_TIMESTAMP",
640
+ "NOW",
641
+ "SYSDATE",
642
+ "AVG",
643
+ "MAX",
644
+ "MIN",
645
+ "AND",
646
+ "OR",
647
+ "IN",
648
+ "LIKE",
649
+ "BETWEEN",
650
+ "JOIN",
651
+ "LEFT",
652
+ "RIGHT",
653
+ "INNER",
654
+ "OUTER",
655
+ "CROSS",
656
+ "ON",
657
+ "USING",
658
+ "GROUP",
659
+ "BY",
660
+ "ORDER",
661
+ "HAVING",
662
+ "LIMIT",
663
+ "OFFSET",
664
+ "DISTINCT",
665
+ "ALL",
666
+ "UNION",
667
+ "INTERSECT",
668
+ "EXCEPT",
669
+ "INSERT",
670
+ "UPDATE",
671
+ "DELETE",
672
+ "CREATE",
673
+ "DROP",
674
+ "ALTER",
675
+ "TABLE",
676
+ "VIEW",
677
+ "INDEX",
678
+ "TRIGGER",
679
+ "PROCEDURE",
680
+ "FUNCTION",
681
+ "PRIMARY",
682
+ "FOREIGN",
683
+ "KEY",
684
+ "UNIQUE",
685
+ "CHECK",
686
+ "DEFAULT",
687
+ "REFERENCES",
688
+ "CASCADE",
689
+ "RESTRICT",
690
+ "SET",
691
+ "NO",
692
+ "ACTION",
693
+ "INTO",
694
+ "VALUES",
695
+ "BEGIN",
696
+ "END",
697
+ "DECLARE",
698
+ "RETURN",
699
+ "RETURNS",
700
+ "READS",
701
+ "SQL",
702
+ "DATA",
703
+ "DETERMINISTIC",
704
+ "BEFORE",
705
+ "AFTER",
706
+ "EACH",
707
+ "ROW",
708
+ "FOR",
709
+ "COALESCE",
710
+ "CASE",
711
+ "WHEN",
712
+ "THEN",
713
+ "ELSE",
714
+ }
715
+ if name_upper in sql_keywords:
716
+ return False
717
+
718
+ # Reject if contains parentheses (like "users (" or "(id")
719
+ if "(" in name or ")" in name:
720
+ return False
721
+
722
+ # Reject if too long (identifiers should be reasonable length)
723
+ if len(name) > 128:
724
+ return False
725
+
726
+ # Accept if it matches standard identifier pattern
727
+ import re
728
+
729
+ # Accept standard unquoted identifiers: letters, digits, and underscores
730
+ if re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", name):
731
+ return True
732
+
733
+ # Also allow quoted identifiers (backticks, double quotes, square brackets)
734
+ if re.match(r'^[`"\[].*[`"\]]$', name):
735
+ return True
736
+
737
+ return False
738
+
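
A condensed sketch of the validation idea: structural checks first, then the keyword and column-name blocklists, then the identifier patterns. The sets below are abbreviated samples of the fuller ones above, and the function name is illustrative.

    import re

    SQL_KEYWORDS = {"SELECT", "FROM", "WHERE", "CREATE", "TABLE", "PRIMARY", "KEY"}   # abbreviated
    COMMON_COLUMNS = {"ID", "NAME", "PRICE", "CREATED_AT"}                            # abbreviated

    def is_valid_identifier(name):
        if not name or any(c in name for c in "\n\r\t()") or len(name) > 128:
            return False
        upper = name.upper()
        if upper in SQL_KEYWORDS or upper in COMMON_COLUMNS:
            return False
        if re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", name):       # plain identifier
            return True
        return bool(re.match(r'^[`"\[].*[`"\]]$', name))       # quoted identifier

    for candidate in ("update_stock", "SELECT", "price", "`order`", "users (id"):
        print(candidate, "->", is_valid_identifier(candidate))
    # update_stock -> True, SELECT -> False, price -> False, `order` -> True, users (id -> False
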
739
+ def _extract_tables(
740
+ self, root_node: "tree_sitter.Node", classes: list[Class]
741
+ ) -> None:
742
+ """
743
+ Extract CREATE TABLE statements from SQL AST.
744
+
745
+ Searches for create_table nodes and identifies table names from
746
+ object_reference.identifier, supporting both simple identifiers
747
+ and qualified names (schema.table).
748
+
749
+ Args:
750
+ root_node: Root node of the SQL AST
751
+ classes: List to append extracted table Class elements to
752
+ """
753
+ for node in self._traverse_nodes(root_node):
754
+ if node.type == "create_table":
755
+ # Look for object_reference within create_table
756
+ table_name = None
757
+ for child in node.children:
758
+ if child.type == "object_reference":
759
+ # object_reference contains identifier
760
+ for subchild in child.children:
761
+ if subchild.type == "identifier":
762
+ table_name = self._get_node_text(subchild).strip()
763
+ # Validate table name
764
+ if table_name and self._is_valid_identifier(table_name):
765
+ break
766
+ else:
767
+ table_name = None
768
+ if table_name:
769
+ break
770
+
771
+ if table_name:
772
+ try:
773
+ start_line = node.start_point[0] + 1
774
+ end_line = node.end_point[0] + 1
775
+ raw_text = self._get_node_text(node)
776
+
777
+ cls = Class(
778
+ name=table_name,
779
+ start_line=start_line,
780
+ end_line=end_line,
781
+ raw_text=raw_text,
782
+ language="sql",
783
+ )
784
+ classes.append(cls)
785
+ except Exception as e:
786
+ log_debug(f"Failed to extract table: {e}")
787
+
788
+ def _extract_views(
789
+ self, root_node: "tree_sitter.Node", classes: list[Class]
790
+ ) -> None:
791
+ """
792
+ Extract CREATE VIEW statements from SQL AST.
793
+
794
+ Searches for create_view nodes and extracts view names from
795
+ object_reference.identifier, supporting qualified names.
796
+
797
+ Args:
798
+ root_node: Root node of the SQL AST
799
+ classes: List to append extracted view Class elements to
800
+ """
801
+ import re
802
+
803
+ for node in self._traverse_nodes(root_node):
804
+ if node.type == "create_view":
805
+ # Get raw text first for fallback regex
806
+ raw_text = self._get_node_text(node)
807
+ view_name = None
808
+
809
+ # FIRST: Try regex parsing (most reliable for CREATE VIEW)
810
+ if raw_text:
811
+ # Pattern: CREATE VIEW [IF NOT EXISTS] view_name AS
812
+ match = re.search(
813
+ r"CREATE\s+VIEW\s+(?:IF\s+NOT\s+EXISTS\s+)?(\w+)\s+AS",
814
+ raw_text,
815
+ re.IGNORECASE,
816
+ )
817
+ if match:
818
+ potential_name = match.group(1).strip()
819
+ if self._is_valid_identifier(potential_name):
820
+ view_name = potential_name
821
+
822
+ # Fallback: Try AST parsing if regex didn't work
823
+ if not view_name:
824
+ for child in node.children:
825
+ if child.type == "object_reference":
826
+ # object_reference contains identifier
827
+ for subchild in child.children:
828
+ if subchild.type == "identifier":
829
+ potential_name = self._get_node_text(subchild)
830
+ if potential_name:
831
+ potential_name = potential_name.strip()
832
+ # Validate view name - exclude SQL keywords
833
+ if (
834
+ potential_name
835
+ and self._is_valid_identifier(
836
+ potential_name
837
+ )
838
+ and potential_name.upper()
839
+ not in (
840
+ "SELECT",
841
+ "FROM",
842
+ "WHERE",
843
+ "AS",
844
+ "IF",
845
+ "NOT",
846
+ "EXISTS",
847
+ "NULL",
848
+ "CURRENT_TIMESTAMP",
849
+ "NOW",
850
+ "SYSDATE",
851
+ )
852
+ ):
853
+ view_name = potential_name
854
+ break
855
+ if view_name:
856
+ break
857
+
858
+ if view_name:
859
+ try:
860
+ start_line = node.start_point[0] + 1
861
+ end_line = node.end_point[0] + 1
862
+
863
+ # Fix for truncated view definitions (single-line misparsing)
864
+ # When tree-sitter misparses a view as a single line (e.g. lines 47-47),
865
+ # we need to expand the range to include the actual query definition.
866
+ # We look for the next semicolon or empty line to find the true end.
867
+ if start_line == end_line and self.source_code:
868
+ # This logic is similar to the recovery logic in _validate_and_fix_elements
869
+ # Find where the view definition actually ends
870
+ current_line_idx = start_line - 1
871
+
872
+ # Scan forward for semicolon to find end of statement
873
+ found_end = False
874
+ for i in range(current_line_idx, len(self.content_lines)):
875
+ line = self.content_lines[i]
876
+ if ";" in line:
877
+ end_line = i + 1
878
+ found_end = True
879
+ break
880
+
881
+ # If no semicolon found within reasonable range, use a fallback
882
+ if not found_end:
883
+ # Look for empty line as separator or next CREATE statement
884
+ for i in range(
885
+ current_line_idx + 1,
886
+ min(len(self.content_lines), current_line_idx + 50),
887
+ ):
888
+ line = self.content_lines[i].strip()
889
+ if not line or line.upper().startswith("CREATE "):
890
+ end_line = i # End before this line
891
+ found_end = True
892
+ break
893
+
894
+ # Update raw_text to cover the full range
895
+ # Re-extract text for the corrected range
896
+ if found_end and end_line > start_line:
897
+ raw_text = "\n".join(
898
+ self.content_lines[current_line_idx:end_line]
899
+ )
900
+ log_debug(
901
+ f"Corrected view span for {view_name}: {start_line}-{end_line}"
902
+ )
903
+
904
+ cls = Class(
905
+ name=view_name,
906
+ start_line=start_line,
907
+ end_line=end_line,
908
+ raw_text=raw_text,
909
+ language="sql",
910
+ )
911
+ classes.append(cls)
912
+ except Exception as e:
913
+ log_debug(f"Failed to extract view: {e}")
914
+
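
The single-line span correction amounts to: from the reported start line, scan forward for the first ';' and fall back to a blank line or the next CREATE statement. A standalone sketch under those assumptions (correct_view_span is an illustrative name):

    def correct_view_span(lines, start_line, window=50):
        """Return a 1-based end line for a statement that begins at start_line."""
        idx = start_line - 1
        for i in range(idx, len(lines)):                  # prefer an explicit terminator
            if ";" in lines[i]:
                return i + 1
        for i in range(idx + 1, min(len(lines), idx + window)):
            stripped = lines[i].strip()                   # otherwise stop at a blank line
            if not stripped or stripped.upper().startswith("CREATE "):
                return i                                  # end just before this line
        return start_line                                 # give up: keep the single line

    lines = [
        "CREATE VIEW active_users AS",
        "SELECT id, name",
        "FROM users",
        "WHERE active = 1;",
    ]
    print(correct_view_span(lines, 1))   # 4
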
915
+ def _extract_procedures(
916
+ self, root_node: "tree_sitter.Node", functions: list[Function]
917
+ ) -> None:
918
+ """
919
+ Extract CREATE PROCEDURE statements from SQL AST.
920
+
921
+ Since tree-sitter-sql doesn't fully support PROCEDURE syntax, these
922
+ appear as ERROR nodes. The PROCEDURE keyword is not tokenized, so we
923
+ need to check the raw text content of ERROR nodes that contain
924
+ keyword_create and look for "PROCEDURE" in the text.
925
+
926
+ Args:
927
+ root_node: Root node of the SQL AST
928
+ functions: List to append extracted procedure Function elements to
929
+ """
930
+ for node in self._traverse_nodes(root_node):
931
+ if node.type == "ERROR":
932
+ # Check if this ERROR node contains CREATE and PROCEDURE in text
933
+ has_create = False
934
+ node_text = self._get_node_text(node)
935
+ node_text_upper = node_text.upper()
936
+
937
+ # Look for keyword_create child
938
+ for child in node.children:
939
+ if child.type == "keyword_create":
940
+ has_create = True
941
+ break
942
+
943
+ # Check if the text contains PROCEDURE
944
+ if has_create and "PROCEDURE" in node_text_upper:
945
+ # Extract procedure name from the text (preserve original case)
946
+ # Use finditer to find ALL procedures in the ERROR node
947
+ import re
948
+
949
+ matches = re.finditer(
950
+ r"CREATE\s+PROCEDURE\s+([a-zA-Z_][a-zA-Z0-9_]*)",
951
+ node_text,
952
+ re.IGNORECASE,
953
+ )
954
+
955
+ for match in matches:
956
+ proc_name = match.group(1)
957
+
958
+ if proc_name:
959
+ try:
960
+ # Calculate start line based on match position
961
+ newlines_before = node_text[: match.start()].count("\n")
962
+ start_line = node.start_point[0] + 1 + newlines_before
963
+ end_line = node.end_point[0] + 1
964
+
965
+ # Ideally we would slice out just this procedure's text; for this
966
+ # legacy extraction path the whole ERROR-node text is kept, which is
967
+ # the safer choice when several procedures share a single node.
968
+ raw_text = self._get_node_text(node)
969
+
970
+ func = Function(
971
+ name=proc_name,
972
+ start_line=start_line,
973
+ end_line=end_line,
974
+ raw_text=raw_text,
975
+ language="sql",
976
+ )
977
+ functions.append(func)
978
+ except Exception as e:
979
+ log_debug(f"Failed to extract procedure: {e}")
980
+
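
The per-match line arithmetic is worth spelling out: a match's start line is the ERROR node's own start row plus the number of newlines that precede the match inside the node's text. A small sketch with a made-up node position:

    import re

    error_node_text = """CREATE PROCEDURE get_user(IN uid INT)
    BEGIN
        SELECT * FROM users WHERE id = uid;
    END;

    CREATE PROCEDURE delete_user(IN uid INT)
    BEGIN
        DELETE FROM users WHERE id = uid;
    END;"""
    node_start_row = 40      # 0-based row where the ERROR node begins (fake value)

    pattern = re.compile(r"CREATE\s+PROCEDURE\s+([a-zA-Z_][a-zA-Z0-9_]*)", re.IGNORECASE)
    for m in pattern.finditer(error_node_text):
        newlines_before = error_node_text[: m.start()].count("\n")
        start_line = node_start_row + 1 + newlines_before   # convert to a 1-based source line
        print(m.group(1), "starts at line", start_line)
    # get_user starts at line 41
    # delete_user starts at line 46
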
981
+ def _extract_sql_functions(
982
+ self, root_node: "tree_sitter.Node", functions: list[Function]
983
+ ) -> None:
984
+ """
985
+ Extract CREATE FUNCTION statements from SQL AST.
986
+
987
+ Functions are properly parsed as create_function nodes, so we search
988
+ for these nodes and extract the function name from object_reference > identifier.
989
+
990
+ Args:
991
+ root_node: Root node of the SQL AST
992
+ functions: List to append extracted function Function elements to
993
+ """
994
+ for node in self._traverse_nodes(root_node):
995
+ if node.type == "create_function":
996
+ func_name = None
997
+ # Only use the FIRST object_reference as the function name
998
+ for child in node.children:
999
+ if child.type == "object_reference":
1000
+ # Only process the first object_reference
1001
+ for subchild in child.children:
1002
+ if subchild.type == "identifier":
1003
+ func_name = self._get_node_text(subchild).strip()
1004
+ if func_name and self._is_valid_identifier(func_name):
1005
+ break
1006
+ else:
1007
+ func_name = None
1008
+ break # Stop after first object_reference
1009
+
1010
+ # Fallback: Parse from raw text if AST parsing failed or returned invalid name
1011
+ if not func_name:
1012
+ raw_text = self._get_node_text(node)
1013
+ import re
1014
+
1015
+ match = re.search(
1016
+ r"CREATE\s+FUNCTION\s+(\w+)\s*\(", raw_text, re.IGNORECASE
1017
+ )
1018
+ if match:
1019
+ potential_name = match.group(1).strip()
1020
+ if self._is_valid_identifier(potential_name):
1021
+ func_name = potential_name
1022
+
1023
+ if func_name:
1024
+ try:
1025
+ start_line = node.start_point[0] + 1
1026
+ end_line = node.end_point[0] + 1
1027
+ raw_text = self._get_node_text(node)
1028
+ func = Function(
1029
+ name=func_name,
1030
+ start_line=start_line,
1031
+ end_line=end_line,
1032
+ raw_text=raw_text,
1033
+ language="sql",
1034
+ )
1035
+ functions.append(func)
1036
+ except Exception as e:
1037
+ log_debug(f"Failed to extract function: {e}")
1038
+
1039
+ def _extract_triggers(
1040
+ self, root_node: "tree_sitter.Node", functions: list[Function]
1041
+ ) -> None:
1042
+ """
1043
+ Extract CREATE TRIGGER statements from SQL AST.
1044
+
1045
+ Since tree-sitter-sql doesn't fully support TRIGGER syntax, these
1046
+ appear as ERROR nodes. We search for ERROR nodes containing both
1047
+ keyword_create and keyword_trigger, then extract the trigger name
1048
+ from the first object_reference > identifier that appears after
1049
+ keyword_trigger.
1050
+
1051
+ Args:
1052
+ root_node: Root node of the SQL AST
1053
+ functions: List to append extracted trigger Function elements to
1054
+ """
1055
+ for node in self._traverse_nodes(root_node):
1056
+ if node.type == "ERROR":
1057
+ # Check if this ERROR node contains CREATE TRIGGER
1058
+ # Since multiple triggers might be lumped into one ERROR node,
1059
+ # we need to scan all children or use regex.
1060
+ # Using regex on the node text is more robust for ERROR nodes.
1061
+
1062
+ node_text = self._get_node_text(node)
1063
+ if not node_text:
1064
+ continue
1065
+
1066
+ node_text_upper = node_text.upper()
1067
+ if "CREATE" in node_text_upper and "TRIGGER" in node_text_upper:
1068
+ import re
1069
+
1070
+ # Regex to find CREATE TRIGGER statements
1071
+ # Matches: CREATE TRIGGER [IF NOT EXISTS] trigger_name
1072
+ matches = re.finditer(
1073
+ r"CREATE\s+TRIGGER\s+(?:IF\s+NOT\s+EXISTS\s+)?([a-zA-Z_][a-zA-Z0-9_]*)",
1074
+ node_text,
1075
+ re.IGNORECASE,
1076
+ )
1077
+
1078
+ for match in matches:
1079
+ trigger_name = match.group(1)
1080
+
1081
+ if trigger_name and self._is_valid_identifier(trigger_name):
1082
+ # Skip common SQL keywords
1083
+ if trigger_name.upper() in (
1084
+ "KEY",
1085
+ "AUTO_INCREMENT",
1086
+ "PRIMARY",
1087
+ "FOREIGN",
1088
+ "INDEX",
1089
+ "UNIQUE",
1090
+ "PRICE",
1091
+ "QUANTITY",
1092
+ "TOTAL",
1093
+ "SUM",
1094
+ "COUNT",
1095
+ "AVG",
1096
+ "MAX",
1097
+ "MIN",
1098
+ "CONSTRAINT",
1099
+ "CHECK",
1100
+ "DEFAULT",
1101
+ "REFERENCES",
1102
+ "ON",
1103
+ "UPDATE",
1104
+ "DELETE",
1105
+ "INSERT",
1106
+ "BEFORE",
1107
+ "AFTER",
1108
+ "INSTEAD",
1109
+ "OF",
1110
+ ):
1111
+ continue
1112
+
1113
+ try:
1114
+ # Calculate start line based on match position
1115
+ newlines_before = node_text[: match.start()].count("\n")
1116
+ start_line = node.start_point[0] + 1 + newlines_before
1117
+ end_line = node.end_point[0] + 1
1118
+
1119
+ # Use the whole error node text as raw text for now
1120
+ raw_text = node_text
1121
+
1122
+ func = Function(
1123
+ name=trigger_name,
1124
+ start_line=start_line,
1125
+ end_line=end_line,
1126
+ raw_text=raw_text,
1127
+ language="sql",
1128
+ )
1129
+ functions.append(func)
1130
+ except Exception as e:
1131
+ log_debug(f"Failed to extract trigger: {e}")
1132
+
1133
+ def _extract_indexes(
1134
+ self, root_node: "tree_sitter.Node", variables: list[Variable]
1135
+ ) -> None:
1136
+ """
1137
+ Extract CREATE INDEX statements from SQL AST.
1138
+
1139
+ Searches for create_index nodes and extracts index names from
1140
+ identifier child nodes.
1141
+
1142
+ Args:
1143
+ root_node: Root node of the SQL AST
1144
+ variables: List to append extracted index Variable elements to
1145
+ """
1146
+ for node in self._traverse_nodes(root_node):
1147
+ if node.type == "create_index":
1148
+ # Index name is directly in identifier child
1149
+ index_name = None
1150
+ for child in node.children:
1151
+ if child.type == "identifier":
1152
+ index_name = self._get_node_text(child).strip()
1153
+ break
1154
+
1155
+ if index_name:
1156
+ try:
1157
+ start_line = node.start_point[0] + 1
1158
+ end_line = node.end_point[0] + 1
1159
+ raw_text = self._get_node_text(node)
1160
+
1161
+ var = Variable(
1162
+ name=index_name,
1163
+ start_line=start_line,
1164
+ end_line=end_line,
1165
+ raw_text=raw_text,
1166
+ language="sql",
1167
+ )
1168
+ variables.append(var)
1169
+ except Exception as e:
1170
+ log_debug(f"Failed to extract index: {e}")
1171
+
1172
+ def _extract_schema_references(
1173
+ self, root_node: "tree_sitter.Node", imports: list[Import]
1174
+ ) -> None:
1175
+ """Extract schema references (e.g., FROM schema.table)."""
1176
+ # Simplified implementation: a full version would resolve schema.table
1177
+ # references properly; for now we extract qualified names that look like
1178
+ # cross-schema references (exactly one dot).
1179
+ for node in self._traverse_nodes(root_node):
1180
+ if node.type == "qualified_name":
1181
+ # Check if this looks like a schema reference
1182
+ text = self._get_node_text(node)
1183
+ if "." in text and len(text.split(".")) == 2:
1184
+ try:
1185
+ start_line = node.start_point[0] + 1
1186
+ end_line = node.end_point[0] + 1
1187
+ raw_text = text
1188
+
1189
+ imp = Import(
1190
+ name=text,
1191
+ start_line=start_line,
1192
+ end_line=end_line,
1193
+ raw_text=raw_text,
1194
+ language="sql",
1195
+ )
1196
+ imports.append(imp)
1197
+ except Exception as e:
1198
+ log_debug(f"Failed to extract schema reference: {e}")
1199
+
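
The schema-reference heuristic is simply "exactly one dot, two parts", which is deliberately loose; a quick illustration:

    candidates = ["analytics.events", "users", "db.schema.table", "orders.total"]
    schema_refs = [c for c in candidates if "." in c and len(c.split(".")) == 2]
    print(schema_refs)   # ['analytics.events', 'orders.total']
    # note: column references such as orders.total also pass this simple check
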
1200
+ def _extract_sql_tables(
1201
+ self, root_node: "tree_sitter.Node", sql_elements: list[SQLElement]
1202
+ ) -> None:
1203
+ """
1204
+ Extract CREATE TABLE statements with enhanced metadata.
1205
+
1206
+ Extracts table information including columns, data types, constraints,
1207
+ and dependencies for comprehensive table analysis.
1208
+ """
1209
+ for node in self._traverse_nodes(root_node):
1210
+ if node.type == "create_table":
1211
+ table_name = None
1212
+ columns = []
1213
+ constraints = []
1214
+
1215
+ # Extract table name
1216
+ for child in node.children:
1217
+ if child.type == "object_reference":
1218
+ for subchild in child.children:
1219
+ if subchild.type == "identifier":
1220
+ table_name = self._get_node_text(subchild).strip()
1221
+ # Validate table name - should be a simple identifier
1222
+ if table_name and self._is_valid_identifier(table_name):
1223
+ break
1224
+ else:
1225
+ table_name = None
1226
+ if table_name:
1227
+ break
1228
+
1229
+ # Extract column definitions
1230
+ self._extract_table_columns(node, columns, constraints)
1231
+
1232
+ if table_name:
1233
+ try:
1234
+ start_line = node.start_point[0] + 1
1235
+ end_line = node.end_point[0] + 1
1236
+ raw_text = self._get_node_text(node)
1237
+
1238
+ table = SQLTable(
1239
+ name=table_name,
1240
+ start_line=start_line,
1241
+ end_line=end_line,
1242
+ raw_text=raw_text,
1243
+ language="sql",
1244
+ columns=columns,
1245
+ constraints=constraints,
1246
+ )
1247
+ sql_elements.append(table)
1248
+ except Exception as e:
1249
+ log_debug(f"Failed to extract enhanced table: {e}")
1250
+
1251
+ def _extract_table_columns(
1252
+ self,
1253
+ table_node: "tree_sitter.Node",
1254
+ columns: list[SQLColumn],
1255
+ constraints: list[SQLConstraint],
1256
+ ) -> None:
1257
+ """Extract column definitions from CREATE TABLE statement."""
1258
+ # Use a more robust approach to extract columns
1259
+ table_text = self._get_node_text(table_node)
1260
+
1261
+ # Parse the table definition using regex as fallback
1262
+ import re
1263
+
1264
+ # Extract the content between parentheses
1265
+ table_content_match = re.search(
1266
+ r"\(\s*(.*?)\s*\)(?:\s*;)?$", table_text, re.DOTALL
1267
+ )
1268
+ if table_content_match:
1269
+ table_content = table_content_match.group(1)
1270
+
1271
+ # Split by commas, but be careful with nested parentheses
1272
+ column_definitions = self._split_column_definitions(table_content)
1273
+
1274
+ for col_def in column_definitions:
1275
+ col_def = col_def.strip()
1276
+ if not col_def or col_def.upper().startswith(
1277
+ ("PRIMARY KEY", "FOREIGN KEY", "UNIQUE", "INDEX", "KEY")
1278
+ ):
1279
+ continue
1280
+
1281
+ # Parse individual column definition
1282
+ column = self._parse_column_definition(col_def)
1283
+ if column:
1284
+ columns.append(column)
1285
+
1286
+ # Also try tree-sitter approach as backup
1287
+ for node in self._traverse_nodes(table_node):
1288
+ if node.type == "column_definition":
1289
+ column_name = None
1290
+ data_type = None
1291
+ nullable = True
1292
+ is_primary_key = False
1293
+
1294
+ # Extract column name and type
1295
+ for child in node.children:
1296
+ if child.type == "identifier" and column_name is None:
1297
+ column_name = self._get_node_text(child).strip()
1298
+ elif child.type in ["data_type", "type_name"]:
1299
+ data_type = self._get_node_text(child).strip()
1300
+ elif (
1301
+ child.type == "not_null"
1302
+ or "NOT NULL" in self._get_node_text(child).upper()
1303
+ ):
1304
+ nullable = False
1305
+ elif (
1306
+ child.type == "primary_key"
1307
+ or "PRIMARY KEY" in self._get_node_text(child).upper()
1308
+ ):
1309
+ is_primary_key = True
1310
+
1311
+ if column_name and data_type:
1312
+ # Check if this column is already added by regex parsing
1313
+ existing_column = next(
1314
+ (c for c in columns if c.name == column_name), None
1315
+ )
1316
+ if not existing_column:
1317
+ column = SQLColumn(
1318
+ name=column_name,
1319
+ data_type=data_type,
1320
+ nullable=nullable,
1321
+ is_primary_key=is_primary_key,
1322
+ )
1323
+ columns.append(column)
1324
+
1325
+ def _split_column_definitions(self, content: str) -> list[str]:
1326
+ """Split column definitions by commas, handling nested parentheses."""
1327
+ definitions = []
1328
+ current_def = ""
1329
+ paren_count = 0
1330
+
1331
+ for char in content:
1332
+ if char == "(":
1333
+ paren_count += 1
1334
+ elif char == ")":
1335
+ paren_count -= 1
1336
+ elif char == "," and paren_count == 0:
1337
+ if current_def.strip():
1338
+ definitions.append(current_def.strip())
1339
+ current_def = ""
1340
+ continue
1341
+
1342
+ current_def += char
1343
+
1344
+ if current_def.strip():
1345
+ definitions.append(current_def.strip())
1346
+
1347
+ return definitions
1348
+
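
The parenthesis counter is what keeps types such as DECIMAL(10,2) and inline FOREIGN KEY (...) clauses intact; a naive split on ',' would break them. A free-standing copy of the same idea for a runnable check (split_columns is an illustrative name):

    def split_columns(content):             # same idea as _split_column_definitions
        parts, current, depth = [], "", 0
        for ch in content:
            if ch == "(":
                depth += 1
            elif ch == ")":
                depth -= 1
            elif ch == "," and depth == 0:  # only split at top-level commas
                parts.append(current.strip())
                current = ""
                continue
            current += ch
        if current.strip():
            parts.append(current.strip())
        return parts

    print(split_columns("id INT PRIMARY KEY, price DECIMAL(10,2) NOT NULL, FOREIGN KEY (user_id) REFERENCES users(id)"))
    # ['id INT PRIMARY KEY', 'price DECIMAL(10,2) NOT NULL', 'FOREIGN KEY (user_id) REFERENCES users(id)']
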
1349
+ def _parse_column_definition(self, col_def: str) -> SQLColumn | None:
1350
+ """Parse a single column definition string."""
1351
+ import re
1352
+
1353
+ # Basic pattern: column_name data_type [constraints]
1354
+ match = re.match(
1355
+ r"^\s*([a-zA-Z_][a-zA-Z0-9_]*)\s+([A-Z]+(?:\([^)]*\))?)",
1356
+ col_def,
1357
+ re.IGNORECASE,
1358
+ )
1359
+ if not match:
1360
+ return None
1361
+
1362
+ column_name = match.group(1)
1363
+ data_type = match.group(2)
1364
+
1365
+ # Check for constraints
1366
+ col_def_upper = col_def.upper()
1367
+ nullable = "NOT NULL" not in col_def_upper
1368
+ is_primary_key = (
1369
+ "PRIMARY KEY" in col_def_upper or "AUTO_INCREMENT" in col_def_upper
1370
+ )
1371
+ is_foreign_key = "REFERENCES" in col_def_upper
1372
+
1373
+ foreign_key_reference = None
1374
+ if is_foreign_key:
1375
+ ref_match = re.search(
1376
+ r"REFERENCES\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(([^)]+)\)",
1377
+ col_def,
1378
+ re.IGNORECASE,
1379
+ )
1380
+ if ref_match:
1381
+ foreign_key_reference = f"{ref_match.group(1)}({ref_match.group(2)})"
1382
+
1383
+ return SQLColumn(
1384
+ name=column_name,
1385
+ data_type=data_type,
1386
+ nullable=nullable,
1387
+ is_primary_key=is_primary_key,
1388
+ is_foreign_key=is_foreign_key,
1389
+ foreign_key_reference=foreign_key_reference,
1390
+ )
1391
+
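
What the column regexes pick out of a single definition string, shown with a plain dict instead of the package's SQLColumn model (parse_column is an illustrative name):

    import re

    def parse_column(col_def):
        m = re.match(r"^\s*([a-zA-Z_][a-zA-Z0-9_]*)\s+([A-Z]+(?:\([^)]*\))?)", col_def, re.IGNORECASE)
        if not m:
            return None
        upper = col_def.upper()
        ref = re.search(r"REFERENCES\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(([^)]+)\)", col_def, re.IGNORECASE)
        return {
            "name": m.group(1),
            "data_type": m.group(2),
            "nullable": "NOT NULL" not in upper,
            "is_primary_key": "PRIMARY KEY" in upper or "AUTO_INCREMENT" in upper,
            "foreign_key": f"{ref.group(1)}({ref.group(2)})" if ref else None,
        }

    print(parse_column("user_id INT NOT NULL REFERENCES users(id)"))
    # {'name': 'user_id', 'data_type': 'INT', 'nullable': False,
    #  'is_primary_key': False, 'foreign_key': 'users(id)'}
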
1392
+ def _extract_sql_views(
1393
+ self, root_node: "tree_sitter.Node", sql_elements: list[SQLElement]
1394
+ ) -> None:
1395
+ """Extract CREATE VIEW statements with enhanced metadata."""
1396
+ for node in self._traverse_nodes(root_node):
1397
+ if node.type == "ERROR":
1398
+ # Handle views inside ERROR nodes (common in some environments)
1399
+ raw_text = self._get_node_text(node)
1400
+ if not raw_text:
1401
+ continue
1402
+
1403
+ import re
1404
+
1405
+ # Find all views in this error node
1406
+ view_matches = re.finditer(
1407
+ r"CREATE\s+VIEW\s+(?:IF\s+NOT\s+EXISTS\s+)?(\w+)\s+AS",
1408
+ raw_text,
1409
+ re.IGNORECASE,
1410
+ )
1411
+
1412
+ for match in view_matches:
1413
+ view_name = match.group(1).strip()
1414
+ if not self._is_valid_identifier(view_name):
1415
+ continue
1416
+
1417
+ # Avoid duplicates
1418
+ if any(
1419
+ e.name == view_name and isinstance(e, SQLView)
1420
+ for e in sql_elements
1421
+ ):
1422
+ continue
1423
+
1424
+ start_line = node.start_point[0] + 1
1425
+ end_line = node.end_point[0] + 1
1426
+
1427
+ # Extract source tables from context following the view definition
1428
+ view_context = raw_text[match.end() :]
1429
+ semicolon_match = re.search(r";", view_context)
1430
+ if semicolon_match:
1431
+ view_context = view_context[: semicolon_match.end()]
1432
+
1433
+ source_tables = []
1434
+ # Simple extraction for source tables
1435
+ table_matches = re.findall(
1436
+ r"(?:FROM|JOIN)\s+([a-zA-Z_][a-zA-Z0-9_]*)",
1437
+ view_context,
1438
+ re.IGNORECASE,
1439
+ )
1440
+ source_tables.extend(table_matches)
1441
+
1442
+ view = SQLView(
1443
+ name=view_name,
1444
+ start_line=start_line,
1445
+ end_line=end_line,
1446
+ raw_text=f"CREATE VIEW {view_name} ...",
1447
+ language="sql",
1448
+ source_tables=sorted(set(source_tables)),
1449
+ dependencies=sorted(set(source_tables)),
1450
+ )
1451
+ sql_elements.append(view)
1452
+
1453
+ elif node.type == "create_view":
1454
+ view_name = None
1455
+ source_tables = []
1456
+
1457
+ # Get raw text for regex parsing
1458
+ raw_text = self._get_node_text(node)
1459
+
1460
+ # FIRST: Try regex parsing (most reliable for CREATE VIEW)
1461
+ if raw_text:
1462
+ # Pattern: CREATE VIEW [IF NOT EXISTS] view_name AS
1463
+ import re
1464
+
1465
+ match = re.search(
1466
+ r"CREATE\s+VIEW\s+(?:IF\s+NOT\s+EXISTS\s+)?(\w+)\s+AS",
1467
+ raw_text,
1468
+ re.IGNORECASE,
1469
+ )
1470
+ if match:
1471
+ potential_name = match.group(1).strip()
1472
+ if self._is_valid_identifier(potential_name):
1473
+ view_name = potential_name
1474
+
1475
+ # Fallback: Try AST parsing if regex didn't work
1476
+ if not view_name:
1477
+ for child in node.children:
1478
+ if child.type == "object_reference":
1479
+ for subchild in child.children:
1480
+ if subchild.type == "identifier":
1481
+ potential_name = self._get_node_text(
1482
+ subchild
1483
+ ).strip()
1484
+ # Validate view name more strictly - exclude SQL keywords
1485
+ if (
1486
+ potential_name
1487
+ and self._is_valid_identifier(potential_name)
1488
+ and potential_name.upper()
1489
+ not in (
1490
+ "SELECT",
1491
+ "FROM",
1492
+ "WHERE",
1493
+ "AS",
1494
+ "IF",
1495
+ "NOT",
1496
+ "EXISTS",
1497
+ "NULL",
1498
+ "CURRENT_TIMESTAMP",
1499
+ "NOW",
1500
+ "SYSDATE",
1501
+ "COUNT",
1502
+ "SUM",
1503
+ "AVG",
1504
+ "MAX",
1505
+ "MIN",
1506
+ )
1507
+ ):
1508
+ view_name = potential_name
1509
+ break
1510
+ if view_name:
1511
+ break
1512
+
1513
+ # Extract source tables from SELECT statement
1514
+ self._extract_view_sources(node, source_tables)
1515
+
1516
+ if view_name:
1517
+ try:
1518
+ start_line = node.start_point[0] + 1
1519
+ end_line = node.end_point[0] + 1
1520
+ raw_text = self._get_node_text(node)
1521
+
1522
+ view = SQLView(
1523
+ name=view_name,
1524
+ start_line=start_line,
1525
+ end_line=end_line,
1526
+ raw_text=raw_text,
1527
+ language="sql",
1528
+ source_tables=source_tables,
1529
+ dependencies=source_tables,
1530
+ )
1531
+ sql_elements.append(view)
1532
+ except Exception as e:
1533
+ log_debug(f"Failed to extract enhanced view: {e}")
1534
+
1535
+ def _extract_view_sources(
1536
+ self, view_node: "tree_sitter.Node", source_tables: list[str]
1537
+ ) -> None:
1538
+ """Extract source tables from view definition."""
1539
+ for node in self._traverse_nodes(view_node):
1540
+ if node.type == "from_clause":
1541
+ for child in self._traverse_nodes(node):
1542
+ if child.type == "object_reference":
1543
+ for subchild in child.children:
1544
+ if subchild.type == "identifier":
1545
+ table_name = self._get_node_text(subchild).strip()
1546
+ if table_name and table_name not in source_tables:
1547
+ source_tables.append(table_name)
1548
+
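
The regex counterpart used in the ERROR-node path does the same job: pull source tables out of the view body with a FROM/JOIN scan. A runnable illustration:

    import re

    view_body = """CREATE VIEW order_summary AS
    SELECT o.id, u.name, SUM(oi.quantity) AS items
    FROM orders o
    JOIN users u ON u.id = o.user_id
    JOIN order_items oi ON oi.order_id = o.id
    GROUP BY o.id, u.name;"""

    tables = re.findall(r"(?:FROM|JOIN)\s+([a-zA-Z_][a-zA-Z0-9_]*)", view_body, re.IGNORECASE)
    print(sorted(set(tables)))   # ['order_items', 'orders', 'users']
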
1549
+ def _extract_sql_procedures(
1550
+ self, root_node: "tree_sitter.Node", sql_elements: list[SQLElement]
1551
+ ) -> None:
1552
+ """Extract CREATE PROCEDURE statements with enhanced metadata."""
1553
+ # Use regex-based approach to find all procedures in the source code
1554
+ import re
1555
+
1556
+ lines = self.source_code.split("\n")
1557
+
1558
+ # Pattern to match CREATE PROCEDURE statements
1559
+ procedure_pattern = re.compile(
1560
+ r"^\s*CREATE\s+PROCEDURE\s+([a-zA-Z_][a-zA-Z0-9_]*)",
1561
+ re.IGNORECASE | re.MULTILINE,
1562
+ )
1563
+
1564
+ i = 0
1565
+ while i < len(lines):
1566
+ line = lines[i].strip()
1567
+ if line.upper().startswith("CREATE") and "PROCEDURE" in line.upper():
1568
+ match = procedure_pattern.match(lines[i])
1569
+ if match:
1570
+ proc_name = match.group(1)
1571
+ start_line = i + 1
1572
+
1573
+ # Find the end of the procedure (look for END; or END$$)
1574
+ end_line = start_line
1575
+ for j in range(i + 1, len(lines)):
1576
+ if lines[j].strip().upper() in ["END;", "END$$", "END"]:
1577
+ end_line = j + 1
1578
+ break
1579
+ elif lines[j].strip().upper().startswith("END;"):
1580
+ end_line = j + 1
1581
+ break
1582
+
1583
+ # Extract the full procedure text
1584
+ proc_lines = lines[i:end_line]
1585
+ raw_text = "\n".join(proc_lines)
1586
+
1587
+ parameters = []
1588
+ dependencies = []
1589
+
1590
+ # Extract parameters and dependencies from the text
1591
+ self._extract_procedure_parameters(raw_text, parameters)
1592
+
1593
+ try:
1594
+ procedure = SQLProcedure(
1595
+ name=proc_name,
1596
+ start_line=start_line,
1597
+ end_line=end_line,
1598
+ raw_text=raw_text,
1599
+ language="sql",
1600
+ parameters=parameters,
1601
+ dependencies=dependencies,
1602
+ )
1603
+ sql_elements.append(procedure)
1604
+ log_debug(
1605
+ f"Extracted procedure: {proc_name} at lines {start_line}-{end_line}"
1606
+ )
1607
+ except Exception as e:
1608
+ log_debug(f"Failed to extract enhanced procedure: {e}")
1609
+
1610
+ i = end_line
1611
+ else:
1612
+ i += 1
1613
+ else:
1614
+ i += 1
1615
+
1616
+ # Also try the original tree-sitter approach as fallback
1617
+ for node in self._traverse_nodes(root_node):
1618
+ if node.type == "ERROR":
1619
+ has_create = False
1620
+ node_text = self._get_node_text(node)
1621
+ node_text_upper = node_text.upper()
1622
+
1623
+ for child in node.children:
1624
+ if child.type == "keyword_create":
1625
+ has_create = True
1626
+ break
1627
+
1628
+ if has_create and "PROCEDURE" in node_text_upper:
1629
+ # Extract procedure name
1630
+ # Use finditer to find ALL procedures in the ERROR node
1631
+ matches = re.finditer(
1632
+ r"CREATE\s+PROCEDURE\s+([a-zA-Z_][a-zA-Z0-9_]*)",
1633
+ node_text,
1634
+ re.IGNORECASE,
1635
+ )
1636
+
1637
+ for match in matches:
1638
+ proc_name = match.group(1)
1639
+
1640
+ # Check if this procedure was already extracted by regex
1641
+ already_extracted = any(
1642
+ hasattr(elem, "name") and elem.name == proc_name
1643
+ for elem in sql_elements
1644
+ if hasattr(elem, "sql_element_type")
1645
+ and elem.sql_element_type.value == "procedure"
1646
+ )
1647
+
1648
+ if not already_extracted:
1649
+ # Extract parameters
1650
+ # Note: This extracts parameters from the WHOLE node text, which might be wrong
1651
+ # if there are multiple procedures. Ideally we should slice the text.
1652
+ # But _extract_procedure_parameters parses the whole text.
1653
+ # For now, we use the text starting from the match.
1654
+ current_proc_text = node_text[match.start() :]
1655
+
1656
+ # Reset parameters and dependencies for each procedure
1657
+ parameters = []
1658
+ dependencies = []
1659
+
1660
+ self._extract_procedure_parameters(
1661
+ current_proc_text, parameters
1662
+ )
1663
+
1664
+ # Extract dependencies (table references)
1665
+ # This still uses the whole node for dependencies, which is hard to fix without
1666
+ # proper parsing, but acceptable for fallback.
1667
+ self._extract_procedure_dependencies(node, dependencies)
1668
+
1669
+ try:
1670
+ # Calculate start line
1671
+ newlines_before = node_text[: match.start()].count("\n")
1672
+ start_line = node.start_point[0] + 1 + newlines_before
1673
+ end_line = node.end_point[0] + 1
1674
+
1675
+ # Use current_proc_text as raw_text
1676
+ raw_text = current_proc_text
1677
+
1678
+ procedure = SQLProcedure(
1679
+ name=proc_name,
1680
+ start_line=start_line,
1681
+ end_line=end_line,
1682
+ raw_text=raw_text,
1683
+ language="sql",
1684
+ parameters=parameters,
1685
+ dependencies=dependencies,
1686
+ )
1687
+ sql_elements.append(procedure)
1688
+ except Exception as e:
1689
+ log_debug(f"Failed to extract enhanced procedure: {e}")
1690
+
1691
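For the line-scanning strategy above, a compact standalone sketch against a hypothetical CREATE PROCEDURE statement reproduces the 1-indexed start/end bookkeeping (the SQLProcedure construction is omitted):

```python
import re

source = """CREATE PROCEDURE add_user(IN p_name VARCHAR(100))
BEGIN
    INSERT INTO users (name) VALUES (p_name);
END;
"""

procedure_pattern = re.compile(
    r"^\s*CREATE\s+PROCEDURE\s+([a-zA-Z_][a-zA-Z0-9_]*)", re.IGNORECASE
)

lines = source.split("\n")
found = []
i = 0
while i < len(lines):
    match = procedure_pattern.match(lines[i])
    if match:
        start_line = i + 1
        end_line = start_line
        # Walk forward until a terminating END;/END$$ line is found.
        for j in range(i + 1, len(lines)):
            if lines[j].strip().upper() in ("END;", "END$$", "END"):
                end_line = j + 1
                break
        found.append((match.group(1), start_line, end_line))
        i = end_line
    else:
        i += 1

print(found)  # [('add_user', 1, 4)]
```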
+ def _extract_procedure_parameters(
1692
+ self, proc_text: str, parameters: list[SQLParameter]
1693
+ ) -> None:
1694
+ """Extract parameters from procedure definition."""
1695
+ import re
1696
+
1697
+ # First, extract the parameter section from the procedure/function definition
1698
+ # Look for the parameter list in parentheses after the procedure/function name
1699
+ param_section_match = re.search(
1700
+ r"(?:PROCEDURE|FUNCTION)\s+[a-zA-Z_][a-zA-Z0-9_]*\s*\(([^)]*)\)",
1701
+ proc_text,
1702
+ re.IGNORECASE | re.DOTALL,
1703
+ )
1704
+
1705
+ if not param_section_match:
1706
+ return
1707
+
1708
+ param_section = param_section_match.group(1).strip()
1709
+ if not param_section:
1710
+ return
1711
+
1712
+ # Look for parameter patterns like: IN param_name TYPE
1713
+ # Only search within the parameter section to avoid SQL statement content
1714
+ # Ensure IN/OUT/INOUT is followed by space to avoid ambiguity
1715
+ param_matches = re.findall(
1716
+ r"(?:(?:IN|OUT|INOUT)\s+)?([a-zA-Z_][a-zA-Z0-9_]*)\s+([A-Z]+(?:\([^)]*\))?)",
1717
+ param_section,
1718
+ re.IGNORECASE,
1719
+ )
1720
+ for match in param_matches:
1721
+ param_name = match[0]
1722
+ data_type = match[1]
1723
+
1724
+ # Skip common SQL keywords and column names that might be incorrectly matched
1725
+ if param_name.upper() in (
1726
+ "SELECT",
1727
+ "FROM",
1728
+ "WHERE",
1729
+ "INTO",
1730
+ "VALUES",
1731
+ "SET",
1732
+ "UPDATE",
1733
+ "INSERT",
1734
+ "DELETE",
1735
+ "CREATED_AT",
1736
+ "UPDATED_AT",
1737
+ "ID",
1738
+ "NAME",
1739
+ "EMAIL",
1740
+ "STATUS",
1741
+ "IN",
1742
+ "OUT",
1743
+ "INOUT",
1744
+ ):
1745
+ continue
1746
+
1747
+ # Determine direction from the original text (check INOUT before OUT, since "INOUT <name>" also contains "OUT <name>")
1748
+ direction = "IN" # Default
1749
+ if f"INOUT {param_name}" in param_section:
1750
+ direction = "INOUT"
1751
+ elif f"OUT {param_name}" in param_section:
1752
+ direction = "OUT"
1753
+
1754
+ parameter = SQLParameter(
1755
+ name=param_name,
1756
+ data_type=data_type,
1757
+ direction=direction,
1758
+ )
1759
+ parameters.append(parameter)
1760
+
1761
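The two parameter regexes above can be exercised on their own. The sketch below uses a hypothetical procedure header with simple, unparenthesised types:

```python
import re

proc_text = "CREATE PROCEDURE set_user_status(IN p_user_id INT, OUT p_rows_updated INT)"

# Isolate the parameter list between the parentheses after the routine name.
section = re.search(
    r"(?:PROCEDURE|FUNCTION)\s+[a-zA-Z_][a-zA-Z0-9_]*\s*\(([^)]*)\)",
    proc_text,
    re.IGNORECASE | re.DOTALL,
).group(1)

# Pull (name, type) pairs, with an optional IN/OUT/INOUT prefix.
params = re.findall(
    r"(?:(?:IN|OUT|INOUT)\s+)?([a-zA-Z_][a-zA-Z0-9_]*)\s+([A-Z]+(?:\([^)]*\))?)",
    section,
    re.IGNORECASE,
)
print(params)  # [('p_user_id', 'INT'), ('p_rows_updated', 'INT')]
```

Note that the `[^)]*` capture stops at the first closing parenthesis, so a parenthesised type such as VARCHAR(100) cuts the captured section short; that is why the sketch sticks to simple types.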
+ def _extract_procedure_dependencies(
1762
+ self, proc_node: "tree_sitter.Node", dependencies: list[str]
1763
+ ) -> None:
1764
+ """Extract table dependencies from procedure body."""
1765
+ for node in self._traverse_nodes(proc_node):
1766
+ if node.type == "object_reference":
1767
+ for child in node.children:
1768
+ if child.type == "identifier":
1769
+ table_name = self._get_node_text(child).strip()
1770
+ if table_name and table_name not in dependencies:
1771
+ # Simple heuristic: if it's referenced in FROM, UPDATE, INSERT, etc.
1772
+ dependencies.append(table_name)
1773
+
1774
+ def _extract_sql_functions_enhanced(
1775
+ self, root_node: "tree_sitter.Node", sql_elements: list[SQLElement]
1776
+ ) -> None:
1777
+ """Extract CREATE FUNCTION statements with enhanced metadata."""
1778
+ # Use regex-based approach to find all functions in the source code
1779
+ import re
1780
+
1781
+ lines = self.source_code.split("\n")
1782
+
1783
+ # Pattern to match CREATE FUNCTION statements - requires opening parenthesis
1784
+ function_pattern = re.compile(
1785
+ r"^\s*CREATE\s+FUNCTION\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(",
1786
+ re.IGNORECASE,
1787
+ )
1788
+
1789
+ i = 0
1790
+ inside_function = False
1791
+
1792
+ while i < len(lines):
1793
+ # Skip lines if we're inside a function body
1794
+ if inside_function:
1795
+ if lines[i].strip().upper() in ["END;", "END$$"] or lines[
1796
+ i
1797
+ ].strip().upper().startswith("END;"):
1798
+ inside_function = False
1799
+ i += 1
1800
+ continue
1801
+
1802
+ # Only check for CREATE FUNCTION when not inside a function
1803
+ match = function_pattern.match(lines[i])
1804
+ if match:
1805
+ func_name = match.group(1)
1806
+
1807
+ # Validate the function name using the centralized validation method
1808
+ if not self._is_valid_identifier(func_name):
1809
+ i += 1
1810
+ continue
1811
+
1812
+ start_line = i + 1
1813
+ inside_function = True
1814
+
1815
+ # Find the end of the function (look for END; or END$$)
1816
+ end_line = start_line
1817
+ nesting_level = 0
1818
+
1819
+ for j in range(i + 1, len(lines)):
1820
+ line_stripped = lines[j].strip().upper()
1821
+
1822
+ # Skip comments to avoid false positives
1823
+ if line_stripped.startswith("--") or line_stripped.startswith("#"):
1824
+ continue
1825
+
1826
+ # Handle nesting of BEGIN ... END blocks
1827
+ # This is a heuristic: if we see BEGIN, we expect a matching END;
1828
+ # We use word boundaries to avoid matching BEGIN in other contexts if possible
1829
+ if re.search(r"\bBEGIN\b", line_stripped):
1830
+ nesting_level += 1
1831
+
1832
+ is_end = False
1833
+ if line_stripped in ["END;", "END$$", "END"]:
1834
+ is_end = True
1835
+ elif line_stripped.startswith("END;"):
1836
+ is_end = True
1837
+
1838
+ if is_end:
1839
+ if nesting_level > 0:
1840
+ nesting_level -= 1
1841
+
1842
+ if nesting_level == 0:
1843
+ end_line = j + 1
1844
+ inside_function = False
1845
+ break
1846
+
1847
+ # Extract the full function text
1848
+ func_lines = lines[i:end_line]
1849
+ raw_text = "\n".join(func_lines)
1850
+
1851
+ parameters = []
1852
+ dependencies = []
1853
+ return_type = None
1854
+
1855
+ # Extract parameters, return type and dependencies from the text
1856
+ self._extract_procedure_parameters(raw_text, parameters)
1857
+
1858
+ # Extract return type
1859
+ returns_match = re.search(
1860
+ r"RETURNS\s+([A-Z]+(?:\([^)]*\))?)", raw_text, re.IGNORECASE
1861
+ )
1862
+ if returns_match:
1863
+ return_type = returns_match.group(1)
1864
+
1865
+ try:
1866
+ function = SQLFunction(
1867
+ name=func_name,
1868
+ start_line=start_line,
1869
+ end_line=end_line,
1870
+ raw_text=raw_text,
1871
+ language="sql",
1872
+ parameters=parameters,
1873
+ dependencies=dependencies,
1874
+ return_type=return_type,
1875
+ )
1876
+ sql_elements.append(function)
1877
+ log_debug(
1878
+ f"Extracted function: {func_name} at lines {start_line}-{end_line}"
1879
+ )
1880
+ except Exception as e:
1881
+ log_debug(f"Failed to extract enhanced function: {e}")
1882
+
1883
+ i = end_line
1884
+ else:
1885
+ i += 1
1886
+
1887
+ # Also try the original tree-sitter approach as fallback
1888
+ for node in self._traverse_nodes(root_node):
1889
+ if node.type == "create_function":
1890
+ func_name = None
1891
+ parameters = []
1892
+ return_type = None
1893
+ dependencies = []
1894
+
1895
+ # Extract function name - only from the FIRST object_reference child
1896
+ # This should be the function name, not references within the function body
1897
+ found_first_object_ref = False
1898
+ for child in node.children:
1899
+ if child.type == "object_reference" and not found_first_object_ref:
1900
+ found_first_object_ref = True
1901
+ for subchild in child.children:
1902
+ if subchild.type == "identifier":
1903
+ func_name = self._get_node_text(subchild).strip()
1904
+ # Validate function name using centralized validation
1905
+ if func_name and self._is_valid_identifier(func_name):
1906
+ break
1907
+ else:
1908
+ func_name = None
1909
+ if func_name:
1910
+ break
1911
+
1912
+ if func_name:
1913
+ # Check if this function was already extracted by regex
1914
+ already_extracted = any(
1915
+ hasattr(elem, "name") and elem.name == func_name
1916
+ for elem in sql_elements
1917
+ if hasattr(elem, "sql_element_type")
1918
+ and elem.sql_element_type.value == "function"
1919
+ )
1920
+
1921
+ if not already_extracted:
1922
+ # Extract return type and other metadata
1923
+ self._extract_function_metadata(
1924
+ node, parameters, return_type, dependencies
1925
+ )
1926
+
1927
+ try:
1928
+ start_line = node.start_point[0] + 1
1929
+ end_line = node.end_point[0] + 1
1930
+ raw_text = self._get_node_text(node)
1931
+
1932
+ function = SQLFunction(
1933
+ name=func_name,
1934
+ start_line=start_line,
1935
+ end_line=end_line,
1936
+ raw_text=raw_text,
1937
+ language="sql",
1938
+ parameters=parameters,
1939
+ dependencies=dependencies,
1940
+ return_type=return_type,
1941
+ )
1942
+ sql_elements.append(function)
1943
+ except Exception as e:
1944
+ log_debug(f"Failed to extract enhanced function: {e}")
1945
+
1946
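The END-matching loop above relies on a BEGIN/END nesting counter. The following standalone sketch reproduces that heuristic on a made-up function body containing one nested block:

```python
import re

source = """CREATE FUNCTION get_level(p_score INT)
RETURNS INT
BEGIN
    BEGIN
        SET @tmp = p_score;
    END;
    RETURN @tmp;
END;
"""

lines = source.split("\n")
nesting_level = 0
end_line = None
for j, line in enumerate(lines):
    stripped = line.strip().upper()
    if stripped.startswith("--") or stripped.startswith("#"):
        continue  # comment lines are ignored, as in the extractor
    if re.search(r"\bBEGIN\b", stripped):
        nesting_level += 1
    if stripped in ("END;", "END$$", "END") or stripped.startswith("END;"):
        if nesting_level > 0:
            nesting_level -= 1
        if nesting_level == 0:
            end_line = j + 1
            break

print(end_line)  # 8 -> the outer END; closes the function body
```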
+ def _extract_function_metadata(
1947
+ self,
1948
+ func_node: "tree_sitter.Node",
1949
+ parameters: list[SQLParameter],
1950
+ return_type: str | None,
1951
+ dependencies: list[str],
1952
+ ) -> None:
1953
+ """Extract function metadata including parameters and return type."""
1954
+ func_text = self._get_node_text(func_node)
1955
+
1956
+ # Extract return type
1957
+ import re
1958
+
1959
+ returns_match = re.search(
1960
+ r"RETURNS\s+([A-Z]+(?:\([^)]*\))?)", func_text, re.IGNORECASE
1961
+ )
1962
+ if returns_match:
1963
+ _return_type = returns_match.group(1) # Reserved for future use
1964
+
1965
+ # Extract parameters (similar to procedure parameters)
1966
+ self._extract_procedure_parameters(func_text, parameters)
1967
+
1968
+ # Extract dependencies
1969
+ self._extract_procedure_dependencies(func_node, dependencies)
1970
+
1971
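For reference, the RETURNS pattern used here (and in the regex path above) behaves like this on a hypothetical function header:

```python
import re

header = "CREATE FUNCTION get_discount(p_total INT) RETURNS DECIMAL(10,2) DETERMINISTIC"
m = re.search(r"RETURNS\s+([A-Z]+(?:\([^)]*\))?)", header, re.IGNORECASE)
print(m.group(1))  # DECIMAL(10,2)
```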
+ def _extract_sql_triggers(
1972
+ self, root_node: "tree_sitter.Node", sql_elements: list[SQLElement]
1973
+ ) -> None:
1974
+ """Extract CREATE TRIGGER statements with enhanced metadata."""
1975
+ import re
1976
+
1977
+ # Use self.source_code which is set by parent method _extract_sql_elements
1978
+ # This is more reliable than _get_node_text(root_node) which may fail
1979
+ # on some platforms due to encoding or byte offset issues
1980
+ source_code = self.source_code
1981
+
1982
+ if not source_code:
1983
+ log_debug("WARNING: source_code is empty in _extract_sql_triggers")
1984
+ return
1985
+
1986
+ # Track processed triggers by name to avoid duplicates
1987
+ processed_triggers = set()
1988
+
1989
+ # Use regex on the full source to find all triggers with accurate positions
1990
+ trigger_pattern = re.compile(
1991
+ r"CREATE\s+TRIGGER\s+([a-zA-Z_][a-zA-Z0-9_]*)", re.IGNORECASE | re.MULTILINE
1992
+ )
1993
+
1994
+ trigger_matches = list(trigger_pattern.finditer(source_code))
1995
+ log_debug(f"Found {len(trigger_matches)} CREATE TRIGGER statements in source")
1996
+
1997
+ for match in trigger_matches:
1998
+ trigger_name = match.group(1)
1999
+
2000
+ # Skip if already processed
2001
+ if trigger_name in processed_triggers:
2002
+ continue
2003
+
2004
+ if not self._is_valid_identifier(trigger_name):
2005
+ continue
2006
+
2007
+ # Skip invalid trigger names (too short or common SQL keywords)
2008
+ if len(trigger_name) <= 2:
2009
+ continue
2010
+
2011
+ # Skip common SQL keywords that might be incorrectly identified
2012
+ if trigger_name.upper() in (
2013
+ "KEY",
2014
+ "AUTO_INCREMENT",
2015
+ "PRIMARY",
2016
+ "FOREIGN",
2017
+ "INDEX",
2018
+ "UNIQUE",
2019
+ ):
2020
+ continue
2021
+
2022
+ # Mark as processed
2023
+ processed_triggers.add(trigger_name)
2024
+
2025
+ # Calculate start line (1-indexed)
2026
+ start_line = source_code[: match.start()].count("\n") + 1
2027
+
2028
+ # Find the end of this trigger statement (looking for the END keyword followed by semicolon)
2029
+ trigger_start_pos = match.start()
2030
+ # Search for END; after the trigger definition
2031
+ end_pattern = re.compile(r"\bEND\s*;", re.IGNORECASE)
2032
+ end_match = end_pattern.search(source_code, trigger_start_pos)
2033
+
2034
+ if end_match:
2035
+ end_line = source_code[: end_match.end()].count("\n") + 1
2036
+ trigger_text = source_code[trigger_start_pos : end_match.end()]
2037
+ else:
2038
+ # Fallback: use a reasonable default
2039
+ end_line = start_line + 20
2040
+ trigger_text = source_code[trigger_start_pos : trigger_start_pos + 500]
2041
+
2042
+ # Extract trigger metadata from the extracted text
2043
+ trigger_timing, trigger_event, table_name = self._extract_trigger_metadata(
2044
+ trigger_text
2045
+ )
2046
+
2047
+ try:
2048
+ trigger = SQLTrigger(
2049
+ name=trigger_name,
2050
+ start_line=start_line,
2051
+ end_line=end_line,
2052
+ raw_text=trigger_text,
2053
+ language="sql",
2054
+ table_name=table_name,
2055
+ trigger_timing=trigger_timing,
2056
+ trigger_event=trigger_event,
2057
+ dependencies=[table_name] if table_name else [],
2058
+ )
2059
+ sql_elements.append(trigger)
2060
+ except Exception as e:
2061
+ log_debug(f"Failed to extract enhanced trigger: {e}")
2062
+
2063
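A minimal sketch of the position bookkeeping above: locate CREATE TRIGGER matches, then convert character offsets to 1-indexed line numbers by counting newlines. The SQL script is made up:

```python
import re

source_code = """CREATE TABLE users (id INT);

CREATE TRIGGER trg_users_audit
AFTER INSERT ON users
FOR EACH ROW
BEGIN
    INSERT INTO audit_log (user_id) VALUES (NEW.id);
END;
"""

trigger_pattern = re.compile(
    r"CREATE\s+TRIGGER\s+([a-zA-Z_][a-zA-Z0-9_]*)", re.IGNORECASE | re.MULTILINE
)
end_pattern = re.compile(r"\bEND\s*;", re.IGNORECASE)

for match in trigger_pattern.finditer(source_code):
    start_line = source_code[: match.start()].count("\n") + 1
    end_match = end_pattern.search(source_code, match.start())
    if end_match:
        end_line = source_code[: end_match.end()].count("\n") + 1
    else:
        end_line = start_line + 20  # same fallback as above
    print(match.group(1), start_line, end_line)  # trg_users_audit 3 8
```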
+ def _extract_trigger_metadata(
2064
+ self,
2065
+ trigger_text: str,
2066
+ ) -> tuple[str | None, str | None, str | None]:
2067
+ """Extract trigger timing, event, and target table."""
2068
+ import re
2069
+
2070
+ timing = None
2071
+ event = None
2072
+ table_name = None
2073
+
2074
+ # Extract timing (BEFORE/AFTER)
2075
+ timing_match = re.search(r"(BEFORE|AFTER)", trigger_text, re.IGNORECASE)
2076
+ if timing_match:
2077
+ timing = timing_match.group(1).upper()
2078
+
2079
+ # Extract event (INSERT/UPDATE/DELETE)
2080
+ event_match = re.search(r"(INSERT|UPDATE|DELETE)", trigger_text, re.IGNORECASE)
2081
+ if event_match:
2082
+ event = event_match.group(1).upper()
2083
+
2084
+ # Extract target table
2085
+ table_match = re.search(
2086
+ r"ON\s+([a-zA-Z_][a-zA-Z0-9_]*)", trigger_text, re.IGNORECASE
2087
+ )
2088
+ if table_match:
2089
+ table_name = table_match.group(1)
2090
+
2091
+ return timing, event, table_name
2092
+
2093
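The three patterns in _extract_trigger_metadata can be tried in isolation; the trigger text below is hypothetical. They carry no word boundaries, so each simply takes the first matching token in the statement:

```python
import re

trigger_text = (
    "CREATE TRIGGER trg_orders_stamp BEFORE UPDATE ON orders "
    "FOR EACH ROW SET NEW.updated_at = NOW()"
)

timing = re.search(r"(BEFORE|AFTER)", trigger_text, re.IGNORECASE)
event = re.search(r"(INSERT|UPDATE|DELETE)", trigger_text, re.IGNORECASE)
table = re.search(r"ON\s+([a-zA-Z_][a-zA-Z0-9_]*)", trigger_text, re.IGNORECASE)

print(timing.group(1).upper())  # BEFORE
print(event.group(1).upper())   # UPDATE
print(table.group(1))           # orders
```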
+ def _extract_sql_indexes(
2094
+ self, root_node: "tree_sitter.Node", sql_elements: list[SQLElement]
2095
+ ) -> None:
2096
+ """Extract CREATE INDEX statements with enhanced metadata."""
2097
+ processed_indexes = set() # Track processed indexes to avoid duplicates
2098
+
2099
+ # First try tree-sitter parsing
2100
+ for node in self._traverse_nodes(root_node):
2101
+ if node.type == "create_index":
2102
+ index_name = None
2103
+
2104
+ # Use regex to extract index name from raw text for better accuracy
2105
+ import re
2106
+
2107
+ raw_text = self._get_node_text(node)
2108
+ # Pattern: CREATE [UNIQUE] INDEX index_name ON table_name
2109
+ index_pattern = re.search(
2110
+ r"CREATE\s+(?:UNIQUE\s+)?INDEX\s+([a-zA-Z_][a-zA-Z0-9_]*)\s+ON",
2111
+ raw_text,
2112
+ re.IGNORECASE,
2113
+ )
2114
+ if index_pattern:
2115
+ extracted_name = index_pattern.group(1)
2116
+ # Validate index name
2117
+ if self._is_valid_identifier(extracted_name):
2118
+ index_name = extracted_name
2119
+
2120
+ if index_name and index_name not in processed_indexes:
2121
+ try:
2122
+ start_line = node.start_point[0] + 1
2123
+ end_line = node.end_point[0] + 1
2124
+ raw_text = self._get_node_text(node)
2125
+
2126
+ # Create index object first
2127
+ index = SQLIndex(
2128
+ name=index_name,
2129
+ start_line=start_line,
2130
+ end_line=end_line,
2131
+ raw_text=raw_text,
2132
+ language="sql",
2133
+ table_name=None,
2134
+ indexed_columns=[],
2135
+ is_unique=False,
2136
+ dependencies=[],
2137
+ )
2138
+
2139
+ # Extract metadata and populate the index object
2140
+ self._extract_index_metadata(node, index)
2141
+
2142
+ sql_elements.append(index)
2143
+ processed_indexes.add(index_name)
2144
+ log_debug(
2145
+ f"Extracted index: {index_name} on table {index.table_name}"
2146
+ )
2147
+ except Exception as e:
2148
+ log_debug(f"Failed to extract enhanced index {index_name}: {e}")
2149
+
2150
+ # Add regex-based fallback for indexes that tree-sitter might miss
2151
+ self._extract_indexes_with_regex(sql_elements, processed_indexes)
2152
+
2153
+ def _extract_index_metadata(
2154
+ self,
2155
+ index_node: "tree_sitter.Node",
2156
+ index: "SQLIndex",
2157
+ ) -> None:
2158
+ """Extract index metadata including target table and columns."""
2159
+ index_text = self._get_node_text(index_node)
2160
+
2161
+ # Check for UNIQUE keyword
2162
+ if "UNIQUE" in index_text.upper():
2163
+ index.is_unique = True
2164
+
2165
+ # Extract table name
2166
+ import re
2167
+
2168
+ table_match = re.search(
2169
+ r"ON\s+([a-zA-Z_][a-zA-Z0-9_]*)", index_text, re.IGNORECASE
2170
+ )
2171
+ if table_match:
2172
+ index.table_name = table_match.group(1)
2173
+ # Update dependencies
2174
+ if index.table_name and index.table_name not in index.dependencies:
2175
+ index.dependencies.append(index.table_name)
2176
+
2177
+ # Extract column names
2178
+ columns_match = re.search(r"\(([^)]+)\)", index_text)
2179
+ if columns_match:
2180
+ columns_str = columns_match.group(1)
2181
+ columns = [col.strip() for col in columns_str.split(",")]
2182
+ index.indexed_columns.extend(columns)
2183
+
2184
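A quick standalone check of the piecewise metadata extraction above, on a hypothetical CREATE UNIQUE INDEX statement:

```python
import re

index_text = "CREATE UNIQUE INDEX idx_users_email ON users (email, tenant_id)"

is_unique = "UNIQUE" in index_text.upper()
table = re.search(r"ON\s+([a-zA-Z_][a-zA-Z0-9_]*)", index_text, re.IGNORECASE)
cols = re.search(r"\(([^)]+)\)", index_text)

print(is_unique)                                      # True
print(table.group(1))                                 # users
print([c.strip() for c in cols.group(1).split(",")])  # ['email', 'tenant_id']
```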
+ def _extract_indexes_with_regex(
2185
+ self, sql_elements: list[SQLElement], processed_indexes: set[str]
2186
+ ) -> None:
2187
+ """Extract CREATE INDEX statements using regex as fallback."""
2188
+ import re
2189
+
2190
+ # Split source code into lines for line number tracking
2191
+ lines = self.source_code.split("\n")
2192
+
2193
+ # Pattern to match CREATE INDEX statements
2194
+ index_pattern = re.compile(
2195
+ r"^\s*CREATE\s+(UNIQUE\s+)?INDEX\s+([a-zA-Z_][a-zA-Z0-9_]*)\s+ON\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(([^)]+)\)",
2196
+ re.IGNORECASE | re.MULTILINE,
2197
+ )
2198
+
2199
+ for line_num, line in enumerate(lines, 1):
2200
+ line = line.strip()
2201
+ if not line.upper().startswith("CREATE") or "INDEX" not in line.upper():
2202
+ continue
2203
+
2204
+ match = index_pattern.match(line)
2205
+ if match:
2206
+ is_unique = match.group(1) is not None
2207
+ index_name = match.group(2)
2208
+ table_name = match.group(3)
2209
+ columns_str = match.group(4)
2210
+
2211
+ # Skip if already processed
2212
+ if index_name in processed_indexes:
2213
+ continue
2214
+
2215
+ # Parse columns
2216
+ columns = [col.strip() for col in columns_str.split(",")]
2217
+
2218
+ try:
2219
+ index = SQLIndex(
2220
+ name=index_name,
2221
+ start_line=line_num,
2222
+ end_line=line_num,
2223
+ raw_text=line,
2224
+ language="sql",
2225
+ table_name=table_name,
2226
+ indexed_columns=columns,
2227
+ is_unique=is_unique,
2228
+ dependencies=[table_name] if table_name else [],
2229
+ )
2230
+
2231
+ sql_elements.append(index)
2232
+ processed_indexes.add(index_name)
2233
+ log_debug(
2234
+ f"Regex extracted index: {index_name} on table {table_name}"
2235
+ )
2236
+
2237
+ except Exception as e:
2238
+ log_debug(
2239
+ f"Failed to create regex-extracted index {index_name}: {e}"
2240
+ )
2241
+
2242
+
2243
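And the single-pattern fallback above, applied to one hypothetical line of SQL:

```python
import re

index_pattern = re.compile(
    r"^\s*CREATE\s+(UNIQUE\s+)?INDEX\s+([a-zA-Z_][a-zA-Z0-9_]*)\s+ON\s+"
    r"([a-zA-Z_][a-zA-Z0-9_]*)\s*\(([^)]+)\)",
    re.IGNORECASE | re.MULTILINE,
)

line = "CREATE INDEX idx_orders_created ON orders (created_at)"
m = index_pattern.match(line)
if m:
    print(m.group(1) is not None)                      # False -> not UNIQUE
    print(m.group(2), m.group(3))                      # idx_orders_created orders
    print([c.strip() for c in m.group(4).split(",")])  # ['created_at']
```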
+ class SQLPlugin(LanguagePlugin):
2244
+ """
2245
+ SQL language plugin implementation.
2246
+
2247
+ Provides SQL language support for tree-sitter-analyzer, enabling analysis
2248
+ of SQL files including database schema definitions, stored procedures,
2249
+ functions, triggers, and indexes.
2250
+
2251
+ The plugin follows the standard LanguagePlugin interface and integrates
2252
+ with the plugin manager for automatic discovery. It requires the
2253
+ tree-sitter-sql package to be installed (available as an optional dependency).
2254
+ """
2255
+
2256
+ def __init__(self, diagnostic_mode: bool = False) -> None:
2257
+ """
2258
+ Initialize the SQL language plugin.
2259
+
2260
+ Sets up the extractor instance and caches for tree-sitter language
2261
+ loading. The plugin supports .sql file extensions.
2262
+ """
2263
+ super().__init__()
2264
+ self.diagnostic_mode = diagnostic_mode
2265
+ self.extractor = SQLElementExtractor(diagnostic_mode=diagnostic_mode)
2266
+ self.language = "sql" # Add language property for test compatibility
2267
+ self.supported_extensions = self.get_file_extensions()
2268
+ self._cached_language: Any | None = None # Cache for tree-sitter language
2269
+
2270
+ # Platform compatibility initialization
2271
+ self.platform_info = None
2272
+ try:
2273
+ self.platform_info = PlatformDetector.detect()
2274
+ self.extractor.platform_info = self.platform_info
2275
+
2276
+ platform_info = self.platform_info
2277
+ profile = BehaviorProfile.load(platform_info.platform_key)
2278
+
2279
+ if self.diagnostic_mode:
2280
+ log_debug(f"Diagnostic: Platform detected: {platform_info}")
2281
+ if profile:
2282
+ log_debug(
2283
+ f"Diagnostic: Loaded SQL behavior profile for {platform_info.platform_key}"
2284
+ )
2285
+ log_debug(f"Diagnostic: Profile rules: {profile.adaptation_rules}")
2286
+ else:
2287
+ log_debug(
2288
+ f"Diagnostic: No SQL behavior profile found for {platform_info.platform_key}"
2289
+ )
2290
+ elif profile:
2291
+ log_debug(
2292
+ f"Loaded SQL behavior profile for {platform_info.platform_key}"
2293
+ )
2294
+ else:
2295
+ log_debug(
2296
+ f"No SQL behavior profile found for {platform_info.platform_key}, using defaults"
2297
+ )
2298
+
2299
+ self.adapter = CompatibilityAdapter(profile)
2300
+ self.extractor.set_adapter(self.adapter)
2301
+ except Exception as e:
2302
+ log_error(f"Failed to initialize SQL platform compatibility: {e}")
2303
+ self.adapter = CompatibilityAdapter(None) # Use default adapter
2304
+ self.extractor.set_adapter(self.adapter)
2305
+
2306
+ def get_tree_sitter_language(self) -> Any:
2307
+ """
2308
+ Get the tree-sitter language object for SQL.
2309
+
2310
+ Returns:
2311
+ The tree-sitter language object.
2312
+
2313
+ Raises:
2314
+ RuntimeError: If tree-sitter-sql is not installed.
2315
+ """
2316
+ if self._cached_language:
2317
+ return self._cached_language
2318
+
2319
+ try:
2320
+ import tree_sitter
2321
+ import tree_sitter_sql
2322
+
2323
+ self._cached_language = tree_sitter.Language(tree_sitter_sql.language())
2324
+ return self._cached_language
2325
+ except ImportError as e:
2326
+ raise RuntimeError(
2327
+ "tree-sitter-sql is required for SQL analysis but not installed."
2328
+ ) from e
2329
+
2330
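Outside the plugin, the same language object can be loaded and used directly. This is a sketch, assuming the optional tree-sitter-sql package is installed and a recent py-tree-sitter whose Parser accepts a language argument (older releases instead set the language on the parser after constructing it):

```python
import tree_sitter
import tree_sitter_sql

# Same construction as get_tree_sitter_language() above.
language = tree_sitter.Language(tree_sitter_sql.language())

# Assumed modern binding API: Parser(language); adjust for older versions.
parser = tree_sitter.Parser(language)
tree = parser.parse(b"CREATE TABLE users (id INT);")
print(tree.root_node.type)  # the grammar's root node type
```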
+ def get_language_name(self) -> str:
2331
+ """Get the language name."""
2332
+ return "sql"
2333
+
2334
+ def get_file_extensions(self) -> list[str]:
2335
+ """Get supported file extensions."""
2336
+ return [".sql"]
2337
+
2338
+ def create_extractor(self) -> ElementExtractor:
2339
+ """Create a new element extractor instance."""
2340
+ return SQLElementExtractor()
2341
+
2342
+ def extract_elements(self, tree: Any, source_code: str) -> dict[str, list[Any]]:
2343
+ """
2344
+ Legacy method for extracting elements.
2345
+ Maintained for backward compatibility and testing.
2346
+
2347
+ Args:
2348
+ tree: Tree-sitter AST tree
2349
+ source_code: Source code string
2350
+
2351
+ Returns:
2352
+ Dictionary with keys 'functions', 'classes', 'variables', 'imports'
2353
+ """
2354
+ elements = self.extractor.extract_sql_elements(tree, source_code)
2355
+
2356
+ result = {"functions": [], "classes": [], "variables": [], "imports": []}
2357
+
2358
+ for element in elements:
2359
+ if element.element_type in ["function", "procedure", "trigger"]:
2360
+ result["functions"].append(element)
2361
+ elif element.element_type in ["class", "table", "view"]:
2362
+ result["classes"].append(element)
2363
+ elif element.element_type in ["variable", "index"]:
2364
+ result["variables"].append(element)
2365
+ elif element.element_type == "import":
2366
+ result["imports"].append(element)
2367
+
2368
+ return result
2369
+
2370
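To illustrate the legacy bucketing above without running a real parse, the sketch below uses SimpleNamespace stand-ins for the SQL element objects; the element_type strings are the ones the mapping checks for, while the names are hypothetical:

```python
from types import SimpleNamespace

elements = [
    SimpleNamespace(name="users", element_type="table"),
    SimpleNamespace(name="active_users", element_type="view"),
    SimpleNamespace(name="add_user", element_type="procedure"),
    SimpleNamespace(name="idx_users_email", element_type="index"),
]

result = {"functions": [], "classes": [], "variables": [], "imports": []}
for element in elements:
    if element.element_type in ["function", "procedure", "trigger"]:
        result["functions"].append(element.name)
    elif element.element_type in ["class", "table", "view"]:
        result["classes"].append(element.name)
    elif element.element_type in ["variable", "index"]:
        result["variables"].append(element.name)
    elif element.element_type == "import":
        result["imports"].append(element.name)

print(result)
# {'functions': ['add_user'], 'classes': ['users', 'active_users'],
#  'variables': ['idx_users_email'], 'imports': []}
```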
+ async def analyze_file(
2371
+ self, file_path: str, request: "AnalysisRequest"
2372
+ ) -> "AnalysisResult":
2373
+ """
2374
+ Analyze SQL file and return structured results.
2375
+
2376
+ Parses the SQL file using tree-sitter-sql, extracts database elements
2377
+ (tables, views, procedures, functions, triggers, indexes), and returns
2378
+ an AnalysisResult with all extracted information.
2379
+
2380
+ Args:
2381
+ file_path: Path to the file to analyze
2382
+ request: Analysis request object
2383
+
2384
+ Returns:
2385
+ AnalysisResult object containing extracted elements
2386
+ """
2387
+ from ..core.parser import Parser
2388
+ from ..models import AnalysisResult
2389
+
2390
+ try:
2391
+ # Read file content
2392
+ with open(file_path, encoding="utf-8") as f:
2393
+ source_code = f.read()
2394
+
2395
+ # Parse using core parser
2396
+ parser = Parser()
2397
+ parse_result = parser.parse_code(source_code, "sql", file_path)
2398
+
2399
+ if not parse_result.success:
2400
+ return AnalysisResult(
2401
+ file_path=file_path,
2402
+ language="sql",
2403
+ line_count=len(source_code.splitlines()),
2404
+ elements=[],
2405
+ node_count=0,
2406
+ query_results={},
2407
+ source_code=source_code,
2408
+ success=False,
2409
+ error_message=parse_result.error_message,
2410
+ )
2411
+
2412
+ # Extract elements
2413
+ elements = self.extractor.extract_sql_elements(
2414
+ parse_result.tree, source_code
2415
+ )
2416
+
2417
+ # Create result
2418
+ return AnalysisResult(
2419
+ file_path=file_path,
2420
+ language="sql",
2421
+ line_count=len(source_code.splitlines()),
2422
+ elements=elements,
2423
+ node_count=(
2424
+ parse_result.tree.root_node.end_byte if parse_result.tree else 0
2425
+ ),
2426
+ query_results={},
2427
+ source_code=source_code,
2428
+ success=True,
2429
+ error_message=None,
2430
+ )
2431
+
2432
+ except Exception as e:
2433
+ log_error(f"Failed to analyze SQL file {file_path}: {e}")
2434
+ return AnalysisResult(
2435
+ file_path=file_path,
2436
+ language="sql",
2437
+ line_count=0,
2438
+ elements=[],
2439
+ node_count=0,
2440
+ query_results={},
2441
+ source_code="",
2442
+ success=False,
2443
+ error_message=str(e),
2444
+ )