tree-sitter-analyzer 1.9.17.1 (tree_sitter_analyzer-1.9.17.1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. tree_sitter_analyzer/__init__.py +132 -0
  2. tree_sitter_analyzer/__main__.py +11 -0
  3. tree_sitter_analyzer/api.py +853 -0
  4. tree_sitter_analyzer/cli/__init__.py +39 -0
  5. tree_sitter_analyzer/cli/__main__.py +12 -0
  6. tree_sitter_analyzer/cli/argument_validator.py +89 -0
  7. tree_sitter_analyzer/cli/commands/__init__.py +26 -0
  8. tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
  9. tree_sitter_analyzer/cli/commands/base_command.py +181 -0
  10. tree_sitter_analyzer/cli/commands/default_command.py +18 -0
  11. tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
  12. tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
  13. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
  14. tree_sitter_analyzer/cli/commands/query_command.py +109 -0
  15. tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
  16. tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
  17. tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
  18. tree_sitter_analyzer/cli/commands/table_command.py +414 -0
  19. tree_sitter_analyzer/cli/info_commands.py +124 -0
  20. tree_sitter_analyzer/cli_main.py +472 -0
  21. tree_sitter_analyzer/constants.py +85 -0
  22. tree_sitter_analyzer/core/__init__.py +15 -0
  23. tree_sitter_analyzer/core/analysis_engine.py +580 -0
  24. tree_sitter_analyzer/core/cache_service.py +333 -0
  25. tree_sitter_analyzer/core/engine.py +585 -0
  26. tree_sitter_analyzer/core/parser.py +293 -0
  27. tree_sitter_analyzer/core/query.py +605 -0
  28. tree_sitter_analyzer/core/query_filter.py +200 -0
  29. tree_sitter_analyzer/core/query_service.py +340 -0
  30. tree_sitter_analyzer/encoding_utils.py +530 -0
  31. tree_sitter_analyzer/exceptions.py +747 -0
  32. tree_sitter_analyzer/file_handler.py +246 -0
  33. tree_sitter_analyzer/formatters/__init__.py +1 -0
  34. tree_sitter_analyzer/formatters/base_formatter.py +201 -0
  35. tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
  36. tree_sitter_analyzer/formatters/formatter_config.py +197 -0
  37. tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
  38. tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
  39. tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
  40. tree_sitter_analyzer/formatters/go_formatter.py +368 -0
  41. tree_sitter_analyzer/formatters/html_formatter.py +498 -0
  42. tree_sitter_analyzer/formatters/java_formatter.py +423 -0
  43. tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
  44. tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
  45. tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
  46. tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
  47. tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
  48. tree_sitter_analyzer/formatters/php_formatter.py +301 -0
  49. tree_sitter_analyzer/formatters/python_formatter.py +830 -0
  50. tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
  51. tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
  52. tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
  53. tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
  54. tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
  55. tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
  56. tree_sitter_analyzer/interfaces/__init__.py +9 -0
  57. tree_sitter_analyzer/interfaces/cli.py +535 -0
  58. tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
  59. tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
  60. tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
  61. tree_sitter_analyzer/language_detector.py +553 -0
  62. tree_sitter_analyzer/language_loader.py +271 -0
  63. tree_sitter_analyzer/languages/__init__.py +10 -0
  64. tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
  65. tree_sitter_analyzer/languages/css_plugin.py +449 -0
  66. tree_sitter_analyzer/languages/go_plugin.py +836 -0
  67. tree_sitter_analyzer/languages/html_plugin.py +496 -0
  68. tree_sitter_analyzer/languages/java_plugin.py +1299 -0
  69. tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
  70. tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
  71. tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
  72. tree_sitter_analyzer/languages/php_plugin.py +862 -0
  73. tree_sitter_analyzer/languages/python_plugin.py +1636 -0
  74. tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
  75. tree_sitter_analyzer/languages/rust_plugin.py +673 -0
  76. tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
  77. tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
  78. tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
  79. tree_sitter_analyzer/legacy_table_formatter.py +860 -0
  80. tree_sitter_analyzer/mcp/__init__.py +34 -0
  81. tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
  82. tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
  83. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
  84. tree_sitter_analyzer/mcp/server.py +869 -0
  85. tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
  86. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
  87. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
  88. tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
  89. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
  90. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
  91. tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
  92. tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
  93. tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
  94. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
  95. tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
  96. tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
  97. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
  98. tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
  99. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
  100. tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
  101. tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
  102. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
  103. tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
  104. tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
  105. tree_sitter_analyzer/models.py +840 -0
  106. tree_sitter_analyzer/mypy_current_errors.txt +2 -0
  107. tree_sitter_analyzer/output_manager.py +255 -0
  108. tree_sitter_analyzer/platform_compat/__init__.py +3 -0
  109. tree_sitter_analyzer/platform_compat/adapter.py +324 -0
  110. tree_sitter_analyzer/platform_compat/compare.py +224 -0
  111. tree_sitter_analyzer/platform_compat/detector.py +67 -0
  112. tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
  113. tree_sitter_analyzer/platform_compat/profiles.py +217 -0
  114. tree_sitter_analyzer/platform_compat/record.py +55 -0
  115. tree_sitter_analyzer/platform_compat/recorder.py +155 -0
  116. tree_sitter_analyzer/platform_compat/report.py +92 -0
  117. tree_sitter_analyzer/plugins/__init__.py +280 -0
  118. tree_sitter_analyzer/plugins/base.py +647 -0
  119. tree_sitter_analyzer/plugins/manager.py +384 -0
  120. tree_sitter_analyzer/project_detector.py +328 -0
  121. tree_sitter_analyzer/queries/__init__.py +27 -0
  122. tree_sitter_analyzer/queries/csharp.py +216 -0
  123. tree_sitter_analyzer/queries/css.py +615 -0
  124. tree_sitter_analyzer/queries/go.py +275 -0
  125. tree_sitter_analyzer/queries/html.py +543 -0
  126. tree_sitter_analyzer/queries/java.py +402 -0
  127. tree_sitter_analyzer/queries/javascript.py +724 -0
  128. tree_sitter_analyzer/queries/kotlin.py +192 -0
  129. tree_sitter_analyzer/queries/markdown.py +258 -0
  130. tree_sitter_analyzer/queries/php.py +95 -0
  131. tree_sitter_analyzer/queries/python.py +859 -0
  132. tree_sitter_analyzer/queries/ruby.py +92 -0
  133. tree_sitter_analyzer/queries/rust.py +223 -0
  134. tree_sitter_analyzer/queries/sql.py +555 -0
  135. tree_sitter_analyzer/queries/typescript.py +871 -0
  136. tree_sitter_analyzer/queries/yaml.py +236 -0
  137. tree_sitter_analyzer/query_loader.py +272 -0
  138. tree_sitter_analyzer/security/__init__.py +22 -0
  139. tree_sitter_analyzer/security/boundary_manager.py +277 -0
  140. tree_sitter_analyzer/security/regex_checker.py +297 -0
  141. tree_sitter_analyzer/security/validator.py +599 -0
  142. tree_sitter_analyzer/table_formatter.py +782 -0
  143. tree_sitter_analyzer/utils/__init__.py +53 -0
  144. tree_sitter_analyzer/utils/logging.py +433 -0
  145. tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
  146. tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
  147. tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
  148. tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
  149. tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
@@ -0,0 +1,2 @@
1
+ mcp\server.py: error: Source file found twice under different module names:
2
+ "tree_sitter_analyzer.mcp.server" and "mcp.server"
@@ -0,0 +1,255 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Output Manager for CLI
4
+
5
+ Handles different types of outputs: user information, errors, and structured data.
6
+ """
7
+
8
+ import json
9
+ import sys
10
+ from typing import Any
11
+
12
+ from .utils import log_error, log_warning
13
+
14
+
15
+ class OutputManager:
16
+ """Manages different types of output for CLI"""
17
+
18
+ def __init__(self, quiet: bool = False, json_output: bool = False):
19
+ self.quiet = quiet
20
+ self.json_output = json_output
21
+
22
+ def info(self, message: str) -> None:
23
+ """Output informational message to user"""
24
+ if not self.quiet:
25
+ print(message)
26
+
27
+ def warning(self, message: str) -> None:
28
+ """Output warning message"""
29
+ if not self.quiet:
30
+ print(f"WARNING: {message}", file=sys.stderr)
31
+ log_warning(message)
32
+
33
+ def error(self, message: str) -> None:
34
+ """Output error message"""
35
+ print(f"ERROR: {message}", file=sys.stderr)
36
+ log_error(message)
37
+
38
+ def success(self, message: str) -> None:
39
+ """Output success message"""
40
+ if not self.quiet:
41
+ print(f"✓ {message}")
42
+
43
+ def output_info(self, message: str) -> None:
44
+ """Output info message (alias for info)"""
45
+ self.info(message)
46
+
47
+ def output_warning(self, message: str) -> None:
48
+ """Output warning message (alias for warning)"""
49
+ self.warning(message)
50
+
51
+ def output_error(self, message: str) -> None:
52
+ """Output error message (alias for error)"""
53
+ self.error(message)
54
+
55
+ def output_success(self, message: str) -> None:
56
+ """Output success message (alias for success)"""
57
+ self.success(message)
58
+
59
+ def data(self, data: Any, format_type: str = "json") -> None:
60
+ """Output structured data"""
61
+ if self.json_output or format_type == "json":
62
+ print(json.dumps(data, indent=2, ensure_ascii=False))
63
+ else:
64
+ self._format_data(data)
65
+
66
+ def _format_data(self, data: Any) -> None:
67
+ """Format data for human-readable output"""
68
+ if isinstance(data, dict):
69
+ for key, value in data.items():
70
+ print(f"{key}: {value}")
71
+ elif isinstance(data, list):
72
+ for i, item in enumerate(data, 1):
73
+ print(f"{i}. {item}")
74
+ else:
75
+ print(str(data))
76
+
77
+ def results_header(self, title: str) -> None:
78
+ """Output results section header"""
79
+ if not self.quiet:
80
+ print(f"\n--- {title} ---")
81
+
82
+ def query_result(self, index: int, result: dict[str, Any]) -> None:
83
+ """Output query result in formatted way"""
84
+ if not self.quiet:
85
+ print(
86
+ f"\n{index}. {result.get('capture_name', 'Unknown')} ({result.get('node_type', 'Unknown')})"
87
+ )
88
+ print(
89
+ f" Position: Line {result.get('start_line', '?')}-{result.get('end_line', '?')}"
90
+ )
91
+ if "content" in result:
92
+ print(f" Content:\n{result['content']}")
93
+
94
+ def analysis_summary(self, stats: dict[str, Any]) -> None:
95
+ """Output analysis summary"""
96
+ # Always print human-readable stats to satisfy CLI expectations in tests
97
+ self.results_header("Statistics")
98
+ for key, value in stats.items():
99
+ print(f"{key}: {value}")
100
+
101
+ def language_list(
102
+ self, languages: list[str], title: str = "Supported Languages"
103
+ ) -> None:
104
+ """Output language list"""
105
+ if not self.quiet:
106
+ print(f"{title}:")
107
+ for lang in languages:
108
+ print(f" {lang}")
109
+
110
+ def query_list(self, queries: dict[str, str], language: str) -> None:
111
+ """Output query list for a language"""
112
+ if not self.quiet:
113
+ print(f"Available query keys ({language}):")
114
+ for query_key, description in queries.items():
115
+ print(f" {query_key:<20} - {description}")
116
+
117
+ def extension_list(self, extensions: list[str]) -> None:
118
+ """Output supported extensions"""
119
+ if not self.quiet:
120
+ print("Supported file extensions:")
121
+ # Use more efficient chunking
122
+ from itertools import islice
123
+
124
+ chunk_size = 10
125
+ for i in range(0, len(extensions), chunk_size):
126
+ chunk = list(islice(extensions, i, i + chunk_size))
127
+ print(f" {' '.join(chunk)}")
128
+ print(f"Total {len(extensions)} extensions supported")
129
+
130
+ def output_json(self, data: Any) -> None:
131
+ """Output JSON data"""
132
+ print(json.dumps(data, indent=2, ensure_ascii=False))
133
+
134
+ def output_list(self, items: str | list[Any], title: str | None = None) -> None:
135
+ """Output a list of items"""
136
+ if title and not self.quiet:
137
+ print(f"{title}:")
138
+ # 文字列が単一要素として渡された場合の処理
139
+ if isinstance(items, str):
140
+ items = [items]
141
+ for item in items:
142
+ if not self.quiet:
143
+ print(f" {item}")
144
+
145
+ def output_section(self, title: str) -> None:
146
+ """Output a section header"""
147
+ if not self.quiet:
148
+ print(f"\n--- {title} ---")
149
+
150
+ def output_query_results(self, results: Any) -> None:
151
+ """Output query results"""
152
+ self.data(results)
153
+
154
+ def output_statistics(self, stats: dict[str, Any]) -> None:
155
+ """Output statistics"""
156
+ self.analysis_summary(stats)
157
+
158
+ def output_languages(self, languages: list[str]) -> None:
159
+ """Output available languages"""
160
+ self.language_list(languages)
161
+
162
+ def output_queries(self, queries: list[str]) -> None:
163
+ """Output available queries"""
164
+ query_dict = {q: f"Query {q}" for q in queries}
165
+ self.query_list(query_dict, "All")
166
+
167
+ def output_extensions(self, extensions: list[str]) -> None:
168
+ """Output file extensions"""
169
+ self.extension_list(extensions)
170
+
171
+ def output_data(self, data: Any, format_type: str = "json") -> None:
172
+ """Output data (alias for data)"""
173
+ self.data(data, format_type)
174
+
175
+
176
+ # Module-level default manager used by the convenience functions below;
+ # set_output_mode() rebinds it to a freshly configured instance.
177
+ _output_manager = OutputManager()
178
+
179
+
180
def set_output_mode(quiet: bool = False, json_output: bool = False) -> None:
    """Install a new module-wide manager configured with the given flags."""
    global _output_manager
    replacement = OutputManager(quiet=quiet, json_output=json_output)
    _output_manager = replacement
184
+
185
+
186
def get_output_manager() -> OutputManager:
    """Return the manager instance currently installed at module level."""
    current = _output_manager
    return current
189
+
190
+
191
+ # Convenience functions
192
def output_info(message: str) -> None:
    """Forward an informational message to the global output manager."""
    manager = _output_manager
    manager.info(message)
195
+
196
+
197
def output_warning(message: str) -> None:
    """Forward a warning message to the global output manager."""
    manager = _output_manager
    manager.warning(message)
200
+
201
+
202
def output_error(message: str) -> None:
    """Forward an error message to the global output manager."""
    manager = _output_manager
    manager.error(message)
205
+
206
+
207
def output_success(message: str) -> None:
    """Forward a success message to the global output manager."""
    manager = _output_manager
    manager.success(message)
210
+
211
+
212
def output_json(data: Any) -> None:
    """Print ``data`` as JSON through the global output manager."""
    manager = _output_manager
    manager.output_json(data)
215
+
216
+
217
def output_list(items: str | list[Any], title: str | None = None) -> None:
    """Print an optionally titled list through the global output manager."""
    manager = _output_manager
    manager.output_list(items, title)
220
+
221
+
222
def output_section(title: str) -> None:
    """Print a section header through the global output manager."""
    manager = _output_manager
    manager.output_section(title)
225
+
226
+
227
def output_query_results(results: Any) -> None:
    """Print query results through the global output manager."""
    manager = _output_manager
    manager.output_query_results(results)
230
+
231
+
232
def output_statistics(stats: dict[str, Any]) -> None:
    """Print a statistics summary through the global output manager."""
    manager = _output_manager
    manager.output_statistics(stats)
235
+
236
+
237
def output_languages(languages: list[str], title: str = "Supported Languages") -> None:
    """Print a titled language list through the global output manager."""
    manager = _output_manager
    manager.language_list(languages, title)
240
+
241
+
242
def output_queries(queries: list[str], language: str = "All") -> None:
    """Print query keys, each described as ``Query <key>``, for ``language``."""
    described: dict[str, str] = {}
    for key in queries:
        described[key] = f"Query {key}"
    _output_manager.query_list(described, language)
246
+
247
+
248
def output_extensions(extensions: list[str]) -> None:
    """Print the supported file extensions through the global output manager."""
    manager = _output_manager
    manager.output_extensions(extensions)
251
+
252
+
253
def output_data(data: Any, format_type: str = "json") -> None:
    """Print structured data through the global output manager."""
    manager = _output_manager
    manager.data(data, format_type)
@@ -0,0 +1,3 @@
1
+ from .detector import PlatformDetector, PlatformInfo
2
+
3
+ __all__ = ["PlatformDetector", "PlatformInfo"]
@@ -0,0 +1,324 @@
1
+ import logging
2
+ import re
3
+ from typing import Protocol, TypeVar
4
+
5
+ from tree_sitter_analyzer.models import (
6
+ SQLElement,
7
+ SQLElementType,
8
+ SQLFunction,
9
+ SQLTrigger,
10
+ SQLView,
11
+ )
12
+ from tree_sitter_analyzer.platform_compat.profiles import BehaviorProfile
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ T = TypeVar("T", bound=SQLElement)
17
+
18
+
19
class AdaptationRule(Protocol):
    """Structural interface that every adaptation rule satisfies."""

    @property
    def rule_id(self) -> str:
        """Identifier that uniquely names this rule."""
        ...

    @property
    def description(self) -> str:
        """Short explanation of the rule's effect."""
        ...

    def apply(self, element: SQLElement, context: dict) -> SQLElement | None:
        """
        Run the rule against a single element.

        Args:
            element: The element under consideration.
            context: Extra information for the rule (e.g. source code).

        Returns:
            The element after adaptation; the unchanged input when the rule
            has nothing to do; None to signal that the element must be
            dropped.
        """
        ...
45
+
46
+
47
class CompatibilityAdapter:
    """Applies platform-specific adaptations to SQL parsing results.

    The set of active rules is chosen once, at construction, from the
    profile's ``adaptation_rules``; without a profile every known rule is
    enabled.
    """

    def __init__(self, profile: BehaviorProfile | None = None):
        self.profile = profile
        self.rules: list[AdaptationRule] = []
        self._load_rules()

    def _load_rules(self) -> None:
        """Populate ``self.rules`` from the profile.

        Rule ids are looked up in a hardcoded registry. The id ``"*"``
        enables every rule and stops processing. Unknown ids are skipped
        (with a debug log), and an id listed more than once is registered
        only once so its rule does not run, or generate elements, twice.
        """
        # In a real implementation these might be loaded dynamically or
        # from a registry; for now the available rules are hardcoded.
        available_rules: dict[str, AdaptationRule] = {
            "fix_function_name_keywords": FixFunctionNameKeywordsRule(),
            "fix_trigger_name_description": FixTriggerNameDescriptionRule(),
            "remove_phantom_triggers": RemovePhantomTriggersRule(),
            "remove_phantom_functions": RemovePhantomFunctionsRule(),
            "recover_views_from_errors": RecoverViewsFromErrorsRule(),
        }

        if not self.profile:
            # Default behavior: enable all rules, as they should be safe.
            self.rules = list(available_rules.values())
            return

        for rule_id in self.profile.adaptation_rules:
            if rule_id == "*":
                # Wildcard: enable all rules (useful for testing or "safe mode")
                self.rules = list(available_rules.values())
                break
            rule = available_rules.get(rule_id)
            if rule is None:
                logger.debug("Ignoring unknown adaptation rule id: %s", rule_id)
            elif rule not in self.rules:
                self.rules.append(rule)

    def adapt_elements(
        self, elements: list[SQLElement], source_code: str
    ) -> list[SQLElement]:
        """
        Main entry point for adapting elements.

        Args:
            elements: The list of extracted elements.
            source_code: The original source code.

        Returns:
            The list of adapted elements: surviving input elements in their
            original order, followed by any elements produced by rules that
            expose a ``generate_elements(context)`` hook.
        """
        context = {"source_code": source_code}
        adapted_elements: list[SQLElement] = []

        # First pass: thread each element through every rule in order; a
        # rule returning None removes the element and ends its chain.
        for element in elements:
            current_element = element
            for rule in self.rules:
                current_element = rule.apply(current_element, context)
                if current_element is None:
                    break
            else:
                adapted_elements.append(current_element)

        # Second pass: the Protocol is element-centric, so rules that
        # recover missing elements from the source (e.g. views hidden in
        # ERROR nodes) opt in through an extra generate_elements() hook.
        for rule in self.rules:
            generate = getattr(rule, "generate_elements", None)
            if callable(generate):
                adapted_elements.extend(generate(context))

        return adapted_elements
128
+
129
+
130
+ # --- Specific Rules ---
131
+
132
+
133
class FixFunctionNameKeywordsRule:
    """
    Rule: fix_function_name_keywords
    Replaces a function's extracted name with the name declared in its
    raw_text whenever the two differ.

    This covers both the case where a SQL keyword (FUNCTION, CREATE, KEY, ...)
    was extracted as the name and the case where the name is simply wrong.
    When raw_text contains no recognizable declaration the element is left
    untouched.
    """

    # FUNCTION [IF NOT EXISTS] [schema.]name -- without the optional parts
    # the capture would land on "IF" or on the schema qualifier.
    # \w matches unicode word characters.
    _DECLARED_NAME = re.compile(
        r"FUNCTION\s+(?:IF\s+NOT\s+EXISTS\s+)?(?:\w+\s*\.\s*)*(\w+)",
        re.IGNORECASE,
    )

    @property
    def rule_id(self) -> str:
        return "fix_function_name_keywords"

    @property
    def description(self) -> str:
        return "Recover correct function name when keyword is extracted"

    @classmethod
    def _declared_name(cls, raw_text: str) -> str | None:
        """Return the unqualified function name declared in raw_text, if any."""
        match = cls._DECLARED_NAME.search(raw_text)
        return match.group(1) if match else None

    def apply(self, element: SQLElement, context: dict) -> SQLElement | None:
        """
        Correct the name of a SQLFunction in place.

        Args:
            element: The element to adapt; non-functions pass through.
            context: Unused by this rule.

        Returns:
            The same element object, renamed if its declaration disagrees
            with the extracted name.
        """
        if not isinstance(element, SQLFunction):
            return element

        declared = self._declared_name(element.raw_text)
        if declared is not None and element.name != declared:
            element.name = declared

        return element
187
+
188
+
189
class FixTriggerNameDescriptionRule:
    """
    Rule: fix_trigger_name_description
    Detects when trigger name is incorrectly set to "description" and
    recovers the declared name from raw_text.
    """

    # TRIGGER [IF NOT EXISTS] [schema.]name -- without the optional parts
    # the capture would land on "IF" or on the schema qualifier.
    # \w matches unicode word characters.
    _DECLARED_NAME = re.compile(
        r"TRIGGER\s+(?:IF\s+NOT\s+EXISTS\s+)?(?:\w+\s*\.\s*)*(\w+)",
        re.IGNORECASE,
    )

    @property
    def rule_id(self) -> str:
        return "fix_trigger_name_description"

    @property
    def description(self) -> str:
        return "Recover correct trigger name when 'description' is extracted"

    @classmethod
    def _declared_name(cls, raw_text: str) -> str | None:
        """Return the unqualified trigger name declared in raw_text, if any."""
        match = cls._DECLARED_NAME.search(raw_text)
        return match.group(1) if match else None

    def apply(self, element: SQLElement, context: dict) -> SQLElement | None:
        """
        Rename, in place, a SQLTrigger whose name is "description".

        Args:
            element: The element to adapt; non-triggers pass through.
            context: Unused by this rule.

        Returns:
            The same element object; its name is replaced only when it
            equals "description" (case-insensitively) and raw_text yields a
            declared name.
        """
        if not isinstance(element, SQLTrigger):
            return element

        if element.name.lower() == "description":
            declared = self._declared_name(element.raw_text)
            if declared is not None:
                element.name = declared

        return element
216
+
217
+
218
class RemovePhantomTriggersRule:
    """
    Rule: remove_phantom_triggers
    Removes SQLTrigger elements whose raw_text contains no CREATE ... TRIGGER
    statement (phantom triggers, which often appear in comments or
    unrelated code).
    """

    # CREATE [OR REPLACE] [DEFINER = user] [TEMP | TEMPORARY | CONSTRAINT] TRIGGER
    # The optional clauses keep genuine triggers written in these dialect
    # forms from being mistaken for phantoms. \s+ handles variable whitespace.
    _CREATE_TRIGGER = re.compile(
        r"CREATE\s+"
        r"(?:OR\s+REPLACE\s+)?"
        r"(?:DEFINER\s*=\s*\S+\s+)?"
        r"(?:(?:TEMP|TEMPORARY|CONSTRAINT)\s+)?"
        r"TRIGGER",
        re.IGNORECASE,
    )

    @property
    def rule_id(self) -> str:
        return "remove_phantom_triggers"

    @property
    def description(self) -> str:
        return "Remove phantom triggers with mismatched content"

    @classmethod
    def _has_create_statement(cls, raw_text: str) -> bool:
        """Return True when raw_text contains a CREATE ... TRIGGER statement."""
        return cls._CREATE_TRIGGER.search(raw_text) is not None

    def apply(self, element: SQLElement, context: dict) -> SQLElement | None:
        """
        Drop a SQLTrigger that has no backing CREATE TRIGGER text.

        Args:
            element: The element to check; non-triggers pass through.
            context: Unused by this rule.

        Returns:
            None for a phantom trigger, otherwise the element unchanged.
        """
        if isinstance(element, SQLTrigger) and not self._has_create_statement(
            element.raw_text
        ):
            logger.debug(
                "Removing phantom trigger: %s (raw_text: %s...)",
                element.name,
                element.raw_text[:50],
            )
            return None
        return element
244
+
245
+
246
class RemovePhantomFunctionsRule:
    """
    Rule: remove_phantom_functions
    Removes SQLFunction elements whose raw_text contains no
    CREATE ... FUNCTION statement (phantom functions, which often appear in
    comments or unrelated code).
    """

    # CREATE [OR REPLACE] [DEFINER = user] [TEMP | TEMPORARY | AGGREGATE] FUNCTION
    # "CREATE OR REPLACE FUNCTION" in particular must count as a real
    # declaration, otherwise such functions are discarded as phantoms.
    # \s+ handles variable whitespace.
    _CREATE_FUNCTION = re.compile(
        r"CREATE\s+"
        r"(?:OR\s+REPLACE\s+)?"
        r"(?:DEFINER\s*=\s*\S+\s+)?"
        r"(?:(?:TEMP|TEMPORARY|AGGREGATE)\s+)?"
        r"FUNCTION",
        re.IGNORECASE,
    )

    @property
    def rule_id(self) -> str:
        return "remove_phantom_functions"

    @property
    def description(self) -> str:
        return "Remove phantom functions with mismatched content"

    @classmethod
    def _has_create_statement(cls, raw_text: str) -> bool:
        """Return True when raw_text contains a CREATE ... FUNCTION statement."""
        return cls._CREATE_FUNCTION.search(raw_text) is not None

    def apply(self, element: SQLElement, context: dict) -> SQLElement | None:
        """
        Drop a SQLFunction that has no backing CREATE FUNCTION text.

        Args:
            element: The element to check; non-functions pass through.
            context: Unused by this rule.

        Returns:
            None for a phantom function, otherwise the element unchanged.
        """
        if isinstance(element, SQLFunction) and not self._has_create_statement(
            element.raw_text
        ):
            logger.debug(
                "Removing phantom function: %s (raw_text: %s...)",
                element.name,
                element.raw_text[:50],
            )
            return None
        return element
269
+
270
+
271
class RecoverViewsFromErrorsRule:
    """
    Rule: recover_views_from_errors
    Scans source code for CREATE VIEW statements that might have been missed
    (e.g. in ERROR nodes) and emits a SQLView for each one found.

    This is a regex-based fallback. It does not see the elements that were
    already extracted, so a view present in both places is reported again;
    de-duplication is left to the caller.
    """

    # Statement must start a line (leading whitespace allowed). Group 1 is
    # the header beginning at CREATE, group 2 the view name. \s* after ^ can
    # also swallow preceding blank lines, which is why positions are taken
    # from group 1 rather than from the whole match.
    # \w matches unicode word characters.
    _VIEW_HEADER = re.compile(
        r"^\s*(CREATE\s+VIEW\s+(?:IF\s+NOT\s+EXISTS\s+)?(\w+)\s+AS)",
        re.IGNORECASE | re.MULTILINE,
    )

    @property
    def rule_id(self) -> str:
        return "recover_views_from_errors"

    @property
    def description(self) -> str:
        return "Recover views from ERROR nodes"

    def apply(self, element: SQLElement, context: dict) -> SQLElement | None:
        # This rule doesn't modify existing elements, it generates new ones.
        return element

    @classmethod
    def _find_view_headers(cls, source_code: str) -> list[tuple[str, int, str]]:
        """Return ``(name, line, header_text)`` for each CREATE VIEW header.

        ``line`` is the 1-based line on which the CREATE keyword itself
        appears; ``header_text`` runs from CREATE through AS.
        """
        found = []
        for match in cls._VIEW_HEADER.finditer(source_code):
            line_number = source_code.count("\n", 0, match.start(1)) + 1
            found.append((match.group(2), line_number, match.group(1)))
        return found

    def generate_elements(self, context: dict) -> list[SQLElement]:
        """
        Build SQLView elements from CREATE VIEW headers in the source.

        Args:
            context: Must carry the source under ``"source_code"``; a
                missing key is treated as empty source.

        Returns:
            One SQLView per header found. Only the header is known, so
            end_line equals start_line and raw_text is the partial header
            text.
        """
        source_code = context.get("source_code", "")
        return [
            SQLView(
                name=view_name,
                start_line=line_number,
                end_line=line_number,  # We don't know end line easily
                raw_text=header,  # Partial text
                sql_element_type=SQLElementType.VIEW,
                element_type="view",
            )
            for view_name, line_number, header in self._find_view_headers(source_code)
        ]