tree-sitter-analyzer 1.9.17.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tree_sitter_analyzer/__init__.py +132 -0
- tree_sitter_analyzer/__main__.py +11 -0
- tree_sitter_analyzer/api.py +853 -0
- tree_sitter_analyzer/cli/__init__.py +39 -0
- tree_sitter_analyzer/cli/__main__.py +12 -0
- tree_sitter_analyzer/cli/argument_validator.py +89 -0
- tree_sitter_analyzer/cli/commands/__init__.py +26 -0
- tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
- tree_sitter_analyzer/cli/commands/base_command.py +181 -0
- tree_sitter_analyzer/cli/commands/default_command.py +18 -0
- tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
- tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
- tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
- tree_sitter_analyzer/cli/commands/query_command.py +109 -0
- tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
- tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
- tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
- tree_sitter_analyzer/cli/commands/table_command.py +414 -0
- tree_sitter_analyzer/cli/info_commands.py +124 -0
- tree_sitter_analyzer/cli_main.py +472 -0
- tree_sitter_analyzer/constants.py +85 -0
- tree_sitter_analyzer/core/__init__.py +15 -0
- tree_sitter_analyzer/core/analysis_engine.py +580 -0
- tree_sitter_analyzer/core/cache_service.py +333 -0
- tree_sitter_analyzer/core/engine.py +585 -0
- tree_sitter_analyzer/core/parser.py +293 -0
- tree_sitter_analyzer/core/query.py +605 -0
- tree_sitter_analyzer/core/query_filter.py +200 -0
- tree_sitter_analyzer/core/query_service.py +340 -0
- tree_sitter_analyzer/encoding_utils.py +530 -0
- tree_sitter_analyzer/exceptions.py +747 -0
- tree_sitter_analyzer/file_handler.py +246 -0
- tree_sitter_analyzer/formatters/__init__.py +1 -0
- tree_sitter_analyzer/formatters/base_formatter.py +201 -0
- tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
- tree_sitter_analyzer/formatters/formatter_config.py +197 -0
- tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
- tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
- tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
- tree_sitter_analyzer/formatters/go_formatter.py +368 -0
- tree_sitter_analyzer/formatters/html_formatter.py +498 -0
- tree_sitter_analyzer/formatters/java_formatter.py +423 -0
- tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
- tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
- tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
- tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
- tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
- tree_sitter_analyzer/formatters/php_formatter.py +301 -0
- tree_sitter_analyzer/formatters/python_formatter.py +830 -0
- tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
- tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
- tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
- tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
- tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
- tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
- tree_sitter_analyzer/interfaces/__init__.py +9 -0
- tree_sitter_analyzer/interfaces/cli.py +535 -0
- tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
- tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
- tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
- tree_sitter_analyzer/language_detector.py +553 -0
- tree_sitter_analyzer/language_loader.py +271 -0
- tree_sitter_analyzer/languages/__init__.py +10 -0
- tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
- tree_sitter_analyzer/languages/css_plugin.py +449 -0
- tree_sitter_analyzer/languages/go_plugin.py +836 -0
- tree_sitter_analyzer/languages/html_plugin.py +496 -0
- tree_sitter_analyzer/languages/java_plugin.py +1299 -0
- tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
- tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
- tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
- tree_sitter_analyzer/languages/php_plugin.py +862 -0
- tree_sitter_analyzer/languages/python_plugin.py +1636 -0
- tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
- tree_sitter_analyzer/languages/rust_plugin.py +673 -0
- tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
- tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
- tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
- tree_sitter_analyzer/legacy_table_formatter.py +860 -0
- tree_sitter_analyzer/mcp/__init__.py +34 -0
- tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
- tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
- tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
- tree_sitter_analyzer/mcp/server.py +869 -0
- tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
- tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
- tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
- tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
- tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
- tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
- tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
- tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
- tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
- tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
- tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
- tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
- tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
- tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
- tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
- tree_sitter_analyzer/models.py +840 -0
- tree_sitter_analyzer/mypy_current_errors.txt +2 -0
- tree_sitter_analyzer/output_manager.py +255 -0
- tree_sitter_analyzer/platform_compat/__init__.py +3 -0
- tree_sitter_analyzer/platform_compat/adapter.py +324 -0
- tree_sitter_analyzer/platform_compat/compare.py +224 -0
- tree_sitter_analyzer/platform_compat/detector.py +67 -0
- tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
- tree_sitter_analyzer/platform_compat/profiles.py +217 -0
- tree_sitter_analyzer/platform_compat/record.py +55 -0
- tree_sitter_analyzer/platform_compat/recorder.py +155 -0
- tree_sitter_analyzer/platform_compat/report.py +92 -0
- tree_sitter_analyzer/plugins/__init__.py +280 -0
- tree_sitter_analyzer/plugins/base.py +647 -0
- tree_sitter_analyzer/plugins/manager.py +384 -0
- tree_sitter_analyzer/project_detector.py +328 -0
- tree_sitter_analyzer/queries/__init__.py +27 -0
- tree_sitter_analyzer/queries/csharp.py +216 -0
- tree_sitter_analyzer/queries/css.py +615 -0
- tree_sitter_analyzer/queries/go.py +275 -0
- tree_sitter_analyzer/queries/html.py +543 -0
- tree_sitter_analyzer/queries/java.py +402 -0
- tree_sitter_analyzer/queries/javascript.py +724 -0
- tree_sitter_analyzer/queries/kotlin.py +192 -0
- tree_sitter_analyzer/queries/markdown.py +258 -0
- tree_sitter_analyzer/queries/php.py +95 -0
- tree_sitter_analyzer/queries/python.py +859 -0
- tree_sitter_analyzer/queries/ruby.py +92 -0
- tree_sitter_analyzer/queries/rust.py +223 -0
- tree_sitter_analyzer/queries/sql.py +555 -0
- tree_sitter_analyzer/queries/typescript.py +871 -0
- tree_sitter_analyzer/queries/yaml.py +236 -0
- tree_sitter_analyzer/query_loader.py +272 -0
- tree_sitter_analyzer/security/__init__.py +22 -0
- tree_sitter_analyzer/security/boundary_manager.py +277 -0
- tree_sitter_analyzer/security/regex_checker.py +297 -0
- tree_sitter_analyzer/security/validator.py +599 -0
- tree_sitter_analyzer/table_formatter.py +782 -0
- tree_sitter_analyzer/utils/__init__.py +53 -0
- tree_sitter_analyzer/utils/logging.py +433 -0
- tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Output Manager for CLI
|
|
4
|
+
|
|
5
|
+
Handles different types of outputs: user information, errors, and structured data.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import sys
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from .utils import log_error, log_warning
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class OutputManager:
|
|
16
|
+
"""Manages different types of output for CLI"""
|
|
17
|
+
|
|
18
|
+
def __init__(self, quiet: bool = False, json_output: bool = False):
|
|
19
|
+
self.quiet = quiet
|
|
20
|
+
self.json_output = json_output
|
|
21
|
+
|
|
22
|
+
def info(self, message: str) -> None:
|
|
23
|
+
"""Output informational message to user"""
|
|
24
|
+
if not self.quiet:
|
|
25
|
+
print(message)
|
|
26
|
+
|
|
27
|
+
def warning(self, message: str) -> None:
|
|
28
|
+
"""Output warning message"""
|
|
29
|
+
if not self.quiet:
|
|
30
|
+
print(f"WARNING: {message}", file=sys.stderr)
|
|
31
|
+
log_warning(message)
|
|
32
|
+
|
|
33
|
+
def error(self, message: str) -> None:
|
|
34
|
+
"""Output error message"""
|
|
35
|
+
print(f"ERROR: {message}", file=sys.stderr)
|
|
36
|
+
log_error(message)
|
|
37
|
+
|
|
38
|
+
def success(self, message: str) -> None:
|
|
39
|
+
"""Output success message"""
|
|
40
|
+
if not self.quiet:
|
|
41
|
+
print(f"✓ {message}")
|
|
42
|
+
|
|
43
|
+
def output_info(self, message: str) -> None:
|
|
44
|
+
"""Output info message (alias for info)"""
|
|
45
|
+
self.info(message)
|
|
46
|
+
|
|
47
|
+
def output_warning(self, message: str) -> None:
|
|
48
|
+
"""Output warning message (alias for warning)"""
|
|
49
|
+
self.warning(message)
|
|
50
|
+
|
|
51
|
+
def output_error(self, message: str) -> None:
|
|
52
|
+
"""Output error message (alias for error)"""
|
|
53
|
+
self.error(message)
|
|
54
|
+
|
|
55
|
+
def output_success(self, message: str) -> None:
|
|
56
|
+
"""Output success message (alias for success)"""
|
|
57
|
+
self.success(message)
|
|
58
|
+
|
|
59
|
+
def data(self, data: Any, format_type: str = "json") -> None:
|
|
60
|
+
"""Output structured data"""
|
|
61
|
+
if self.json_output or format_type == "json":
|
|
62
|
+
print(json.dumps(data, indent=2, ensure_ascii=False))
|
|
63
|
+
else:
|
|
64
|
+
self._format_data(data)
|
|
65
|
+
|
|
66
|
+
def _format_data(self, data: Any) -> None:
|
|
67
|
+
"""Format data for human-readable output"""
|
|
68
|
+
if isinstance(data, dict):
|
|
69
|
+
for key, value in data.items():
|
|
70
|
+
print(f"{key}: {value}")
|
|
71
|
+
elif isinstance(data, list):
|
|
72
|
+
for i, item in enumerate(data, 1):
|
|
73
|
+
print(f"{i}. {item}")
|
|
74
|
+
else:
|
|
75
|
+
print(str(data))
|
|
76
|
+
|
|
77
|
+
def results_header(self, title: str) -> None:
|
|
78
|
+
"""Output results section header"""
|
|
79
|
+
if not self.quiet:
|
|
80
|
+
print(f"\n--- {title} ---")
|
|
81
|
+
|
|
82
|
+
def query_result(self, index: int, result: dict[str, Any]) -> None:
|
|
83
|
+
"""Output query result in formatted way"""
|
|
84
|
+
if not self.quiet:
|
|
85
|
+
print(
|
|
86
|
+
f"\n{index}. {result.get('capture_name', 'Unknown')} ({result.get('node_type', 'Unknown')})"
|
|
87
|
+
)
|
|
88
|
+
print(
|
|
89
|
+
f" Position: Line {result.get('start_line', '?')}-{result.get('end_line', '?')}"
|
|
90
|
+
)
|
|
91
|
+
if "content" in result:
|
|
92
|
+
print(f" Content:\n{result['content']}")
|
|
93
|
+
|
|
94
|
+
def analysis_summary(self, stats: dict[str, Any]) -> None:
|
|
95
|
+
"""Output analysis summary"""
|
|
96
|
+
# Always print human-readable stats to satisfy CLI expectations in tests
|
|
97
|
+
self.results_header("Statistics")
|
|
98
|
+
for key, value in stats.items():
|
|
99
|
+
print(f"{key}: {value}")
|
|
100
|
+
|
|
101
|
+
def language_list(
|
|
102
|
+
self, languages: list[str], title: str = "Supported Languages"
|
|
103
|
+
) -> None:
|
|
104
|
+
"""Output language list"""
|
|
105
|
+
if not self.quiet:
|
|
106
|
+
print(f"{title}:")
|
|
107
|
+
for lang in languages:
|
|
108
|
+
print(f" {lang}")
|
|
109
|
+
|
|
110
|
+
def query_list(self, queries: dict[str, str], language: str) -> None:
|
|
111
|
+
"""Output query list for a language"""
|
|
112
|
+
if not self.quiet:
|
|
113
|
+
print(f"Available query keys ({language}):")
|
|
114
|
+
for query_key, description in queries.items():
|
|
115
|
+
print(f" {query_key:<20} - {description}")
|
|
116
|
+
|
|
117
|
+
def extension_list(self, extensions: list[str]) -> None:
|
|
118
|
+
"""Output supported extensions"""
|
|
119
|
+
if not self.quiet:
|
|
120
|
+
print("Supported file extensions:")
|
|
121
|
+
# Use more efficient chunking
|
|
122
|
+
from itertools import islice
|
|
123
|
+
|
|
124
|
+
chunk_size = 10
|
|
125
|
+
for i in range(0, len(extensions), chunk_size):
|
|
126
|
+
chunk = list(islice(extensions, i, i + chunk_size))
|
|
127
|
+
print(f" {' '.join(chunk)}")
|
|
128
|
+
print(f"Total {len(extensions)} extensions supported")
|
|
129
|
+
|
|
130
|
+
def output_json(self, data: Any) -> None:
|
|
131
|
+
"""Output JSON data"""
|
|
132
|
+
print(json.dumps(data, indent=2, ensure_ascii=False))
|
|
133
|
+
|
|
134
|
+
def output_list(self, items: str | list[Any], title: str | None = None) -> None:
|
|
135
|
+
"""Output a list of items"""
|
|
136
|
+
if title and not self.quiet:
|
|
137
|
+
print(f"{title}:")
|
|
138
|
+
# 文字列が単一要素として渡された場合の処理
|
|
139
|
+
if isinstance(items, str):
|
|
140
|
+
items = [items]
|
|
141
|
+
for item in items:
|
|
142
|
+
if not self.quiet:
|
|
143
|
+
print(f" {item}")
|
|
144
|
+
|
|
145
|
+
def output_section(self, title: str) -> None:
|
|
146
|
+
"""Output a section header"""
|
|
147
|
+
if not self.quiet:
|
|
148
|
+
print(f"\n--- {title} ---")
|
|
149
|
+
|
|
150
|
+
def output_query_results(self, results: Any) -> None:
|
|
151
|
+
"""Output query results"""
|
|
152
|
+
self.data(results)
|
|
153
|
+
|
|
154
|
+
def output_statistics(self, stats: dict[str, Any]) -> None:
|
|
155
|
+
"""Output statistics"""
|
|
156
|
+
self.analysis_summary(stats)
|
|
157
|
+
|
|
158
|
+
def output_languages(self, languages: list[str]) -> None:
|
|
159
|
+
"""Output available languages"""
|
|
160
|
+
self.language_list(languages)
|
|
161
|
+
|
|
162
|
+
def output_queries(self, queries: list[str]) -> None:
|
|
163
|
+
"""Output available queries"""
|
|
164
|
+
query_dict = {q: f"Query {q}" for q in queries}
|
|
165
|
+
self.query_list(query_dict, "All")
|
|
166
|
+
|
|
167
|
+
def output_extensions(self, extensions: list[str]) -> None:
|
|
168
|
+
"""Output file extensions"""
|
|
169
|
+
self.extension_list(extensions)
|
|
170
|
+
|
|
171
|
+
def output_data(self, data: Any, format_type: str = "json") -> None:
|
|
172
|
+
"""Output data (alias for data)"""
|
|
173
|
+
self.data(data, format_type)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# Module-wide manager used by the convenience functions below; kept so that
# callers written against the function-style API continue to work.
_output_manager = OutputManager()


def set_output_mode(quiet: bool = False, json_output: bool = False) -> None:
    """Replace the module-wide manager with one using the given flags.

    Args:
        quiet: forwarded to ``OutputManager`` as its ``quiet`` flag.
        json_output: forwarded to ``OutputManager`` as its ``json_output`` flag.
    """
    global _output_manager
    replacement = OutputManager(quiet=quiet, json_output=json_output)
    _output_manager = replacement


def get_output_manager() -> OutputManager:
    """Return the manager currently installed at module level."""
    current = _output_manager
    return current
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
# Convenience functions
|
|
192
|
+
def output_info(message: str) -> None:
    """Send an informational message through the module-wide manager."""
    manager = _output_manager
    manager.info(message)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def output_warning(message: str) -> None:
    """Send a warning through the module-wide manager."""
    manager = _output_manager
    manager.warning(message)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def output_error(message: str) -> None:
    """Send an error message through the module-wide manager."""
    manager = _output_manager
    manager.error(message)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def output_success(message: str) -> None:
    """Send a success message through the module-wide manager."""
    manager = _output_manager
    manager.success(message)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def output_json(data: Any) -> None:
    """Print ``data`` as JSON through the module-wide manager."""
    manager = _output_manager
    manager.output_json(data)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def output_list(items: str | list[Any], title: str | None = None) -> None:
    """Print ``items`` (optionally under ``title``) via the module-wide manager."""
    manager = _output_manager
    manager.output_list(items, title)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def output_section(title: str) -> None:
    """Print a section header via the module-wide manager."""
    manager = _output_manager
    manager.output_section(title)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def output_query_results(results: Any) -> None:
    """Print query results via the module-wide manager."""
    manager = _output_manager
    manager.output_query_results(results)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def output_statistics(stats: dict[str, Any]) -> None:
    """Print analysis statistics via the module-wide manager."""
    manager = _output_manager
    manager.output_statistics(stats)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def output_languages(languages: list[str], title: str = "Supported Languages") -> None:
    """Print ``languages`` under ``title`` via the module-wide manager."""
    manager = _output_manager
    manager.language_list(languages, title)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def output_queries(queries: list[str], language: str = "All") -> None:
    """Print query names for ``language`` via the module-wide manager.

    Each name is paired with a generated ``Query <name>`` description
    before being handed to the manager's ``query_list``.
    """
    descriptions = {}
    for name in queries:
        descriptions[name] = f"Query {name}"
    manager = _output_manager
    manager.query_list(descriptions, language)
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def output_extensions(extensions: list[str]) -> None:
    """Print supported file extensions via the module-wide manager."""
    manager = _output_manager
    manager.output_extensions(extensions)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def output_data(data: Any, format_type: str = "json") -> None:
    """Print structured ``data`` in ``format_type`` via the module-wide manager."""
    manager = _output_manager
    manager.data(data, format_type)
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import re
|
|
3
|
+
from typing import Protocol, TypeVar
|
|
4
|
+
|
|
5
|
+
from tree_sitter_analyzer.models import (
|
|
6
|
+
SQLElement,
|
|
7
|
+
SQLElementType,
|
|
8
|
+
SQLFunction,
|
|
9
|
+
SQLTrigger,
|
|
10
|
+
SQLView,
|
|
11
|
+
)
|
|
12
|
+
from tree_sitter_analyzer.platform_compat.profiles import BehaviorProfile
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
T = TypeVar("T", bound=SQLElement)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class AdaptationRule(Protocol):
    """Structural interface for a rule that adapts platform-specific behavior.

    Any object exposing these members can be used as a rule; no inheritance
    is required.
    """

    @property
    def rule_id(self) -> str:
        """Identifier that uniquely names this rule."""
        ...

    @property
    def description(self) -> str:
        """Short human-readable summary of the rule's effect."""
        ...

    def apply(self, element: SQLElement, context: dict) -> SQLElement | None:
        """
        Run the rule against a single element.

        Args:
            element: The element to adapt.
            context: Additional context (e.g. source code).

        Returns:
            The adapted element; the original element when nothing needs
            to change; or None when the element should be removed.
        """
        ...
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class CompatibilityAdapter:
    """Applies platform-specific adaptations to SQL parsing results.

    The adapter owns an ordered list of rules (``self.rules``). Each
    extracted element is passed through every rule in turn; afterwards,
    rules that expose an optional ``generate_elements(context)`` hook may
    contribute elements recovered directly from the source text.
    """

    def __init__(self, profile: BehaviorProfile | None = None):
        """
        Args:
            profile: Behavior profile whose ``adaptation_rules`` select the
                rules to enable. When omitted (or falsy) every available
                rule is enabled.
        """
        self.profile = profile
        self.rules: list[AdaptationRule] = []
        self._load_rules()

    def _load_rules(self) -> None:
        """Populate ``self.rules`` from the profile.

        Rule ids are looked up in a hardcoded registry. The id ``"*"``
        enables every rule and stops further processing. Ids that are not
        in the registry are skipped, with a warning so that a typo in a
        profile does not silently disable an adaptation.
        """
        available_rules = {
            "fix_function_name_keywords": FixFunctionNameKeywordsRule(),
            "fix_trigger_name_description": FixTriggerNameDescriptionRule(),
            "remove_phantom_triggers": RemovePhantomTriggersRule(),
            "remove_phantom_functions": RemovePhantomFunctionsRule(),
            "recover_views_from_errors": RecoverViewsFromErrorsRule(),
        }

        if not self.profile:
            # No profile: enable everything, the rules are meant to be safe.
            self.rules = list(available_rules.values())
            return

        for rule_id in self.profile.adaptation_rules:
            if rule_id == "*":
                # Wildcard: enable all rules (useful for testing or "safe mode").
                self.rules = list(available_rules.values())
                break
            rule = available_rules.get(rule_id)
            if rule is None:
                logger.warning("Ignoring unknown adaptation rule %r", rule_id)
                continue
            self.rules.append(rule)

    def adapt_elements(
        self, elements: list[SQLElement], source_code: str
    ) -> list[SQLElement]:
        """
        Main entry point for adapting elements.

        Args:
            elements: The list of extracted elements.
            source_code: The original source code.

        Returns:
            The adapted elements, in their original order, followed by any
            elements recovered by generation rules. A recovered element is
            skipped when an element of the same class and name already
            survived the first pass, so recovery never duplicates something
            the parser extracted correctly.
        """
        context = {"source_code": source_code}
        adapted_elements: list[SQLElement] = []

        # First pass: thread each element through every rule; a rule that
        # returns None removes the element and ends its processing.
        for element in elements:
            current_element: SQLElement | None = element
            for rule in self.rules:
                current_element = rule.apply(current_element, context)
                if current_element is None:
                    break
            if current_element is not None:
                adapted_elements.append(current_element)

        # Second pass: the AdaptationRule protocol is element-centric, so
        # rules that recover missing elements expose an optional
        # generate_elements(context) hook instead.
        already_extracted = {
            (type(item), item.name) for item in adapted_elements
        }
        for rule in self.rules:
            generate = getattr(rule, "generate_elements", None)
            if generate is None:
                continue
            for candidate in generate(context):
                if (type(candidate), candidate.name) in already_extracted:
                    continue
                adapted_elements.append(candidate)

        return adapted_elements
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# --- Specific Rules ---
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
class FixFunctionNameKeywordsRule:
    """
    Rule: fix_function_name_keywords
    Recovers the function name from raw_text when the extracted name is a
    SQL keyword or otherwise disagrees with the CREATE statement.

    The name that follows the FUNCTION keyword in raw_text is treated as
    authoritative: whenever it can be found, the element is renamed to it.
    (Checking the extracted name against a keyword list first is
    unnecessary, because both the keyword case and the general mismatch
    case end in the same rename.)
    """

    # FUNCTION [IF NOT EXISTS] <name>; \w matches unicode word characters.
    # The optional IF NOT EXISTS clause is skipped so that "IF" is never
    # mistaken for the function name.
    _NAME_PATTERN = re.compile(
        r"FUNCTION\s+(?:IF\s+NOT\s+EXISTS\s+)?([\w]+)", re.IGNORECASE
    )

    @property
    def rule_id(self) -> str:
        return "fix_function_name_keywords"

    @property
    def description(self) -> str:
        return "Recover correct function name when keyword is extracted"

    def apply(self, element: SQLElement, context: dict) -> SQLElement | None:
        """Rename ``element`` in place when raw_text yields a different name.

        Args:
            element: Element to inspect; anything that is not a
                ``SQLFunction`` is returned untouched.
            context: Unused by this rule.

        Returns:
            The same element object (possibly renamed); never None.
        """
        if not isinstance(element, SQLFunction):
            return element

        match = self._NAME_PATTERN.search(element.raw_text)
        if match is None:
            # Nothing to compare against; keep whatever was extracted.
            return element

        correct_name = match.group(1)
        if element.name != correct_name:
            logger.debug(
                "Renaming function %s -> %s", element.name, correct_name
            )
            element.name = correct_name

        return element
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class FixTriggerNameDescriptionRule:
    """
    Rule: fix_trigger_name_description
    Detects when trigger name is incorrectly set to "description" and
    recovers the real name from raw_text.
    """

    # TRIGGER [IF NOT EXISTS] <name>; \w matches unicode word characters.
    # The optional IF NOT EXISTS clause is skipped so that "IF" is never
    # mistaken for the trigger name.
    _NAME_PATTERN = re.compile(
        r"TRIGGER\s+(?:IF\s+NOT\s+EXISTS\s+)?([\w]+)", re.IGNORECASE
    )

    @property
    def rule_id(self) -> str:
        return "fix_trigger_name_description"

    @property
    def description(self) -> str:
        return "Recover correct trigger name when 'description' is extracted"

    def apply(self, element: SQLElement, context: dict) -> SQLElement | None:
        """Rename a trigger called "description" using its raw_text.

        Args:
            element: Element to inspect; anything that is not a
                ``SQLTrigger`` is returned untouched.
            context: Unused by this rule.

        Returns:
            The same element object (possibly renamed); never None. The
            name is left alone when raw_text contains no usable name.
        """
        if not isinstance(element, SQLTrigger):
            return element

        # The comparison is case-insensitive, matching any casing of the
        # bogus name.
        if element.name.lower() != "description":
            return element

        match = self._NAME_PATTERN.search(element.raw_text)
        if match:
            element.name = match.group(1)

        return element
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class RemovePhantomTriggersRule:
    """
    Rule: remove_phantom_triggers
    Detects elements where type doesn't match content (phantom triggers).
    """

    # A real trigger's text contains a CREATE ... TRIGGER statement. The
    # optional modifiers keep genuine "CREATE OR REPLACE TRIGGER",
    # "CREATE TEMP[ORARY] TRIGGER" and "CREATE CONSTRAINT TRIGGER"
    # statements from being discarded as phantoms. \s+ tolerates variable
    # whitespace, including line breaks.
    _CREATE_PATTERN = re.compile(
        r"CREATE\s+(?:OR\s+REPLACE\s+)?"
        r"(?:(?:TEMP|TEMPORARY|CONSTRAINT)\s+)?TRIGGER",
        re.IGNORECASE,
    )

    @property
    def rule_id(self) -> str:
        return "remove_phantom_triggers"

    @property
    def description(self) -> str:
        return "Remove phantom triggers with mismatched content"

    def apply(self, element: SQLElement, context: dict) -> SQLElement | None:
        """Drop trigger elements whose raw_text has no CREATE TRIGGER.

        Args:
            element: Element to inspect; anything that is not a
                ``SQLTrigger`` is returned untouched.
            context: Unused by this rule.

        Returns:
            None when the element is a trigger whose raw_text does not
            contain a CREATE TRIGGER statement, otherwise the element.
        """
        if not isinstance(element, SQLTrigger):
            return element

        if self._CREATE_PATTERN.search(element.raw_text):
            return element

        # Phantom triggers often come from comments or unrelated code.
        logger.debug(
            "Removing phantom trigger: %s (raw_text: %s...)",
            element.name,
            element.raw_text[:50],
        )
        return None
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
class RemovePhantomFunctionsRule:
    """
    Rule: remove_phantom_functions
    Detects elements where type doesn't match content (phantom functions).
    """

    # A real function's text contains a CREATE ... FUNCTION statement. The
    # optional modifiers keep genuine "CREATE OR REPLACE FUNCTION" and
    # "CREATE TEMP[ORARY] FUNCTION" statements from being discarded as
    # phantoms. \s+ tolerates variable whitespace, including line breaks.
    _CREATE_PATTERN = re.compile(
        r"CREATE\s+(?:OR\s+REPLACE\s+)?(?:TEMP(?:ORARY)?\s+)?FUNCTION",
        re.IGNORECASE,
    )

    @property
    def rule_id(self) -> str:
        return "remove_phantom_functions"

    @property
    def description(self) -> str:
        return "Remove phantom functions with mismatched content"

    def apply(self, element: SQLElement, context: dict) -> SQLElement | None:
        """Drop function elements whose raw_text has no CREATE FUNCTION.

        Args:
            element: Element to inspect; anything that is not a
                ``SQLFunction`` is returned untouched.
            context: Unused by this rule.

        Returns:
            None when the element is a function whose raw_text does not
            contain a CREATE FUNCTION statement, otherwise the element.
        """
        if not isinstance(element, SQLFunction):
            return element

        if self._CREATE_PATTERN.search(element.raw_text):
            return element

        # Phantom functions often come from comments or unrelated code.
        logger.debug(
            "Removing phantom function: %s (raw_text: %s...)",
            element.name,
            element.raw_text[:50],
        )
        return None
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
class RecoverViewsFromErrorsRule:
    """
    Rule: recover_views_from_errors
    Scans source code for CREATE VIEW statements that might have been missed (e.g. in ERROR nodes).
    """

    # Fallback regex: CREATE VIEW [IF NOT EXISTS] <name> AS, where CREATE
    # is the first token on its line. \w matches unicode word characters.
    # With MULTILINE, the leading ^\s* may begin on blank lines *before*
    # the statement, so the CREATE keyword is captured separately to give
    # an exact start position.
    _VIEW_PATTERN = re.compile(
        r"^\s*(?P<create>CREATE)\s+VIEW\s+(?:IF\s+NOT\s+EXISTS\s+)?"
        r"(?P<name>[\w]+)\s+AS",
        re.IGNORECASE | re.MULTILINE,
    )

    @property
    def rule_id(self) -> str:
        return "recover_views_from_errors"

    @property
    def description(self) -> str:
        return "Recover views from ERROR nodes"

    def apply(self, element: SQLElement, context: dict) -> SQLElement | None:
        """Return ``element`` unchanged; this rule only generates elements."""
        return element

    def generate_elements(self, context: dict) -> list[SQLElement]:
        """Build a ``SQLView`` for every CREATE VIEW header in the source.

        Args:
            context: Mapping whose ``"source_code"`` entry holds the text to
                scan; a missing or empty entry yields no elements.

        Returns:
            One view per match, in source order. This method cannot see the
            elements that were already extracted, so the result may include
            views the parser also found.

            ``raw_text`` holds only the matched header (from CREATE through
            AS), not the whole statement. ``start_line`` is the 1-based line
            of the CREATE keyword and ``end_line`` the line on which the
            matched header ends.
        """
        source_code = context.get("source_code") or ""
        new_elements: list[SQLElement] = []

        for match in self._VIEW_PATTERN.finditer(source_code):
            # Measure from the CREATE keyword, not match.start(), which may
            # point at whitespace-only lines preceding the statement.
            create_pos = match.start("create")
            header = source_code[create_pos:match.end()]
            start_line = source_code.count("\n", 0, create_pos) + 1

            view = SQLView(
                name=match.group("name"),
                start_line=start_line,
                # The end of the full statement is unknown; report the end
                # of the header that raw_text actually contains.
                end_line=start_line + header.count("\n"),
                raw_text=header,
                sql_element_type=SQLElementType.VIEW,
                element_type="view",
            )
            new_elements.append(view)

        return new_elements
|