tree-sitter-analyzer 1.9.17.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tree_sitter_analyzer/__init__.py +132 -0
- tree_sitter_analyzer/__main__.py +11 -0
- tree_sitter_analyzer/api.py +853 -0
- tree_sitter_analyzer/cli/__init__.py +39 -0
- tree_sitter_analyzer/cli/__main__.py +12 -0
- tree_sitter_analyzer/cli/argument_validator.py +89 -0
- tree_sitter_analyzer/cli/commands/__init__.py +26 -0
- tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
- tree_sitter_analyzer/cli/commands/base_command.py +181 -0
- tree_sitter_analyzer/cli/commands/default_command.py +18 -0
- tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
- tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
- tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
- tree_sitter_analyzer/cli/commands/query_command.py +109 -0
- tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
- tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
- tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
- tree_sitter_analyzer/cli/commands/table_command.py +414 -0
- tree_sitter_analyzer/cli/info_commands.py +124 -0
- tree_sitter_analyzer/cli_main.py +472 -0
- tree_sitter_analyzer/constants.py +85 -0
- tree_sitter_analyzer/core/__init__.py +15 -0
- tree_sitter_analyzer/core/analysis_engine.py +580 -0
- tree_sitter_analyzer/core/cache_service.py +333 -0
- tree_sitter_analyzer/core/engine.py +585 -0
- tree_sitter_analyzer/core/parser.py +293 -0
- tree_sitter_analyzer/core/query.py +605 -0
- tree_sitter_analyzer/core/query_filter.py +200 -0
- tree_sitter_analyzer/core/query_service.py +340 -0
- tree_sitter_analyzer/encoding_utils.py +530 -0
- tree_sitter_analyzer/exceptions.py +747 -0
- tree_sitter_analyzer/file_handler.py +246 -0
- tree_sitter_analyzer/formatters/__init__.py +1 -0
- tree_sitter_analyzer/formatters/base_formatter.py +201 -0
- tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
- tree_sitter_analyzer/formatters/formatter_config.py +197 -0
- tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
- tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
- tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
- tree_sitter_analyzer/formatters/go_formatter.py +368 -0
- tree_sitter_analyzer/formatters/html_formatter.py +498 -0
- tree_sitter_analyzer/formatters/java_formatter.py +423 -0
- tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
- tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
- tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
- tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
- tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
- tree_sitter_analyzer/formatters/php_formatter.py +301 -0
- tree_sitter_analyzer/formatters/python_formatter.py +830 -0
- tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
- tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
- tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
- tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
- tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
- tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
- tree_sitter_analyzer/interfaces/__init__.py +9 -0
- tree_sitter_analyzer/interfaces/cli.py +535 -0
- tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
- tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
- tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
- tree_sitter_analyzer/language_detector.py +553 -0
- tree_sitter_analyzer/language_loader.py +271 -0
- tree_sitter_analyzer/languages/__init__.py +10 -0
- tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
- tree_sitter_analyzer/languages/css_plugin.py +449 -0
- tree_sitter_analyzer/languages/go_plugin.py +836 -0
- tree_sitter_analyzer/languages/html_plugin.py +496 -0
- tree_sitter_analyzer/languages/java_plugin.py +1299 -0
- tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
- tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
- tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
- tree_sitter_analyzer/languages/php_plugin.py +862 -0
- tree_sitter_analyzer/languages/python_plugin.py +1636 -0
- tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
- tree_sitter_analyzer/languages/rust_plugin.py +673 -0
- tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
- tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
- tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
- tree_sitter_analyzer/legacy_table_formatter.py +860 -0
- tree_sitter_analyzer/mcp/__init__.py +34 -0
- tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
- tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
- tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
- tree_sitter_analyzer/mcp/server.py +869 -0
- tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
- tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
- tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
- tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
- tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
- tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
- tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
- tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
- tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
- tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
- tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
- tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
- tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
- tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
- tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
- tree_sitter_analyzer/models.py +840 -0
- tree_sitter_analyzer/mypy_current_errors.txt +2 -0
- tree_sitter_analyzer/output_manager.py +255 -0
- tree_sitter_analyzer/platform_compat/__init__.py +3 -0
- tree_sitter_analyzer/platform_compat/adapter.py +324 -0
- tree_sitter_analyzer/platform_compat/compare.py +224 -0
- tree_sitter_analyzer/platform_compat/detector.py +67 -0
- tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
- tree_sitter_analyzer/platform_compat/profiles.py +217 -0
- tree_sitter_analyzer/platform_compat/record.py +55 -0
- tree_sitter_analyzer/platform_compat/recorder.py +155 -0
- tree_sitter_analyzer/platform_compat/report.py +92 -0
- tree_sitter_analyzer/plugins/__init__.py +280 -0
- tree_sitter_analyzer/plugins/base.py +647 -0
- tree_sitter_analyzer/plugins/manager.py +384 -0
- tree_sitter_analyzer/project_detector.py +328 -0
- tree_sitter_analyzer/queries/__init__.py +27 -0
- tree_sitter_analyzer/queries/csharp.py +216 -0
- tree_sitter_analyzer/queries/css.py +615 -0
- tree_sitter_analyzer/queries/go.py +275 -0
- tree_sitter_analyzer/queries/html.py +543 -0
- tree_sitter_analyzer/queries/java.py +402 -0
- tree_sitter_analyzer/queries/javascript.py +724 -0
- tree_sitter_analyzer/queries/kotlin.py +192 -0
- tree_sitter_analyzer/queries/markdown.py +258 -0
- tree_sitter_analyzer/queries/php.py +95 -0
- tree_sitter_analyzer/queries/python.py +859 -0
- tree_sitter_analyzer/queries/ruby.py +92 -0
- tree_sitter_analyzer/queries/rust.py +223 -0
- tree_sitter_analyzer/queries/sql.py +555 -0
- tree_sitter_analyzer/queries/typescript.py +871 -0
- tree_sitter_analyzer/queries/yaml.py +236 -0
- tree_sitter_analyzer/query_loader.py +272 -0
- tree_sitter_analyzer/security/__init__.py +22 -0
- tree_sitter_analyzer/security/boundary_manager.py +277 -0
- tree_sitter_analyzer/security/regex_checker.py +297 -0
- tree_sitter_analyzer/security/validator.py +599 -0
- tree_sitter_analyzer/table_formatter.py +782 -0
- tree_sitter_analyzer/utils/__init__.py +53 -0
- tree_sitter_analyzer/utils/logging.py +433 -0
- tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
|
@@ -0,0 +1,1636 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Python Language Plugin
|
|
4
|
+
|
|
5
|
+
Enhanced Python-specific parsing and element extraction functionality.
|
|
6
|
+
Provides comprehensive support for modern Python features including async/await,
|
|
7
|
+
decorators, type hints, context managers, and framework-specific patterns.
|
|
8
|
+
Equivalent to JavaScript plugin capabilities for consistent language support.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
import tree_sitter
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
import tree_sitter
|
|
18
|
+
|
|
19
|
+
TREE_SITTER_AVAILABLE = True
|
|
20
|
+
except ImportError:
|
|
21
|
+
TREE_SITTER_AVAILABLE = False
|
|
22
|
+
|
|
23
|
+
from ..core.analysis_engine import AnalysisRequest
|
|
24
|
+
from ..encoding_utils import extract_text_slice, safe_encode
|
|
25
|
+
from ..models import AnalysisResult, Class, CodeElement, Function, Import, Variable
|
|
26
|
+
from ..plugins.base import ElementExtractor, LanguagePlugin
|
|
27
|
+
from ..utils import log_debug, log_error, log_warning
|
|
28
|
+
from ..utils.tree_sitter_compat import TreeSitterQueryCompat
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class PythonElementExtractor(ElementExtractor):
|
|
32
|
+
"""Enhanced Python-specific element extractor with comprehensive feature support"""
|
|
33
|
+
|
|
34
|
+
def __init__(self) -> None:
|
|
35
|
+
"""Initialize the Python element extractor."""
|
|
36
|
+
self.current_module: str = ""
|
|
37
|
+
self.current_file: str = ""
|
|
38
|
+
self.source_code: str = ""
|
|
39
|
+
self.content_lines: list[str] = []
|
|
40
|
+
self.imports: list[str] = []
|
|
41
|
+
self.exports: list[dict[str, Any]] = []
|
|
42
|
+
|
|
43
|
+
# Performance optimization caches
|
|
44
|
+
self._node_text_cache: dict[int, str] = {}
|
|
45
|
+
self._processed_nodes: set[int] = set()
|
|
46
|
+
self._element_cache: dict[tuple[int, str], Any] = {}
|
|
47
|
+
self._file_encoding: str | None = None
|
|
48
|
+
self._docstring_cache: dict[int, str] = {}
|
|
49
|
+
self._complexity_cache: dict[int, int] = {}
|
|
50
|
+
|
|
51
|
+
# Python-specific tracking
|
|
52
|
+
self.is_module: bool = False
|
|
53
|
+
self.framework_type: str = "" # django, flask, fastapi, etc.
|
|
54
|
+
self.python_version: str = "3.8" # default
|
|
55
|
+
|
|
56
|
+
def extract_functions(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Function]:
    """Collect every function definition found in ``tree``.

    Stores ``source_code`` (``""`` when falsy) and its lines on the
    instance, clears the caches and re-detects the file characteristics
    before walking the tree.

    Returns:
        The extracted functions. An empty list is returned when there is no
        tree or root node, or when the traversal raises.
    """
    self.source_code = source_code if source_code else ""
    self.content_lines = self.source_code.split("\n")
    self._reset_caches()
    self._detect_file_characteristics()

    found: list[Function] = []
    handlers = {"function_definition": self._extract_function_optimized}

    # Nothing to walk without a parsed tree.
    if tree is None or tree.root_node is None:
        return found

    try:
        self._traverse_and_extract_iterative(
            tree.root_node, handlers, found, "function"
        )
        log_debug(f"Extracted {len(found)} Python functions")
    except Exception as e:
        # Best effort: a failed traversal discards partial results.
        log_debug(f"Error during function extraction: {e}")
        return []

    return found
|
|
83
|
+
|
|
84
|
+
def extract_classes(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Class]:
    """Collect every class definition found in ``tree``.

    Stores ``source_code`` (``""`` when falsy) and its lines on the
    instance and clears the caches before walking the tree.

    Returns:
        The extracted classes. An empty list is returned when there is no
        tree or root node, or when the traversal raises.
    """
    self.source_code = source_code if source_code else ""
    self.content_lines = self.source_code.split("\n")
    self._reset_caches()

    found: list[Class] = []
    handlers = {"class_definition": self._extract_class_optimized}

    # Nothing to walk without a parsed tree.
    if tree is None or tree.root_node is None:
        return found

    try:
        self._traverse_and_extract_iterative(
            tree.root_node, handlers, found, "class"
        )
        log_debug(f"Extracted {len(found)} Python classes")
    except Exception as e:
        # Best effort: a failed traversal discards partial results.
        log_debug(f"Error during class extraction: {e}")
        return []

    return found
|
|
110
|
+
|
|
111
|
+
def extract_variables(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Variable]:
    """Extract Python variable definitions (class attributes only).

    Class bodies are located with a tree-sitter query and each body is
    handed to ``_extract_class_attributes``. Function-level variables are
    deliberately not extracted.

    Args:
        tree: Parsed tree. When it exposes no usable ``language`` the query
            cannot be run and no variables are returned.
        source_code: Source text forwarded to ``_extract_class_attributes``.

    Returns:
        The extracted class attributes. Failures are logged and whatever
        was collected up to that point (possibly nothing) is returned.
    """
    variables: list[Variable] = []
    # Bound up front so the loop below is valid on every path. Previously a
    # tree without a language raised NameError here, which was then reported
    # as a misleading "Could not extract" warning.
    class_bodies: list[Any] = []

    # Only extract class-level attributes, not function-level variables
    try:
        # Find class declarations using compatible API
        class_query = """
        (class_definition
            body: (block) @class.body) @class.definition
        """

        language = getattr(tree, "language", None)
        if language:
            try:
                captures = TreeSitterQueryCompat.safe_execute_query(
                    language, class_query, tree.root_node, fallback_result=[]
                )
                class_bodies = [
                    node
                    for node, capture_name in captures
                    if capture_name == "class.body"
                ]
            except Exception as e:
                log_debug(
                    f"Could not extract Python class attributes using query: {e}"
                )
                class_bodies = []

        # For each class body, extract attribute assignments
        for class_body in class_bodies:
            variables.extend(
                self._extract_class_attributes(class_body, source_code)
            )

    except Exception as e:
        log_warning(f"Could not extract Python class attributes: {e}")

    return variables
|
|
151
|
+
|
|
152
|
+
def _reset_caches(self) -> None:
|
|
153
|
+
"""Reset performance caches"""
|
|
154
|
+
self._node_text_cache.clear()
|
|
155
|
+
self._processed_nodes.clear()
|
|
156
|
+
self._element_cache.clear()
|
|
157
|
+
self._docstring_cache.clear()
|
|
158
|
+
self._complexity_cache.clear()
|
|
159
|
+
|
|
160
|
+
def _detect_file_characteristics(self) -> None:
|
|
161
|
+
"""Detect Python file characteristics"""
|
|
162
|
+
# Check if it's a module
|
|
163
|
+
self.is_module = "import " in self.source_code or "from " in self.source_code
|
|
164
|
+
|
|
165
|
+
# Reset framework type
|
|
166
|
+
self.framework_type = ""
|
|
167
|
+
|
|
168
|
+
# Detect framework (case-sensitive)
|
|
169
|
+
if "django" in self.source_code or "from django" in self.source_code:
|
|
170
|
+
self.framework_type = "django"
|
|
171
|
+
elif "flask" in self.source_code or "from flask" in self.source_code:
|
|
172
|
+
self.framework_type = "flask"
|
|
173
|
+
elif "fastapi" in self.source_code or "from fastapi" in self.source_code:
|
|
174
|
+
self.framework_type = "fastapi"
|
|
175
|
+
|
|
176
|
+
def _traverse_and_extract_iterative(
    self,
    root_node: Optional["tree_sitter.Node"],
    extractors: dict[str, Any],
    results: list[Any],
    element_type: str,
) -> None:
    """Walk the tree depth-first without recursion and extract target nodes.

    Only nodes whose type is an extractor key ("targets") or a known
    container are descended into; every other subtree is pruned. Extracted
    elements are appended to ``results`` in source order.

    Args:
        root_node: Node to start from; a falsy value makes this a no-op.
        extractors: Maps a node type to a callable taking the node and
            returning an element, a list of elements, or a falsy value.
        results: Output list, extended in place.
        element_type: Label mixed into the element cache key so the same
            node can be cached separately per kind of extraction.
    """
    if not root_node:
        return

    target_node_types = set(extractors.keys())
    container_node_types = {
        "module",
        "class_definition",
        "function_definition",
        # The tree-sitter-python grammar wraps a decorated function or class
        # in a decorated_definition node; without it in this set, decorated
        # definitions would be pruned and never extracted.
        "decorated_definition",
        "if_statement",
        "elif_clause",
        "else_clause",
        "for_statement",
        "while_statement",
        "with_statement",
        "try_statement",
        "except_clause",
        "finally_clause",
        "match_statement",
        "case_clause",
        "block",
    }

    def collect(element: Any) -> None:
        """Add a truthy extraction result (single element or list) to results."""
        if not element:
            return
        if isinstance(element, list):
            results.extend(element)
        else:
            results.append(element)

    node_stack = [(root_node, 0)]
    processed_nodes = 0
    max_depth = 50

    while node_stack:
        current_node, depth = node_stack.pop()

        if depth > max_depth:
            log_warning(f"Maximum traversal depth ({max_depth}) exceeded")
            continue

        processed_nodes += 1
        node_type = current_node.type

        # Early termination for irrelevant nodes (the root is always visited)
        if (
            depth > 0
            and node_type not in target_node_types
            and node_type not in container_node_types
        ):
            continue

        # Process target nodes
        if node_type in target_node_types:
            node_id = id(current_node)

            if node_id in self._processed_nodes:
                continue

            cache_key = (node_id, element_type)
            if cache_key in self._element_cache:
                collect(self._element_cache[cache_key])
                self._processed_nodes.add(node_id)
                continue

            # Extract and cache
            extractor = extractors.get(node_type)
            if extractor:
                try:
                    element = extractor(current_node)
                    self._element_cache[cache_key] = element
                    collect(element)
                except Exception as e:
                    # Skip nodes that cause extraction errors, but leave a
                    # trace so the failure is diagnosable.
                    log_debug(f"Skipping {node_type} node after extraction error: {e}")
                self._processed_nodes.add(node_id)

        # Add children to stack, reversed so they are popped in source order
        children = current_node.children
        if children:
            try:
                child_nodes = list(children)
            except TypeError:
                # Non-iterable children (e.g. Mock objects): nothing to descend into
                child_nodes = []
            node_stack.extend((child, depth + 1) for child in reversed(child_nodes))

    log_debug(f"Iterative traversal processed {processed_nodes} nodes")
|
|
275
|
+
|
|
276
|
+
def _get_node_text_optimized(self, node: "tree_sitter.Node") -> str:
|
|
277
|
+
"""Get node text with optimized caching"""
|
|
278
|
+
node_id = id(node)
|
|
279
|
+
|
|
280
|
+
if node_id in self._node_text_cache:
|
|
281
|
+
return self._node_text_cache[node_id]
|
|
282
|
+
|
|
283
|
+
try:
|
|
284
|
+
start_byte = node.start_byte
|
|
285
|
+
end_byte = node.end_byte
|
|
286
|
+
|
|
287
|
+
encoding = self._file_encoding or "utf-8"
|
|
288
|
+
content_bytes = safe_encode("\n".join(self.content_lines), encoding)
|
|
289
|
+
text = extract_text_slice(content_bytes, start_byte, end_byte, encoding)
|
|
290
|
+
|
|
291
|
+
# If byte extraction returns empty string, try fallback
|
|
292
|
+
if text:
|
|
293
|
+
self._node_text_cache[node_id] = text
|
|
294
|
+
return text
|
|
295
|
+
except Exception as e:
|
|
296
|
+
log_error(f"Error in _get_node_text_optimized: {e}")
|
|
297
|
+
|
|
298
|
+
# Fallback to simple text extraction
|
|
299
|
+
try:
|
|
300
|
+
start_point = node.start_point
|
|
301
|
+
end_point = node.end_point
|
|
302
|
+
|
|
303
|
+
# Validate points are within bounds
|
|
304
|
+
if start_point[0] < 0 or start_point[0] >= len(self.content_lines):
|
|
305
|
+
return ""
|
|
306
|
+
|
|
307
|
+
if end_point[0] < 0 or end_point[0] >= len(self.content_lines):
|
|
308
|
+
return ""
|
|
309
|
+
|
|
310
|
+
if start_point[0] == end_point[0]:
|
|
311
|
+
line = self.content_lines[start_point[0]]
|
|
312
|
+
# Ensure column indices are within line bounds
|
|
313
|
+
start_col = max(0, min(start_point[1], len(line)))
|
|
314
|
+
end_col = max(start_col, min(end_point[1], len(line)))
|
|
315
|
+
result: str = line[start_col:end_col]
|
|
316
|
+
self._node_text_cache[node_id] = result
|
|
317
|
+
return result
|
|
318
|
+
else:
|
|
319
|
+
lines = []
|
|
320
|
+
for i in range(start_point[0], end_point[0] + 1):
|
|
321
|
+
if i < len(self.content_lines):
|
|
322
|
+
line = self.content_lines[i]
|
|
323
|
+
if i == start_point[0]:
|
|
324
|
+
start_col = max(0, min(start_point[1], len(line)))
|
|
325
|
+
lines.append(line[start_col:])
|
|
326
|
+
elif i == end_point[0]:
|
|
327
|
+
end_col = max(0, min(end_point[1], len(line)))
|
|
328
|
+
lines.append(line[:end_col])
|
|
329
|
+
else:
|
|
330
|
+
lines.append(line)
|
|
331
|
+
result = "\n".join(lines)
|
|
332
|
+
self._node_text_cache[node_id] = result
|
|
333
|
+
return result
|
|
334
|
+
except Exception as fallback_error:
|
|
335
|
+
log_error(f"Fallback text extraction also failed: {fallback_error}")
|
|
336
|
+
return ""
|
|
337
|
+
|
|
338
|
+
def _extract_function_optimized(self, node: "tree_sitter.Node") -> Function | None:
|
|
339
|
+
"""Extract function information with detailed metadata"""
|
|
340
|
+
try:
|
|
341
|
+
start_line = node.start_point[0] + 1
|
|
342
|
+
end_line = node.end_point[0] + 1
|
|
343
|
+
|
|
344
|
+
# Extract function details
|
|
345
|
+
function_info = self._parse_function_signature_optimized(node)
|
|
346
|
+
if not function_info:
|
|
347
|
+
return None
|
|
348
|
+
|
|
349
|
+
name, parameters, is_async, decorators, return_type = function_info
|
|
350
|
+
|
|
351
|
+
# Extract docstring
|
|
352
|
+
docstring = self._extract_docstring_for_line(start_line)
|
|
353
|
+
|
|
354
|
+
# Calculate complexity
|
|
355
|
+
complexity_score = self._calculate_complexity_optimized(node)
|
|
356
|
+
|
|
357
|
+
# Extract raw text
|
|
358
|
+
start_line_idx = max(0, start_line - 1)
|
|
359
|
+
end_line_idx = min(len(self.content_lines), end_line)
|
|
360
|
+
raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
|
|
361
|
+
|
|
362
|
+
# Determine visibility (Python conventions)
|
|
363
|
+
visibility = "public"
|
|
364
|
+
if name.startswith("__") and name.endswith("__"):
|
|
365
|
+
visibility = "magic" # Magic methods
|
|
366
|
+
elif name.startswith("_"):
|
|
367
|
+
visibility = "private"
|
|
368
|
+
|
|
369
|
+
return Function(
|
|
370
|
+
name=name,
|
|
371
|
+
start_line=start_line,
|
|
372
|
+
end_line=end_line,
|
|
373
|
+
raw_text=raw_text,
|
|
374
|
+
language="python",
|
|
375
|
+
parameters=parameters,
|
|
376
|
+
return_type=return_type or "Any",
|
|
377
|
+
is_async=is_async,
|
|
378
|
+
is_generator="yield" in raw_text,
|
|
379
|
+
docstring=docstring,
|
|
380
|
+
complexity_score=complexity_score,
|
|
381
|
+
modifiers=decorators,
|
|
382
|
+
is_static="staticmethod" in decorators,
|
|
383
|
+
is_staticmethod="staticmethod" in decorators,
|
|
384
|
+
is_private=visibility == "private",
|
|
385
|
+
is_public=visibility == "public",
|
|
386
|
+
# Python-specific properties
|
|
387
|
+
framework_type=self.framework_type,
|
|
388
|
+
is_property="property" in decorators,
|
|
389
|
+
is_classmethod="classmethod" in decorators,
|
|
390
|
+
)
|
|
391
|
+
except Exception as e:
|
|
392
|
+
log_error(f"Failed to extract function info: {e}")
|
|
393
|
+
import traceback
|
|
394
|
+
|
|
395
|
+
traceback.print_exc()
|
|
396
|
+
return None
|
|
397
|
+
|
|
398
|
+
def _parse_function_signature_optimized(
|
|
399
|
+
self, node: "tree_sitter.Node"
|
|
400
|
+
) -> tuple[str, list[str], bool, list[str], str | None] | None:
|
|
401
|
+
"""Parse function signature for Python functions"""
|
|
402
|
+
try:
|
|
403
|
+
name = None
|
|
404
|
+
parameters = []
|
|
405
|
+
is_async = False
|
|
406
|
+
decorators = []
|
|
407
|
+
return_type = None
|
|
408
|
+
|
|
409
|
+
# Check for async keyword
|
|
410
|
+
node_text = self._get_node_text_optimized(node)
|
|
411
|
+
is_async = node_text.strip().startswith("async def")
|
|
412
|
+
|
|
413
|
+
# Extract return type from function signature text
|
|
414
|
+
if "->" in node_text:
|
|
415
|
+
# Split by '->' and extract return type
|
|
416
|
+
parts = node_text.split("->")
|
|
417
|
+
if len(parts) > 1:
|
|
418
|
+
# Get everything after '->' and before ':'
|
|
419
|
+
return_part = parts[1].split(":")[0].strip()
|
|
420
|
+
# Clean up the return type
|
|
421
|
+
return_type = return_part.replace("\n", " ").strip()
|
|
422
|
+
# Don't use decorator names as return types
|
|
423
|
+
if (
|
|
424
|
+
return_type
|
|
425
|
+
and not return_type.startswith("@")
|
|
426
|
+
and return_type != "dataclass"
|
|
427
|
+
):
|
|
428
|
+
# Additional validation - ensure it's a valid type annotation
|
|
429
|
+
if not any(
|
|
430
|
+
invalid in return_type
|
|
431
|
+
for invalid in ["def ", "class ", "import "]
|
|
432
|
+
):
|
|
433
|
+
pass # Keep the return_type
|
|
434
|
+
else:
|
|
435
|
+
return_type = None
|
|
436
|
+
|
|
437
|
+
# Extract decorators from preceding siblings
|
|
438
|
+
if node.parent:
|
|
439
|
+
for sibling in node.parent.children:
|
|
440
|
+
if sibling.type == "decorated_definition":
|
|
441
|
+
for child in sibling.children:
|
|
442
|
+
if child.type == "decorator":
|
|
443
|
+
decorator_text = self._get_node_text_optimized(child)
|
|
444
|
+
if decorator_text.startswith("@"):
|
|
445
|
+
decorator_text = decorator_text[1:].strip()
|
|
446
|
+
decorators.append(decorator_text)
|
|
447
|
+
|
|
448
|
+
for child in node.children:
|
|
449
|
+
if child.type == "identifier":
|
|
450
|
+
name = child.text.decode("utf8") if child.text else None
|
|
451
|
+
elif child.type == "parameters":
|
|
452
|
+
parameters = self._extract_parameters_from_node_optimized(child)
|
|
453
|
+
elif child.type == "type" and not return_type:
|
|
454
|
+
# Only use this if we didn't extract from text
|
|
455
|
+
type_text = self._get_node_text_optimized(child)
|
|
456
|
+
if (
|
|
457
|
+
type_text
|
|
458
|
+
and not type_text.startswith("@")
|
|
459
|
+
and type_text != "dataclass"
|
|
460
|
+
):
|
|
461
|
+
return_type = type_text
|
|
462
|
+
|
|
463
|
+
return name or "", parameters, is_async, decorators, return_type
|
|
464
|
+
except Exception:
|
|
465
|
+
return None
|
|
466
|
+
|
|
467
|
+
def _extract_parameters_from_node_optimized(
|
|
468
|
+
self, params_node: "tree_sitter.Node"
|
|
469
|
+
) -> list[str]:
|
|
470
|
+
"""Extract function parameters with type hints"""
|
|
471
|
+
parameters = []
|
|
472
|
+
|
|
473
|
+
for child in params_node.children:
|
|
474
|
+
if child.type == "identifier":
|
|
475
|
+
param_name = self._get_node_text_optimized(child)
|
|
476
|
+
parameters.append(param_name)
|
|
477
|
+
elif child.type == "typed_parameter":
|
|
478
|
+
# Handle typed parameters
|
|
479
|
+
param_text = self._get_node_text_optimized(child)
|
|
480
|
+
parameters.append(param_text)
|
|
481
|
+
elif child.type == "default_parameter":
|
|
482
|
+
# Handle default parameters
|
|
483
|
+
param_text = self._get_node_text_optimized(child)
|
|
484
|
+
parameters.append(param_text)
|
|
485
|
+
elif child.type == "list_splat_pattern":
|
|
486
|
+
# Handle *args
|
|
487
|
+
param_text = self._get_node_text_optimized(child)
|
|
488
|
+
parameters.append(param_text)
|
|
489
|
+
elif child.type == "dictionary_splat_pattern":
|
|
490
|
+
# Handle **kwargs
|
|
491
|
+
param_text = self._get_node_text_optimized(child)
|
|
492
|
+
parameters.append(param_text)
|
|
493
|
+
|
|
494
|
+
return parameters
|
|
495
|
+
|
|
496
|
+
def _extract_docstring_for_line(self, target_line: int) -> str | None:
|
|
497
|
+
"""Extract docstring for the specified line"""
|
|
498
|
+
if target_line in self._docstring_cache:
|
|
499
|
+
return self._docstring_cache[target_line]
|
|
500
|
+
|
|
501
|
+
try:
|
|
502
|
+
if not self.content_lines or target_line >= len(self.content_lines):
|
|
503
|
+
return None
|
|
504
|
+
|
|
505
|
+
# Look for docstring in the next few lines after function definition
|
|
506
|
+
for i in range(target_line, min(target_line + 5, len(self.content_lines))):
|
|
507
|
+
line = self.content_lines[i].strip()
|
|
508
|
+
if line.startswith('"""') or line.startswith("'''"):
|
|
509
|
+
# Found docstring start
|
|
510
|
+
quote_type = '"""' if line.startswith('"""') else "'''"
|
|
511
|
+
docstring_lines = []
|
|
512
|
+
|
|
513
|
+
# Single line docstring
|
|
514
|
+
if line.count(quote_type) >= 2:
|
|
515
|
+
docstring = line.replace(quote_type, "").strip()
|
|
516
|
+
self._docstring_cache[target_line] = docstring
|
|
517
|
+
return docstring
|
|
518
|
+
|
|
519
|
+
# Multi-line docstring
|
|
520
|
+
docstring_lines.append(line.replace(quote_type, ""))
|
|
521
|
+
found_closing_quote = False
|
|
522
|
+
for j in range(i + 1, len(self.content_lines)):
|
|
523
|
+
next_line = self.content_lines[j]
|
|
524
|
+
if quote_type in next_line:
|
|
525
|
+
docstring_lines.append(next_line.replace(quote_type, ""))
|
|
526
|
+
found_closing_quote = True
|
|
527
|
+
break
|
|
528
|
+
docstring_lines.append(next_line)
|
|
529
|
+
|
|
530
|
+
if not found_closing_quote:
|
|
531
|
+
self._docstring_cache[target_line] = ""
|
|
532
|
+
return None
|
|
533
|
+
|
|
534
|
+
# Join preserving formatting and add leading newline for multi-line
|
|
535
|
+
docstring = "\n".join(docstring_lines)
|
|
536
|
+
# Add leading newline for multi-line docstrings to match expected format
|
|
537
|
+
if not docstring.startswith("\n"):
|
|
538
|
+
docstring = "\n" + docstring
|
|
539
|
+
self._docstring_cache[target_line] = docstring
|
|
540
|
+
return docstring
|
|
541
|
+
|
|
542
|
+
self._docstring_cache[target_line] = ""
|
|
543
|
+
return None
|
|
544
|
+
|
|
545
|
+
except Exception as e:
|
|
546
|
+
log_debug(f"Failed to extract docstring: {e}")
|
|
547
|
+
return None
|
|
548
|
+
|
|
549
|
+
def _calculate_complexity_optimized(self, node: "tree_sitter.Node") -> int:
|
|
550
|
+
"""Calculate cyclomatic complexity efficiently"""
|
|
551
|
+
import re
|
|
552
|
+
|
|
553
|
+
node_id = id(node)
|
|
554
|
+
if node_id in self._complexity_cache:
|
|
555
|
+
return self._complexity_cache[node_id]
|
|
556
|
+
|
|
557
|
+
complexity = 1
|
|
558
|
+
try:
|
|
559
|
+
node_text = self._get_node_text_optimized(node).lower()
|
|
560
|
+
keywords = [
|
|
561
|
+
"if",
|
|
562
|
+
"elif",
|
|
563
|
+
"while",
|
|
564
|
+
"for",
|
|
565
|
+
"except",
|
|
566
|
+
"and",
|
|
567
|
+
"or",
|
|
568
|
+
"with",
|
|
569
|
+
"match",
|
|
570
|
+
"case",
|
|
571
|
+
]
|
|
572
|
+
for keyword in keywords:
|
|
573
|
+
# More flexible keyword matching
|
|
574
|
+
pattern = rf"\b{keyword}\b"
|
|
575
|
+
matches = re.findall(pattern, node_text)
|
|
576
|
+
complexity += len(matches)
|
|
577
|
+
except Exception as e:
|
|
578
|
+
log_debug(f"Failed to calculate complexity: {e}")
|
|
579
|
+
|
|
580
|
+
self._complexity_cache[node_id] = complexity
|
|
581
|
+
return complexity
|
|
582
|
+
|
|
583
|
+
def _extract_class_optimized(self, node: "tree_sitter.Node") -> Class | None:
    """Build a ``Class`` model from a class definition node.

    Collects the class name, its base classes, the decorators applied to
    it, its docstring and raw text, and derives the dataclass / abstract /
    exception flags from those.

    Returns:
        The populated ``Class``, or ``None`` when the node has no name or
        extraction fails (the failure is logged at debug level).
    """

    def _bare_name(expr: str) -> str:
        """Return the last dotted segment of ``expr`` without call arguments."""
        return expr.split("(", 1)[0].strip().rsplit(".", 1)[-1]

    try:
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        # tree-sitter wraps a decorated class as
        #     decorated_definition -> decorator* class_definition
        # so the decorators belonging to THIS class are the "decorator"
        # children of its own parent. Scanning sibling decorated_definition
        # nodes would pick up other definitions' decorators instead.
        decorators: list[str] = []
        parent = node.parent
        if parent and parent.type == "decorated_definition":
            for sibling in parent.children:
                if sibling.type != "decorator":
                    continue
                decorator_text = self._get_node_text_optimized(sibling)
                if decorator_text.startswith("@"):
                    decorator_text = decorator_text[1:].strip()
                decorators.append(decorator_text)

        class_name = None
        superclasses: list[str] = []
        for child in node.children:
            if child.type == "identifier":
                class_name = child.text.decode("utf8") if child.text else None
            elif child.type == "argument_list":
                for base in child.children or []:
                    # "attribute" covers dotted bases such as ``abc.ABC``
                    if base.type in ("identifier", "attribute") and base.text:
                        superclasses.append(base.text.decode("utf8"))

        if not class_name:
            return None

        docstring = self._extract_docstring_for_line(start_line)
        raw_text = self._get_node_text_optimized(node)

        full_qualified_name = (
            f"{self.current_module}.{class_name}"
            if self.current_module
            else class_name
        )

        return Class(
            name=class_name,
            start_line=start_line,
            end_line=end_line,
            raw_text=raw_text,
            language="python",
            class_type="class",
            superclass=superclasses[0] if superclasses else None,
            interfaces=superclasses[1:] if len(superclasses) > 1 else [],
            docstring=docstring,
            modifiers=decorators,
            full_qualified_name=full_qualified_name,
            package_name=self.current_module,
            # Python-specific properties
            framework_type=self.framework_type,
            # Also matches ``dataclass(frozen=True)`` and ``dataclasses.dataclass``
            is_dataclass=any(_bare_name(d) == "dataclass" for d in decorators),
            is_abstract=any(_bare_name(sc) == "ABC" for sc in superclasses)
            or "abstractmethod" in raw_text,
            is_exception=any(
                "Exception" in sc or "Error" in sc for sc in superclasses
            ),
        )
    except Exception as e:
        log_debug(f"Failed to extract class info: {e}")
        return None
|
|
661
|
+
|
|
662
|
+
def _is_framework_class(self, node: "tree_sitter.Node", class_name: str) -> bool:
|
|
663
|
+
"""Check if class is a framework-specific class"""
|
|
664
|
+
if self.framework_type == "django":
|
|
665
|
+
# Check for Django model, view, form, etc.
|
|
666
|
+
node_text = self._get_node_text_optimized(node)
|
|
667
|
+
return any(
|
|
668
|
+
pattern in node_text
|
|
669
|
+
for pattern in ["Model", "View", "Form", "Serializer", "TestCase"]
|
|
670
|
+
)
|
|
671
|
+
elif self.framework_type == "flask":
|
|
672
|
+
# Check for Flask patterns
|
|
673
|
+
return "Flask" in self.source_code or "Blueprint" in self.source_code
|
|
674
|
+
elif self.framework_type == "fastapi":
|
|
675
|
+
# Check for FastAPI patterns
|
|
676
|
+
return "APIRouter" in self.source_code or "BaseModel" in self.source_code
|
|
677
|
+
return False
|
|
678
|
+
|
|
679
|
+
def _extract_class_attributes(
|
|
680
|
+
self, class_body_node: "tree_sitter.Node", source_code: str
|
|
681
|
+
) -> list[Variable]:
|
|
682
|
+
"""Extract class-level attribute assignments"""
|
|
683
|
+
attributes: list[Variable] = []
|
|
684
|
+
|
|
685
|
+
try:
|
|
686
|
+
# Look for assignments directly under class body
|
|
687
|
+
for child in class_body_node.children:
|
|
688
|
+
if child.type == "expression_statement":
|
|
689
|
+
# Check if it's an assignment
|
|
690
|
+
for grandchild in child.children:
|
|
691
|
+
if grandchild.type == "assignment":
|
|
692
|
+
attribute = self._extract_class_attribute_info(
|
|
693
|
+
grandchild, source_code
|
|
694
|
+
)
|
|
695
|
+
if attribute:
|
|
696
|
+
attributes.append(attribute)
|
|
697
|
+
elif child.type == "assignment":
|
|
698
|
+
attribute = self._extract_class_attribute_info(child, source_code)
|
|
699
|
+
if attribute:
|
|
700
|
+
attributes.append(attribute)
|
|
701
|
+
|
|
702
|
+
except Exception as e:
|
|
703
|
+
log_warning(f"Could not extract class attributes: {e}")
|
|
704
|
+
|
|
705
|
+
return attributes
|
|
706
|
+
|
|
707
|
+
def _extract_class_attribute_info(
    self, node: "tree_sitter.Node", source_code: str
) -> Variable | None:
    """Turn one class-level assignment node into a ``Variable``.

    Handles ``name = value``, ``name: type = value`` and the
    annotation-only form ``name: type`` (common for dataclass fields),
    which has no ``=`` at all.

    Returns:
        The ``Variable`` describing the attribute, or ``None`` when the text
        is neither an assignment nor an annotation, or when extraction
        fails (the failure is logged as a warning).
    """
    try:
        assignment_text = source_code[node.start_byte : node.end_byte]

        # Everything before the first "=" is the target plus optional annotation
        left_part, has_value, _ = assignment_text.partition("=")
        name_part, has_annotation, type_part = left_part.strip().partition(":")

        if not has_value and not has_annotation:
            return None

        return Variable(
            name=name_part.strip(),
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            raw_text=assignment_text,
            language="python",
            variable_type=type_part.strip() if has_annotation else None,
        )

    except Exception as e:
        log_warning(f"Could not extract class attribute info: {e}")

    return None
|
|
741
|
+
|
|
742
|
+
def extract_imports(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Import]:
    """Extract Python import statements from a parsed tree.

    Runs a tree-sitter query that captures whole ``import`` and
    ``from ... import`` statements and converts each capture with
    ``_extract_import_info``. When the tree exposes no language object, or
    the query path raises, the tree is walked by ``_extract_imports_manual``
    instead.

    Returns:
        The list of extracted ``Import`` objects (possibly empty).
    """
    imports: list[Import] = []

    # Only capture statements, not their individual elements
    import_query = """
    (import_statement) @import_stmt
    (import_from_statement) @from_import_stmt
    """

    try:
        language = getattr(tree, "language", None)
        if not language:
            # Without a language no query can be built; walk the tree
            # rather than silently reporting "no imports".
            return self._extract_imports_manual(tree.root_node, source_code)

        try:
            captures = TreeSitterQueryCompat.safe_execute_query(
                language, import_query, tree.root_node, fallback_result=[]
            )

            # Deduplicate on byte span: two statements may share a line
            # ("import a; import b") but never a byte range.
            seen_spans: set[tuple[int, int]] = set()

            for node, capture_name in captures:
                span = (node.start_byte, node.end_byte)
                if span in seen_spans:
                    continue
                seen_spans.add(span)

                import_type = "from_import" if "from" in capture_name else "import"

                imp = self._extract_import_info(node, source_code, import_type)
                if imp:
                    imports.append(imp)

        except Exception as query_error:
            log_debug(
                f"Query execution failed, using manual extraction: {query_error}"
            )
            # Start over so partially collected results are not duplicated
            imports = list(self._extract_imports_manual(tree.root_node, source_code))

    except Exception as e:
        log_warning(f"Could not extract Python imports: {e}")
        # Final fallback; replaces any partial results for the same reason
        imports = list(self._extract_imports_manual(tree.root_node, source_code))

    return imports
|
|
798
|
+
|
|
799
|
+
def _extract_imports_manual(
|
|
800
|
+
self, root_node: "tree_sitter.Node", source_code: str
|
|
801
|
+
) -> list[Import]:
|
|
802
|
+
"""Manual import extraction for tree-sitter 0.25.x compatibility"""
|
|
803
|
+
imports = []
|
|
804
|
+
|
|
805
|
+
def walk_tree(node: "tree_sitter.Node") -> None:
|
|
806
|
+
if node.type in ["import_statement", "import_from_statement"]:
|
|
807
|
+
try:
|
|
808
|
+
start_line = node.start_point[0] + 1
|
|
809
|
+
end_line = node.end_point[0] + 1
|
|
810
|
+
raw_text = (
|
|
811
|
+
source_code[node.start_byte : node.end_byte]
|
|
812
|
+
if hasattr(node, "start_byte")
|
|
813
|
+
else ""
|
|
814
|
+
)
|
|
815
|
+
|
|
816
|
+
# Parse the import statement correctly
|
|
817
|
+
if node.type == "import_statement":
|
|
818
|
+
# Simple import: import os, sys, json
|
|
819
|
+
# Extract all imported modules
|
|
820
|
+
for child in node.children:
|
|
821
|
+
if (
|
|
822
|
+
child.type == "dotted_name"
|
|
823
|
+
or child.type == "identifier"
|
|
824
|
+
):
|
|
825
|
+
module_name = (
|
|
826
|
+
source_code[child.start_byte : child.end_byte]
|
|
827
|
+
if hasattr(child, "start_byte")
|
|
828
|
+
else ""
|
|
829
|
+
)
|
|
830
|
+
if module_name and module_name != "import":
|
|
831
|
+
import_obj = Import(
|
|
832
|
+
name=module_name,
|
|
833
|
+
start_line=start_line,
|
|
834
|
+
end_line=end_line,
|
|
835
|
+
raw_text=raw_text,
|
|
836
|
+
module_name=module_name,
|
|
837
|
+
imported_names=[module_name],
|
|
838
|
+
element_type="import",
|
|
839
|
+
)
|
|
840
|
+
imports.append(import_obj)
|
|
841
|
+
elif node.type == "import_from_statement":
|
|
842
|
+
# From import: from abc import ABC, abstractmethod
|
|
843
|
+
module_name = ""
|
|
844
|
+
imported_items = []
|
|
845
|
+
|
|
846
|
+
# Find the module name (after 'from')
|
|
847
|
+
for child in node.children:
|
|
848
|
+
if child.type == "dotted_name" and not module_name:
|
|
849
|
+
module_name = (
|
|
850
|
+
source_code[child.start_byte : child.end_byte]
|
|
851
|
+
if hasattr(child, "start_byte")
|
|
852
|
+
else ""
|
|
853
|
+
)
|
|
854
|
+
elif child.type == "import_list":
|
|
855
|
+
# Extract items from import list
|
|
856
|
+
for grandchild in child.children:
|
|
857
|
+
if (
|
|
858
|
+
grandchild.type == "dotted_name"
|
|
859
|
+
or grandchild.type == "identifier"
|
|
860
|
+
):
|
|
861
|
+
item_name = (
|
|
862
|
+
source_code[
|
|
863
|
+
grandchild.start_byte : grandchild.end_byte
|
|
864
|
+
]
|
|
865
|
+
if hasattr(grandchild, "start_byte")
|
|
866
|
+
else ""
|
|
867
|
+
)
|
|
868
|
+
if item_name and item_name not in [
|
|
869
|
+
",",
|
|
870
|
+
"(",
|
|
871
|
+
")",
|
|
872
|
+
]:
|
|
873
|
+
imported_items.append(item_name)
|
|
874
|
+
elif child.type == "dotted_name" and module_name:
|
|
875
|
+
# Single import item (not in a list)
|
|
876
|
+
item_name = (
|
|
877
|
+
source_code[child.start_byte : child.end_byte]
|
|
878
|
+
if hasattr(child, "start_byte")
|
|
879
|
+
else ""
|
|
880
|
+
)
|
|
881
|
+
if item_name:
|
|
882
|
+
imported_items.append(item_name)
|
|
883
|
+
|
|
884
|
+
# Create import object for from import
|
|
885
|
+
if module_name:
|
|
886
|
+
import_obj = Import(
|
|
887
|
+
name=(
|
|
888
|
+
f"from {module_name} import {', '.join(imported_items)}"
|
|
889
|
+
if imported_items
|
|
890
|
+
else f"from {module_name}"
|
|
891
|
+
),
|
|
892
|
+
start_line=start_line,
|
|
893
|
+
end_line=end_line,
|
|
894
|
+
raw_text=raw_text,
|
|
895
|
+
module_name=module_name,
|
|
896
|
+
imported_names=imported_items,
|
|
897
|
+
element_type="import",
|
|
898
|
+
)
|
|
899
|
+
imports.append(import_obj)
|
|
900
|
+
|
|
901
|
+
except Exception as e:
|
|
902
|
+
log_warning(f"Failed to extract import manually: {e}")
|
|
903
|
+
|
|
904
|
+
# Recursively process children
|
|
905
|
+
for child in node.children:
|
|
906
|
+
walk_tree(child)
|
|
907
|
+
|
|
908
|
+
walk_tree(root_node)
|
|
909
|
+
return imports
|
|
910
|
+
|
|
911
|
+
def extract_packages(self, tree: "tree_sitter.Tree", source_code: str) -> list:
    """Infer the Python package of ``self.current_file`` from the file system.

    Starting at the file's directory, walks upward for as long as each
    directory contains an ``__init__.py`` and joins the directory names
    with dots. When a package is found it is also stored in
    ``self.current_module``. ``tree`` and ``source_code`` are not used.

    Returns:
        A list holding one ``Package``, or an empty list when there is no
        current file or its directory is not a package.
    """
    import os

    from ..models import Package

    found: list[Package] = []
    if not self.current_file:
        return found

    directory = os.path.dirname(os.path.abspath(self.current_file))

    # Innermost package first; reversed when the dotted name is assembled
    segments = []
    while directory and os.path.exists(os.path.join(directory, "__init__.py")):
        segments.append(os.path.basename(directory))
        parent = os.path.dirname(directory)
        if parent == directory:  # Reached root
            break
        directory = parent

    if segments:
        package_name = ".".join(reversed(segments))
        self.current_module = package_name
        found.append(
            Package(
                name=package_name,
                start_line=1,
                end_line=1,
                raw_text=f"# Package: {package_name}",
                language="python",
            )
        )

    return found
|
|
958
|
+
|
|
959
|
+
def _extract_detailed_function_info(
    self, node: "tree_sitter.Node", source_code: str, is_async: bool = False
) -> Function | None:
    """Assemble a ``Function`` model from a function definition node.

    Gathers the name, parameters, decorators and return annotation through
    the ``_extract_*_from_node`` helpers and classifies visibility by the
    usual underscore conventions. ``is_async`` is accepted but not used.

    Returns:
        The ``Function``, or ``None`` if the node has no name or extraction
        fails (the failure is logged as a warning).
    """
    try:
        name = self._extract_name_from_node(node, source_code)
        if not name:
            return None

        parameters = self._extract_parameters_from_node(node, source_code)
        decorators = self._extract_decorators_from_node(node, source_code)
        return_type = self._extract_return_type_from_node(node, source_code)

        # Visibility by naming convention: __dunder__, _private, public
        if name.startswith("__") and name.endswith("__"):
            visibility = "magic"
        elif name.startswith("_"):
            visibility = "private"
        else:
            visibility = "public"

        # Clamp offsets so slicing never runs past the end of the source
        source_length = len(source_code)
        start_byte = min(node.start_byte, source_length)
        end_byte = min(node.end_byte, source_length)
        if start_byte < end_byte:
            raw_text = source_code[start_byte:end_byte]
        else:
            raw_text = source_code

        return Function(
            name=name,
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            raw_text=raw_text,
            language="python",
            parameters=parameters,
            return_type=return_type or "Any",
            modifiers=decorators,
            is_static="staticmethod" in decorators,
            is_private=visibility == "private",
            is_public=visibility == "public",
        )

    except Exception as e:
        log_warning(f"Could not extract detailed function info: {e}")
        return None
|
|
1020
|
+
|
|
1021
|
+
def _extract_detailed_class_info(
    self, node: "tree_sitter.Node", source_code: str
) -> Class | None:
    """Assemble a ``Class`` model from a class definition node.

    Uses the ``_extract_*_from_node`` helpers for the name, base classes
    and decorators. The first base becomes ``superclass`` and the rest
    ``interfaces``; the qualified name is prefixed with
    ``self.current_module`` when one is set.

    Returns:
        The ``Class``, or ``None`` if the node has no name or extraction
        fails (the failure is logged as a warning).
    """
    try:
        name = self._extract_name_from_node(node, source_code)
        if not name:
            return None

        superclasses = self._extract_superclasses_from_node(node, source_code)
        decorators = self._extract_decorators_from_node(node, source_code)

        if self.current_module:
            full_qualified_name = f"{self.current_module}.{name}"
        else:
            full_qualified_name = name

        primary_base = superclasses[0] if superclasses else None
        extra_bases = superclasses[1:] if len(superclasses) > 1 else []

        return Class(
            name=name,
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            raw_text=source_code[node.start_byte : node.end_byte],
            language="python",
            class_type="class",
            full_qualified_name=full_qualified_name,
            package_name=self.current_module,
            superclass=primary_base,
            interfaces=extra_bases,
            modifiers=decorators,
        )

    except Exception as e:
        log_warning(f"Could not extract detailed class info: {e}")
        return None
|
|
1067
|
+
|
|
1068
|
+
def _extract_variable_info(
    self, node: "tree_sitter.Node", source_code: str, assignment_type: str
) -> Variable | None:
    """Build a ``Variable`` from an assignment-like node.

    The name is the text before the first ``=``; for a
    ``"multiple_assignment"`` with comma-separated targets only the first
    target is kept. Text without ``=`` gets the placeholder name
    ``"variable"``. ``assignment_type`` is stored as ``variable_type``.

    Returns:
        The ``Variable``, or ``None`` when the node fails ``_validate_node``
        or extraction raises (the failure is logged as a warning).
    """
    try:
        if not self._validate_node(node):
            return None

        variable_text = source_code[node.start_byte : node.end_byte]

        target, equals, _ = variable_text.partition("=")
        if not equals:
            name = "variable"
        else:
            name = target.strip()
            if assignment_type == "multiple_assignment" and "," in name:
                name = name.split(",")[0].strip()

        return Variable(
            name=name,
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            raw_text=variable_text,
            language="python",
            variable_type=assignment_type,
        )

    except Exception as e:
        log_warning(f"Could not extract variable info: {e}")
        return None
|
|
1101
|
+
|
|
1102
|
+
def _extract_import_info(
    self, node: "tree_sitter.Node", source_code: str, import_type: str
) -> Import | None:
    """Build an ``Import`` from an import statement node.

    The statement text is split on the ``from`` / ``import`` keywords,
    matched as whole words at their syntactic positions, so module names
    that merely contain those letters (``importlib``, ``important``,
    ``from_utils``) are left intact.

    Args:
        node: The import statement node.
        source_code: Source text that the node's offsets index into.
        import_type: ``"from_import"``, ``"aliased_import"``, or anything
            else for a plain ``import``.

    Returns:
        The ``Import``, or ``None`` when the node fails ``_validate_node``
        or extraction raises (the failure is logged as a warning).
    """
    import re

    try:
        if not self._validate_node(node):
            return None

        # Clamp offsets so slicing never runs past the end of the source
        start_byte = min(node.start_byte, len(source_code))
        end_byte = min(node.end_byte, len(source_code))
        import_text = (
            source_code[start_byte:end_byte]
            if start_byte < end_byte
            else source_code
        )

        module_name = ""
        if import_type == "from_import":
            # DOTALL: parenthesised import lists may span several lines
            match = re.match(
                r"\s*from\b\s*(.*?)\s*\bimport\b\s*(.*)", import_text, re.DOTALL
            )
            if match:
                module_name = match.group(1).strip()
                import_name = match.group(2).strip()
            else:
                import_name = import_text
        elif import_type == "aliased_import":
            import_name = import_text
        else:
            match = re.match(r"\s*import\b\s*(.*)", import_text, re.DOTALL)
            import_name = match.group(1).strip() if match else import_text.strip()

        return Import(
            name=import_name,
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            raw_text=import_text,
            language="python",
            module_name=module_name,
        )

    except Exception as e:
        log_warning(f"Could not extract import info: {e}")
        return None
|
|
1147
|
+
|
|
1148
|
+
# Helper methods
|
|
1149
|
+
def _validate_node(self, node: "tree_sitter.Node") -> bool:
|
|
1150
|
+
"""Validate that a node has required attributes"""
|
|
1151
|
+
required_attrs = ["start_byte", "end_byte", "start_point", "end_point"]
|
|
1152
|
+
for attr in required_attrs:
|
|
1153
|
+
if not hasattr(node, attr) or getattr(node, attr) is None:
|
|
1154
|
+
return False
|
|
1155
|
+
return True
|
|
1156
|
+
|
|
1157
|
+
def _extract_name_from_node(
|
|
1158
|
+
self, node: "tree_sitter.Node", source_code: str
|
|
1159
|
+
) -> str | None:
|
|
1160
|
+
"""Extract name from AST node"""
|
|
1161
|
+
for child in node.children:
|
|
1162
|
+
if child.type == "identifier":
|
|
1163
|
+
return source_code[child.start_byte : child.end_byte]
|
|
1164
|
+
return None
|
|
1165
|
+
|
|
1166
|
+
def _extract_parameters_from_node(
|
|
1167
|
+
self, node: "tree_sitter.Node", source_code: str
|
|
1168
|
+
) -> list[str]:
|
|
1169
|
+
"""Extract parameters from function node"""
|
|
1170
|
+
parameters: list[str] = []
|
|
1171
|
+
for child in node.children:
|
|
1172
|
+
if child.type == "parameters":
|
|
1173
|
+
for param_child in child.children:
|
|
1174
|
+
if param_child.type in [
|
|
1175
|
+
"identifier",
|
|
1176
|
+
"typed_parameter",
|
|
1177
|
+
"default_parameter",
|
|
1178
|
+
]:
|
|
1179
|
+
param_text = source_code[
|
|
1180
|
+
param_child.start_byte : param_child.end_byte
|
|
1181
|
+
]
|
|
1182
|
+
parameters.append(param_text)
|
|
1183
|
+
return parameters
|
|
1184
|
+
|
|
1185
|
+
def _extract_decorators_from_node(
|
|
1186
|
+
self, node: "tree_sitter.Node", source_code: str
|
|
1187
|
+
) -> list[str]:
|
|
1188
|
+
"""Extract decorators from node"""
|
|
1189
|
+
decorators: list[str] = []
|
|
1190
|
+
|
|
1191
|
+
# Decorators are before function/class definitions
|
|
1192
|
+
if hasattr(node, "parent") and node.parent:
|
|
1193
|
+
for sibling in node.parent.children:
|
|
1194
|
+
if (
|
|
1195
|
+
sibling.type == "decorator"
|
|
1196
|
+
and sibling.end_point[0] < node.start_point[0]
|
|
1197
|
+
):
|
|
1198
|
+
decorator_text = source_code[sibling.start_byte : sibling.end_byte]
|
|
1199
|
+
# Remove @
|
|
1200
|
+
if decorator_text.startswith("@"):
|
|
1201
|
+
decorator_text = decorator_text[1:].strip()
|
|
1202
|
+
decorators.append(decorator_text)
|
|
1203
|
+
|
|
1204
|
+
return decorators
|
|
1205
|
+
|
|
1206
|
+
def _extract_return_type_from_node(
|
|
1207
|
+
self, node: "tree_sitter.Node", source_code: str
|
|
1208
|
+
) -> str | None:
|
|
1209
|
+
"""Extract return type annotation from function node"""
|
|
1210
|
+
# Look for return type annotation after '->'
|
|
1211
|
+
node_text = self._get_node_text_optimized(node)
|
|
1212
|
+
if "->" in node_text:
|
|
1213
|
+
# Extract everything after '->' and before ':'
|
|
1214
|
+
parts = node_text.split("->")
|
|
1215
|
+
if len(parts) > 1:
|
|
1216
|
+
return_part = parts[1].split(":")[0].strip()
|
|
1217
|
+
# Clean up the return type (remove whitespace and newlines)
|
|
1218
|
+
return_type = return_part.replace("\n", " ").strip()
|
|
1219
|
+
# Don't return decorator names as return types
|
|
1220
|
+
if return_type and not return_type.startswith("@"):
|
|
1221
|
+
return return_type
|
|
1222
|
+
|
|
1223
|
+
# Fallback to original method
|
|
1224
|
+
for child in node.children:
|
|
1225
|
+
if child.type == "type":
|
|
1226
|
+
type_text = source_code[child.start_byte : child.end_byte]
|
|
1227
|
+
# Don't return decorator names as return types
|
|
1228
|
+
if type_text and not type_text.startswith("@"):
|
|
1229
|
+
return type_text
|
|
1230
|
+
return None
|
|
1231
|
+
|
|
1232
|
+
def _extract_docstring_from_node(
|
|
1233
|
+
self, node: "tree_sitter.Node", source_code: str
|
|
1234
|
+
) -> str | None:
|
|
1235
|
+
"""Extract docstring from function/class node"""
|
|
1236
|
+
for child in node.children:
|
|
1237
|
+
if child.type == "block":
|
|
1238
|
+
# Check if the first statement in the block is a docstring
|
|
1239
|
+
for stmt in child.children:
|
|
1240
|
+
if stmt.type == "expression_statement":
|
|
1241
|
+
for expr in stmt.children:
|
|
1242
|
+
if expr.type == "string":
|
|
1243
|
+
if self._validate_node(expr):
|
|
1244
|
+
docstring = source_code[
|
|
1245
|
+
expr.start_byte : expr.end_byte
|
|
1246
|
+
]
|
|
1247
|
+
# Remove quotes
|
|
1248
|
+
if docstring.startswith(
|
|
1249
|
+
'"""'
|
|
1250
|
+
) or docstring.startswith("'''"):
|
|
1251
|
+
return docstring[3:-3].strip()
|
|
1252
|
+
elif docstring.startswith(
|
|
1253
|
+
'"'
|
|
1254
|
+
) or docstring.startswith("'"):
|
|
1255
|
+
return docstring[1:-1].strip()
|
|
1256
|
+
return docstring
|
|
1257
|
+
break
|
|
1258
|
+
break
|
|
1259
|
+
return None
|
|
1260
|
+
|
|
1261
|
+
def _extract_function_body(self, node: "tree_sitter.Node", source_code: str) -> str:
|
|
1262
|
+
"""Extract function body"""
|
|
1263
|
+
for child in node.children:
|
|
1264
|
+
if child.type == "block":
|
|
1265
|
+
return source_code[child.start_byte : child.end_byte]
|
|
1266
|
+
return ""
|
|
1267
|
+
|
|
1268
|
+
def _extract_superclasses_from_node(
|
|
1269
|
+
self, node: "tree_sitter.Node", source_code: str
|
|
1270
|
+
) -> list[str]:
|
|
1271
|
+
"""Extract superclasses from class node"""
|
|
1272
|
+
superclasses: list[str] = []
|
|
1273
|
+
for child in node.children:
|
|
1274
|
+
if child.type == "argument_list":
|
|
1275
|
+
for arg in child.children:
|
|
1276
|
+
if arg.type == "identifier":
|
|
1277
|
+
superclasses.append(source_code[arg.start_byte : arg.end_byte])
|
|
1278
|
+
return superclasses
|
|
1279
|
+
|
|
1280
|
+
def _calculate_complexity(self, body: str) -> int:
|
|
1281
|
+
"""Calculate cyclomatic complexity (simplified)"""
|
|
1282
|
+
complexity = 1 # Base complexity
|
|
1283
|
+
keywords = ["if", "elif", "for", "while", "try", "except", "with", "and", "or"]
|
|
1284
|
+
for keyword in keywords:
|
|
1285
|
+
complexity += body.count(f" {keyword} ") + body.count(f"\n{keyword} ")
|
|
1286
|
+
return complexity
|
|
1287
|
+
|
|
1288
|
+
|
|
1289
|
+
class PythonPlugin(LanguagePlugin):
|
|
1290
|
+
"""Python language plugin for the new architecture"""
|
|
1291
|
+
|
|
1292
|
+
def __init__(self) -> None:
    """Initialise empty caches and the legacy ``language`` / ``extractor`` attributes."""
    super().__init__()

    # Filled lazily by get_extractor() and get_tree_sitter_language()
    self._extractor: PythonElementExtractor | None = None
    self._language_cache: tree_sitter.Language | None = None

    # Legacy compatibility attributes for tests
    self.language = "python"
    self.extractor = self.get_extractor()
|
|
1301
|
+
|
|
1302
|
+
def get_language_name(self) -> str:
    """Name of the language handled by this plugin (always ``"python"``)."""
    language_name = "python"
    return language_name
|
|
1305
|
+
|
|
1306
|
+
def get_file_extensions(self) -> list[str]:
    """File suffixes handled by this plugin, as a fresh list on every call."""
    return list((".py", ".pyw", ".pyi"))
|
|
1309
|
+
|
|
1310
|
+
def create_extractor(self) -> ElementExtractor:
    """Build a brand-new ``PythonElementExtractor`` (nothing is cached here)."""
    fresh_extractor = PythonElementExtractor()
    return fresh_extractor
|
|
1313
|
+
|
|
1314
|
+
def get_extractor(self) -> ElementExtractor:
    """Return the plugin's shared extractor, creating it on first use."""
    extractor = self._extractor
    if extractor is None:
        extractor = PythonElementExtractor()
        self._extractor = extractor
    return extractor
|
|
1319
|
+
|
|
1320
|
+
def get_language(self) -> str:
    """Legacy accessor for the language name (always ``"python"``)."""
    legacy_name = "python"
    return legacy_name
|
|
1323
|
+
|
|
1324
|
+
def extract_functions(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Function]:
    """Legacy shim: forward function extraction to the shared extractor."""
    return self.get_extractor().extract_functions(tree, source_code)
|
|
1330
|
+
|
|
1331
|
+
def extract_classes(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Class]:
    """Legacy shim: forward class extraction to the shared extractor."""
    return self.get_extractor().extract_classes(tree, source_code)
|
|
1337
|
+
|
|
1338
|
+
def extract_variables(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Variable]:
    """Legacy shim: forward variable extraction to the shared extractor."""
    return self.get_extractor().extract_variables(tree, source_code)
|
|
1344
|
+
|
|
1345
|
+
def extract_imports(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Import]:
    """Legacy shim: forward import extraction to the shared extractor."""
    return self.get_extractor().extract_imports(tree, source_code)
|
|
1351
|
+
|
|
1352
|
+
def get_tree_sitter_language(self) -> Optional["tree_sitter.Language"]:
    """Return the tree-sitter ``Language`` for Python, loading it on first use.

    The loaded object is kept in ``self._language_cache``. If the
    tree-sitter packages cannot be imported, or loading fails for any
    other reason, the error is logged and ``None`` is returned.
    """
    if self._language_cache is not None:
        return self._language_cache

    try:
        import tree_sitter
        import tree_sitter_python as tspython

        # Convert the PyCapsule object into a Language object
        self._language_cache = tree_sitter.Language(tspython.language())
    except ImportError:
        log_error("tree-sitter-python not available")
        return None
    except Exception as e:
        log_error(f"Failed to load Python language: {e}")
        return None

    return self._language_cache
|
|
1369
|
+
|
|
1370
|
+
def get_supported_queries(self) -> list[str]:
    """Names of the queries this plugin supports, as a fresh list on every call."""
    general = ["function", "class", "variable", "import"]
    python_specific = [
        "async_function",
        "method",
        "decorator",
        "exception",
        "comprehension",
        "lambda",
        "context_manager",
        "type_hint",
        "docstring",
    ]
    framework = ["django_model", "flask_route", "fastapi_endpoint"]
    return [*general, *python_specific, *framework]
|
|
1390
|
+
|
|
1391
|
+
def is_applicable(self, file_path: str) -> bool:
    """Tell whether this plugin handles ``file_path``.

    The path matches when it ends with one of the extensions returned by
    ``self.get_file_extensions()``; the comparison ignores case on both
    sides.
    """
    lowered_path = file_path.lower()
    for extension in self.get_file_extensions():
        if lowered_path.endswith(extension.lower()):
            return True
    return False
|
|
1397
|
+
|
|
1398
|
+
def get_plugin_info(self) -> dict:
    """Return a dictionary describing this plugin.

    The result holds a fixed name and version, the language name, file
    extensions and supported queries reported by this instance, and a
    fixed list of feature descriptions.
    """
    feature_descriptions = [
        "Async/await functions",
        "Type hints support",
        "Decorators",
        "Context managers",
        "Comprehensions",
        "Lambda expressions",
        "Exception handling",
        "Docstring extraction",
        "Django framework support",
        "Flask framework support",
        "FastAPI framework support",
        "Dataclass support",
        "Abstract class detection",
        "Complexity analysis",
    ]

    info: dict = {"name": "Python Plugin"}
    info["language"] = self.get_language_name()
    info["extensions"] = self.get_file_extensions()
    info["version"] = "2.0.0"
    info["supported_queries"] = self.get_supported_queries()
    info["features"] = feature_descriptions
    return info
|
|
1423
|
+
|
|
1424
|
+
def execute_query_strategy(
|
|
1425
|
+
self, query_key: str | None, language: str
|
|
1426
|
+
) -> str | None:
|
|
1427
|
+
"""Execute query strategy for Python language"""
|
|
1428
|
+
queries = self.get_queries()
|
|
1429
|
+
return queries.get(query_key) if query_key else None
|
|
1430
|
+
|
|
1431
|
+
def _get_node_type_for_element(self, element: Any) -> str:
    """Return the node type name that corresponds to ``element``.

    ``element`` is tested against the model classes ``Function``,
    ``Class``, ``Variable`` and ``Import`` in that order; the first match
    decides the result. Anything else yields ``"unknown"``.
    """
    from ..models import Class, Function, Import, Variable

    # Ordered pairs: the first model class that matches wins.
    node_type_by_model = (
        (Function, "function_definition"),
        (Class, "class_definition"),
        (Variable, "assignment"),
        (Import, "import_statement"),
    )
    for model_cls, node_type in node_type_by_model:
        if isinstance(element, model_cls):
            return node_type
    return "unknown"
|
|
1445
|
+
|
|
1446
|
+
def get_element_categories(self) -> dict[str, list[str]]:
    """
    Get element categories mapping query keys to node types

    Each category is available under a singular and a plural key, and
    ``"all_elements"`` lists every node type used by the categories.

    Returns:
        Dictionary mapping query keys to lists of node types
    """
    function_nodes = ("function_definition",)
    class_nodes = ("class_definition",)
    import_nodes = ("import_statement", "import_from_statement")
    exception_nodes = ("raise_statement", "except_clause")
    comprehension_nodes = (
        "list_comprehension",
        "set_comprehension",
        "dictionary_comprehension",
        "generator_expression",
    )

    # (singular key, plural key, node types), in the order the keys are emitted.
    paired_categories = (
        # Function-related queries
        ("function", "functions", function_nodes),
        ("async_function", "async_functions", function_nodes),
        ("method", "methods", function_nodes),
        ("lambda", "lambdas", ("lambda",)),
        # Class-related queries
        ("class", "classes", class_nodes),
        # Import-related queries
        ("import", "imports", import_nodes),
        ("from_import", "from_imports", ("import_from_statement",)),
        # Variable-related queries
        ("variable", "variables", ("assignment",)),
        # Decorator-related queries
        ("decorator", "decorators", ("decorator",)),
        # Exception-related queries
        ("exception", "exceptions", exception_nodes),
        # Comprehension-related queries
        ("comprehension", "comprehensions", comprehension_nodes),
        # Context manager queries
        ("context_manager", "context_managers", ("with_statement",)),
        # Type hint queries
        ("type_hint", "type_hints", ("type",)),
        # Docstring queries
        ("docstring", "docstrings", ("string",)),
        # Framework-specific queries
        ("django_model", "django_models", class_nodes),
        ("flask_route", "flask_routes", ("decorator",)),
        ("fastapi_endpoint", "fastapi_endpoints", function_nodes),
    )

    categories: dict[str, list[str]] = {}
    for singular, plural, node_types in paired_categories:
        # Every key gets its own list so entries never share state.
        categories[singular] = list(node_types)
        categories[plural] = list(node_types)

    # Generic queries
    categories["all_elements"] = [
        "function_definition",
        "class_definition",
        "import_statement",
        "import_from_statement",
        "assignment",
        "decorator",
        "raise_statement",
        "except_clause",
        "list_comprehension",
        "set_comprehension",
        "dictionary_comprehension",
        "generator_expression",
        "with_statement",
        "type",
        "string",
        "lambda",
    ]
    return categories
|
|
1529
|
+
|
|
1530
|
+
async def analyze_file(
    self, file_path: str, request: AnalysisRequest
) -> AnalysisResult:
    """Analyze a Python file and return the analysis results.

    The file is read with ``read_file_safe``, parsed with tree-sitter, and
    the functions, classes, variables and imports reported by a newly
    created extractor are collected into one element list.

    Args:
        file_path: Path of the file to analyze.
        request: Analysis request; this implementation does not read it.

    Returns:
        An ``AnalysisResult`` with ``success=True`` plus the elements, line
        count and syntax-tree node count, or one with ``success=False`` and
        an ``error_message`` when tree-sitter is unavailable, the Python
        language cannot be loaded, or any step raises.
    """
    if not TREE_SITTER_AVAILABLE:
        return AnalysisResult(
            file_path=file_path,
            language=self.get_language_name(),
            success=False,
            error_message="Tree-sitter library not available.",
        )

    language = self.get_tree_sitter_language()
    if not language:
        return AnalysisResult(
            file_path=file_path,
            language=self.get_language_name(),
            success=False,
            error_message="Could not load Python language for parsing.",
        )

    try:
        from ..encoding_utils import read_file_safe

        source_code, _ = read_file_safe(file_path)

        parser = tree_sitter.Parser()
        parser.language = language
        tree = parser.parse(bytes(source_code, "utf8"))

        extractor = self.create_extractor()
        extractor.current_file = file_path  # Set current file for context

        elements: list[CodeElement] = []

        # Extract all element types
        functions = extractor.extract_functions(tree, source_code)
        classes = extractor.extract_classes(tree, source_code)
        variables = extractor.extract_variables(tree, source_code)
        imports = extractor.extract_imports(tree, source_code)

        elements.extend(functions)
        elements.extend(classes)
        elements.extend(variables)
        elements.extend(imports)

        def count_nodes(node: "tree_sitter.Node") -> int:
            # Walk the tree with an explicit stack rather than recursion:
            # a deeply nested tree would otherwise exceed the interpreter's
            # recursion limit and turn a successful analysis into a failure.
            count = 0
            pending = [node]
            while pending:
                current = pending.pop()
                count += 1
                pending.extend(current.children)
            return count

        return AnalysisResult(
            file_path=file_path,
            language=self.get_language_name(),
            success=True,
            elements=elements,
            line_count=len(source_code.splitlines()),
            node_count=count_nodes(tree.root_node),
        )
    except Exception as e:
        log_error(f"Error analyzing Python file {file_path}: {e}")
        return AnalysisResult(
            file_path=file_path,
            language=self.get_language_name(),
            success=False,
            error_message=str(e),
        )
|
|
1598
|
+
|
|
1599
|
+
def execute_query(self, tree: "tree_sitter.Tree", query_name: str) -> dict:
    """Run one of the built-in queries against ``tree``.

    Only the query names ``"function"`` and ``"class"`` are recognised.

    Returns:
        ``{"captures": ..., "query": ...}`` when the query was run, or
        ``{"error": message}`` when the language is unavailable, the query
        name is unknown, or an exception was raised (which is also logged).
    """
    known_queries = (
        ("function", "(function_definition) @function"),
        ("class", "(class_definition) @class"),
    )
    try:
        language = self.get_tree_sitter_language()
        if not language:
            return {"error": "Language not available"}

        # Simple query execution for testing
        query_string = next(
            (pattern for name, pattern in known_queries if query_name == name),
            None,
        )
        if query_string is None:
            return {"error": f"Unknown query: {query_name}"}

        captures = TreeSitterQueryCompat.safe_execute_query(
            language, query_string, tree.root_node, fallback_result=[]
        )
        return {"captures": captures, "query": query_string}

    except Exception as e:
        log_error(f"Query execution failed: {e}")
        return {"error": str(e)}
|
|
1622
|
+
|
|
1623
|
+
def extract_elements(self, tree: "tree_sitter.Tree", source_code: str) -> list:
    """Collect functions, classes, variables and imports from ``tree``.

    The four extractor calls run in that order. If one of them raises, the
    error is logged and the elements gathered so far are returned.
    """
    extractor = self.get_extractor()
    collected = []

    try:
        collected += extractor.extract_functions(tree, source_code)
        collected += extractor.extract_classes(tree, source_code)  # type: ignore
        collected += extractor.extract_variables(tree, source_code)  # type: ignore
        collected += extractor.extract_imports(tree, source_code)  # type: ignore
    except Exception as e:
        log_error(f"Failed to extract elements: {e}")

    return collected
|