tree-sitter-analyzer 1.4.1__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tree-sitter-analyzer might be problematic. Click here for more details.
- tree_sitter_analyzer/__init__.py +1 -1
- tree_sitter_analyzer/api.py +108 -8
- tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +3 -2
- tree_sitter_analyzer/cli/commands/list_files_cli.py +0 -1
- tree_sitter_analyzer/cli/commands/search_content_cli.py +3 -2
- tree_sitter_analyzer/cli_main.py +3 -1
- tree_sitter_analyzer/encoding_utils.py +3 -3
- tree_sitter_analyzer/formatters/formatter_factory.py +3 -0
- tree_sitter_analyzer/formatters/javascript_formatter.py +467 -0
- tree_sitter_analyzer/formatters/python_formatter.py +161 -20
- tree_sitter_analyzer/language_loader.py +2 -2
- tree_sitter_analyzer/languages/javascript_plugin.py +1289 -238
- tree_sitter_analyzer/languages/python_plugin.py +581 -148
- tree_sitter_analyzer/mcp/server.py +17 -2
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +106 -4
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +257 -0
- tree_sitter_analyzer/mcp/utils/path_resolver.py +1 -1
- tree_sitter_analyzer/models.py +17 -0
- tree_sitter_analyzer/queries/javascript.py +592 -31
- tree_sitter_analyzer/queries/python.py +617 -58
- tree_sitter_analyzer/table_formatter.py +26 -2
- {tree_sitter_analyzer-1.4.1.dist-info → tree_sitter_analyzer-1.6.0.dist-info}/METADATA +165 -22
- {tree_sitter_analyzer-1.4.1.dist-info → tree_sitter_analyzer-1.6.0.dist-info}/RECORD +25 -23
- {tree_sitter_analyzer-1.4.1.dist-info → tree_sitter_analyzer-1.6.0.dist-info}/WHEEL +0 -0
- {tree_sitter_analyzer-1.4.1.dist-info → tree_sitter_analyzer-1.6.0.dist-info}/entry_points.txt +0 -0
|
@@ -2,121 +2,99 @@
|
|
|
2
2
|
"""
|
|
3
3
|
Python Language Plugin
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
Enhanced Python-specific parsing and element extraction functionality.
|
|
6
|
+
Provides comprehensive support for modern Python features including async/await,
|
|
7
|
+
decorators, type hints, context managers, and framework-specific patterns.
|
|
8
|
+
Equivalent to JavaScript plugin capabilities for consistent language support.
|
|
7
9
|
"""
|
|
8
10
|
|
|
9
|
-
from typing import TYPE_CHECKING, Optional
|
|
11
|
+
from typing import TYPE_CHECKING, Any, Optional
|
|
10
12
|
|
|
11
13
|
if TYPE_CHECKING:
|
|
12
14
|
import tree_sitter
|
|
13
15
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
+
try:
|
|
17
|
+
import tree_sitter
|
|
18
|
+
|
|
19
|
+
TREE_SITTER_AVAILABLE = True
|
|
20
|
+
except ImportError:
|
|
21
|
+
TREE_SITTER_AVAILABLE = False
|
|
16
22
|
|
|
17
|
-
from ..
|
|
23
|
+
from ..core.analysis_engine import AnalysisRequest
|
|
24
|
+
from ..encoding_utils import extract_text_slice, safe_encode
|
|
25
|
+
from ..models import AnalysisResult, Class, CodeElement, Function, Import, Variable
|
|
18
26
|
from ..plugins.base import ElementExtractor, LanguagePlugin
|
|
19
|
-
from ..utils import log_error, log_warning
|
|
27
|
+
from ..utils import log_debug, log_error, log_warning
|
|
20
28
|
|
|
21
29
|
|
|
22
30
|
class PythonElementExtractor(ElementExtractor):
|
|
23
|
-
"""Python-specific element extractor with comprehensive
|
|
31
|
+
"""Enhanced Python-specific element extractor with comprehensive feature support"""
|
|
24
32
|
|
|
25
33
|
def __init__(self) -> None:
    """Initialize the Python element extractor."""
    # Context for the source currently being analysed; all start empty.
    self.current_module: str = ""
    self.current_file: str = ""
    self.source_code: str = ""
    # source_code split on "\n"; refreshed by the extract_* entry points.
    self.content_lines: list[str] = []
    self.imports: list[str] = []
    self.exports: list[dict[str, Any]] = []

    # Performance optimization caches
    # Keyed by id(node): text already sliced out for that node.
    self._node_text_cache: dict[int, str] = {}
    # id(node) of every target node already handled by the traversal.
    self._processed_nodes: set[int] = set()
    # Keyed by (id(node), element_type): the extractor result for that node.
    self._element_cache: dict[tuple[int, str], Any] = {}
    # None means "not set"; text extraction then falls back to "utf-8".
    self._file_encoding: str | None = None
    # Keyed by 1-based definition line; None records "no docstring found".
    self._docstring_cache: dict[int, str | None] = {}
    # Keyed by id(node): complexity score computed for that node.
    self._complexity_cache: dict[int, int] = {}

    # Python-specific tracking
    self.is_module: bool = False
    self.framework_type: str = ""  # django, flask, fastapi, etc.
    self.python_version: str = "3.8"  # default
|
|
31
54
|
|
|
32
55
|
def extract_functions(
|
|
33
56
|
self, tree: "tree_sitter.Tree", source_code: str
|
|
34
57
|
) -> list[Function]:
|
|
35
|
-
"""Extract Python function definitions with comprehensive
|
|
36
|
-
self.source_code = source_code
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
function_queries = [
|
|
41
|
-
# Regular function definitions
|
|
42
|
-
"""
|
|
43
|
-
(function_definition
|
|
44
|
-
name: (identifier) @function.name
|
|
45
|
-
parameters: (parameters) @function.params
|
|
46
|
-
body: (block) @function.body) @function.definition
|
|
47
|
-
""",
|
|
48
|
-
# Async function definitions
|
|
49
|
-
"""
|
|
50
|
-
(function_definition
|
|
51
|
-
"async"
|
|
52
|
-
name: (identifier) @async_function.name
|
|
53
|
-
parameters: (parameters) @async_function.params
|
|
54
|
-
body: (block) @async_function.body) @async_function.definition
|
|
55
|
-
""",
|
|
56
|
-
]
|
|
58
|
+
"""Extract Python function definitions with comprehensive details"""
|
|
59
|
+
self.source_code = source_code or ""
|
|
60
|
+
self.content_lines = self.source_code.split("\n")
|
|
61
|
+
self._reset_caches()
|
|
62
|
+
self._detect_file_characteristics()
|
|
57
63
|
|
|
58
|
-
|
|
59
|
-
language = tree.language if hasattr(tree, "language") else None
|
|
60
|
-
if language:
|
|
61
|
-
for query_string in function_queries:
|
|
62
|
-
query = language.query(query_string)
|
|
63
|
-
captures = query.captures(tree.root_node)
|
|
64
|
+
functions: list[Function] = []
|
|
64
65
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
function = self._extract_detailed_function_info(
|
|
70
|
-
node, source_code, is_async=False
|
|
71
|
-
)
|
|
72
|
-
if function:
|
|
73
|
-
functions.append(function)
|
|
74
|
-
|
|
75
|
-
# Process async functions
|
|
76
|
-
async_nodes = captures.get("async_function.definition", [])
|
|
77
|
-
for node in async_nodes:
|
|
78
|
-
function = self._extract_detailed_function_info(
|
|
79
|
-
node, source_code, is_async=True
|
|
80
|
-
)
|
|
81
|
-
if function:
|
|
82
|
-
functions.append(function)
|
|
66
|
+
# Use optimized traversal for multiple function types
|
|
67
|
+
extractors = {
|
|
68
|
+
"function_definition": self._extract_function_optimized,
|
|
69
|
+
}
|
|
83
70
|
|
|
84
|
-
|
|
85
|
-
|
|
71
|
+
self._traverse_and_extract_iterative(
|
|
72
|
+
tree.root_node, extractors, functions, "function"
|
|
73
|
+
)
|
|
86
74
|
|
|
75
|
+
log_debug(f"Extracted {len(functions)} Python functions")
|
|
87
76
|
return functions
|
|
88
77
|
|
|
89
78
|
def extract_classes(
|
|
90
79
|
self, tree: "tree_sitter.Tree", source_code: str
|
|
91
80
|
) -> list[Class]:
|
|
92
|
-
"""Extract Python class definitions with
|
|
93
|
-
self.source_code = source_code
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
# Class definition query
|
|
97
|
-
query_string = """
|
|
98
|
-
(class_definition
|
|
99
|
-
name: (identifier) @class.name
|
|
100
|
-
superclasses: (argument_list)? @class.superclasses
|
|
101
|
-
body: (block) @class.body) @class.definition
|
|
102
|
-
"""
|
|
81
|
+
"""Extract Python class definitions with detailed information"""
|
|
82
|
+
self.source_code = source_code or ""
|
|
83
|
+
self.content_lines = self.source_code.split("\n")
|
|
84
|
+
self._reset_caches()
|
|
103
85
|
|
|
104
|
-
|
|
105
|
-
language = tree.language if hasattr(tree, "language") else None
|
|
106
|
-
if language:
|
|
107
|
-
query = language.query(query_string)
|
|
108
|
-
captures = query.captures(tree.root_node)
|
|
86
|
+
classes: list[Class] = []
|
|
109
87
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
if cls:
|
|
115
|
-
classes.append(cls)
|
|
88
|
+
# Extract class declarations
|
|
89
|
+
extractors = {
|
|
90
|
+
"class_definition": self._extract_class_optimized,
|
|
91
|
+
}
|
|
116
92
|
|
|
117
|
-
|
|
118
|
-
|
|
93
|
+
self._traverse_and_extract_iterative(
|
|
94
|
+
tree.root_node, extractors, classes, "class"
|
|
95
|
+
)
|
|
119
96
|
|
|
97
|
+
log_debug(f"Extracted {len(classes)} Python classes")
|
|
120
98
|
return classes
|
|
121
99
|
|
|
122
100
|
def extract_variables(
|
|
@@ -152,6 +130,446 @@ class PythonElementExtractor(ElementExtractor):
|
|
|
152
130
|
|
|
153
131
|
return variables
|
|
154
132
|
|
|
133
|
+
def _reset_caches(self) -> None:
    """Empty every memoization container in place.

    The containers themselves are kept (only their contents are dropped),
    so any outside reference to them keeps pointing at the live object.
    """
    for cache in (
        self._node_text_cache,
        self._processed_nodes,
        self._element_cache,
        self._docstring_cache,
        self._complexity_cache,
    ):
        cache.clear()
|
|
140
|
+
|
|
141
|
+
def _detect_file_characteristics(self) -> None:
    """Derive coarse, text-based facts about ``self.source_code``.

    Sets ``self.is_module`` to whether the source contains ``"import "`` or
    ``"from "``, and sets ``self.framework_type`` to the first of
    ``"django"``, ``"flask"``, ``"fastapi"`` whose name occurs anywhere in
    the source (case-insensitively), or to ``""`` when none occurs.

    This is a plain substring heuristic: a framework name inside a comment
    or string literal counts as a match.
    """
    source = self.source_code

    # Check if it's a module
    self.is_module = "import " in source or "from " in source

    # Lower-case once instead of once per framework test.
    lowered = source.lower()

    # Always assign, so a value detected for a previously analysed source
    # cannot leak into one that mentions no framework at all.
    self.framework_type = next(
        (name for name in ("django", "flask", "fastapi") if name in lowered),
        "",
    )
|
|
155
|
+
|
|
156
|
+
def _traverse_and_extract_iterative(
    self,
    root_node: "tree_sitter.Node",
    extractors: dict[str, Any],
    results: list[Any],
    element_type: str,
) -> None:
    """Walk the syntax tree depth-first with an explicit stack and extract elements.

    Args:
        root_node: Node to start from; a falsy value makes this a no-op.
        extractors: Maps a node type to a callable taking the node and
            returning an element, a list of elements, or a falsy value.
        results: Output list; extracted elements are appended in place.
        element_type: Label combined with the node id to key
            ``self._element_cache``.

    Only nodes whose type is an extractor key or one of the container types
    below are descended into; every other subtree is pruned. Subtrees deeper
    than 50 levels are skipped.
    """
    if not root_node:
        return

    target_node_types = set(extractors.keys())
    container_node_types = {
        "module",
        "class_definition",
        "function_definition",
        # tree-sitter-python wraps a decorated def/class in this node; without
        # it every decorated definition is pruned before it is reached.
        "decorated_definition",
        "if_statement",
        "for_statement",
        "while_statement",
        "with_statement",
        "try_statement",
        # Clause nodes sit between a compound statement and its block, so
        # definitions in else/except/finally branches need them as well.
        "elif_clause",
        "else_clause",
        "except_clause",
        "finally_clause",
        "block",
    }

    def collect(element: Any) -> None:
        # Extractors may hand back one element or a list of them.
        if not element:
            return
        if isinstance(element, list):
            results.extend(element)
        else:
            results.append(element)

    node_stack = [(root_node, 0)]
    processed_nodes = 0
    max_depth = 50
    depth_warning_logged = False

    while node_stack:
        current_node, depth = node_stack.pop()

        if depth > max_depth:
            # Warn once per traversal rather than once per skipped node.
            if not depth_warning_logged:
                log_warning(f"Maximum traversal depth ({max_depth}) exceeded")
                depth_warning_logged = True
            continue

        processed_nodes += 1
        node_type = current_node.type

        # Early termination for irrelevant nodes (the root is always expanded)
        if (
            depth > 0
            and node_type not in target_node_types
            and node_type not in container_node_types
        ):
            continue

        # Process target nodes
        if node_type in target_node_types:
            node_id = id(current_node)

            if node_id in self._processed_nodes:
                continue

            cache_key = (node_id, element_type)
            if cache_key in self._element_cache:
                collect(self._element_cache[cache_key])
                self._processed_nodes.add(node_id)
                continue

            # Extract and cache
            extractor = extractors.get(node_type)
            if extractor:
                element = extractor(current_node)
                self._element_cache[cache_key] = element
                collect(element)
                self._processed_nodes.add(node_id)

        # Add children to stack; reversed so they are popped in source order
        if current_node.children:
            for child in reversed(current_node.children):
                node_stack.append((child, depth + 1))

    log_debug(f"Iterative traversal processed {processed_nodes} nodes")
|
|
238
|
+
|
|
239
|
+
def _get_node_text_optimized(self, node: "tree_sitter.Node") -> str:
    """Return the source text covered by ``node``, memoized per node id.

    The primary path encodes the joined ``self.content_lines`` and slices it
    by the node's byte offsets. If that raises, a line-based fallback slices
    by the node's start/end points. If the fallback raises too, ``""`` is
    returned. Only primary-path results are stored in the cache.
    """
    node_id = id(node)

    if node_id in self._node_text_cache:
        return self._node_text_cache[node_id]

    encoding = self._file_encoding or "utf-8"

    try:
        start_byte = node.start_byte
        end_byte = node.end_byte

        content_bytes = safe_encode("\n".join(self.content_lines), encoding)
        text = extract_text_slice(content_bytes, start_byte, end_byte, encoding)

        self._node_text_cache[node_id] = text
        return text
    except Exception as e:
        log_error(f"Error in _get_node_text_optimized: {e}")
        # Fallback to simple text extraction
        try:
            start_row, start_col = node.start_point
            end_row, end_col = node.end_point
            source_lines = self.content_lines

            pieces = []
            for row in range(start_row, end_row + 1):
                if row >= len(source_lines):
                    continue
                # tree-sitter columns are byte offsets, so slice the encoded
                # line; slicing the str would drift on non-ASCII text.
                raw_line = source_lines[row].encode(encoding)
                lo = start_col if row == start_row else 0
                hi = end_col if row == end_row else len(raw_line)
                pieces.append(raw_line[lo:hi].decode(encoding, errors="replace"))
            return "\n".join(pieces)
        except Exception as fallback_error:
            log_error(f"Fallback text extraction also failed: {fallback_error}")
            return ""
|
|
281
|
+
|
|
282
|
+
def _extract_function_optimized(self, node: "tree_sitter.Node") -> Function | None:
    """Build a ``Function`` element from a ``function_definition`` node.

    Returns ``None`` when the signature cannot be parsed or when any step
    raises; the failure is logged rather than propagated.

    Notes:
        * ``raw_text`` is the whole source lines spanned by the node.
        * ``is_generator`` is a substring test for ``"yield"`` in that text.
        * Visibility follows naming: ``__dunder__`` names are "magic" (neither
          private nor public), other ``_``-prefixed names are private.
    """
    try:
        # tree-sitter rows are 0-based; reported line numbers are 1-based.
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        # Extract function details
        function_info = self._parse_function_signature_optimized(node)
        if not function_info:
            return None

        name, parameters, is_async, decorators, return_type = function_info

        # Extract docstring
        docstring = self._extract_docstring_for_line(start_line)

        # Calculate complexity
        complexity_score = self._calculate_complexity_optimized(node)

        # Extract raw text
        start_line_idx = max(0, start_line - 1)
        end_line_idx = min(len(self.content_lines), end_line)
        raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])

        # Determine visibility (Python conventions)
        visibility = "public"
        if name.startswith("__") and name.endswith("__"):
            visibility = "magic"  # Magic methods
        elif name.startswith("_"):
            visibility = "private"

        return Function(
            name=name,
            start_line=start_line,
            end_line=end_line,
            raw_text=raw_text,
            language="python",
            parameters=parameters,
            return_type=return_type or "Any",
            is_async=is_async,
            is_generator="yield" in raw_text,
            docstring=docstring,
            complexity_score=complexity_score,
            modifiers=decorators,
            is_static="staticmethod" in decorators,
            is_staticmethod="staticmethod" in decorators,
            is_private=visibility == "private",
            is_public=visibility == "public",
            # Python-specific properties
            framework_type=self.framework_type,
            is_property="property" in decorators,
            is_classmethod="classmethod" in decorators,
        )
    except Exception as e:
        import traceback

        log_error(f"Failed to extract function info: {e}")
        # Send the traceback through the logger instead of printing it to
        # stderr, so it follows the configured log level and destination.
        log_debug(traceback.format_exc())
        return None
|
|
341
|
+
|
|
342
|
+
def _parse_function_signature_optimized(
|
|
343
|
+
self, node: "tree_sitter.Node"
|
|
344
|
+
) -> tuple[str, list[str], bool, list[str], str | None] | None:
|
|
345
|
+
"""Parse function signature for Python functions"""
|
|
346
|
+
try:
|
|
347
|
+
name = None
|
|
348
|
+
parameters = []
|
|
349
|
+
is_async = False
|
|
350
|
+
decorators = []
|
|
351
|
+
return_type = None
|
|
352
|
+
|
|
353
|
+
# Check for async keyword
|
|
354
|
+
node_text = self._get_node_text_optimized(node)
|
|
355
|
+
is_async = node_text.strip().startswith("async def")
|
|
356
|
+
|
|
357
|
+
# Extract decorators from preceding siblings
|
|
358
|
+
if node.parent:
|
|
359
|
+
for sibling in node.parent.children:
|
|
360
|
+
if sibling.type == "decorated_definition":
|
|
361
|
+
for child in sibling.children:
|
|
362
|
+
if child.type == "decorator":
|
|
363
|
+
decorator_text = self._get_node_text_optimized(child)
|
|
364
|
+
if decorator_text.startswith("@"):
|
|
365
|
+
decorator_text = decorator_text[1:].strip()
|
|
366
|
+
decorators.append(decorator_text)
|
|
367
|
+
|
|
368
|
+
for child in node.children:
|
|
369
|
+
if child.type == "identifier":
|
|
370
|
+
name = child.text.decode("utf8") if child.text else None
|
|
371
|
+
elif child.type == "parameters":
|
|
372
|
+
parameters = self._extract_parameters_from_node_optimized(child)
|
|
373
|
+
elif child.type == "type":
|
|
374
|
+
return_type = self._get_node_text_optimized(child)
|
|
375
|
+
|
|
376
|
+
return name or "", parameters, is_async, decorators, return_type
|
|
377
|
+
except Exception:
|
|
378
|
+
return None
|
|
379
|
+
|
|
380
|
+
def _extract_parameters_from_node_optimized(
    self, params_node: "tree_sitter.Node"
) -> list[str]:
    """Return the source text of each parameter under a ``parameters`` node.

    Each entry is the full text of one parameter, so type hints, default
    values and ``*``/``**`` prefixes are kept. Children of any other type
    (punctuation, separators) are ignored. Order follows the source.
    """
    parameter_node_types = {
        "identifier",  # plain name
        "typed_parameter",  # x: int
        "default_parameter",  # x=1
        "typed_default_parameter",  # x: int = 1 (previously dropped)
        "list_splat_pattern",  # *args
        "dictionary_splat_pattern",  # **kwargs
    }

    return [
        self._get_node_text_optimized(child)
        for child in params_node.children
        if child.type in parameter_node_types
    ]
|
|
408
|
+
|
|
409
|
+
def _extract_docstring_for_line(self, target_line: int) -> str | None:
|
|
410
|
+
"""Extract docstring for the specified line"""
|
|
411
|
+
if target_line in self._docstring_cache:
|
|
412
|
+
return self._docstring_cache[target_line]
|
|
413
|
+
|
|
414
|
+
try:
|
|
415
|
+
if not self.content_lines or target_line >= len(self.content_lines):
|
|
416
|
+
return None
|
|
417
|
+
|
|
418
|
+
# Look for docstring in the next few lines after function definition
|
|
419
|
+
for i in range(target_line, min(target_line + 5, len(self.content_lines))):
|
|
420
|
+
line = self.content_lines[i].strip()
|
|
421
|
+
if line.startswith('"""') or line.startswith("'''"):
|
|
422
|
+
# Found docstring start
|
|
423
|
+
quote_type = '"""' if line.startswith('"""') else "'''"
|
|
424
|
+
docstring_lines = []
|
|
425
|
+
|
|
426
|
+
# Single line docstring
|
|
427
|
+
if line.count(quote_type) >= 2:
|
|
428
|
+
docstring = line.replace(quote_type, "").strip()
|
|
429
|
+
self._docstring_cache[target_line] = docstring
|
|
430
|
+
return docstring
|
|
431
|
+
|
|
432
|
+
# Multi-line docstring
|
|
433
|
+
docstring_lines.append(line.replace(quote_type, ""))
|
|
434
|
+
for j in range(i + 1, len(self.content_lines)):
|
|
435
|
+
next_line = self.content_lines[j]
|
|
436
|
+
if quote_type in next_line:
|
|
437
|
+
docstring_lines.append(next_line.replace(quote_type, ""))
|
|
438
|
+
break
|
|
439
|
+
docstring_lines.append(next_line)
|
|
440
|
+
|
|
441
|
+
docstring = "\n".join(docstring_lines).strip()
|
|
442
|
+
self._docstring_cache[target_line] = docstring
|
|
443
|
+
return docstring
|
|
444
|
+
|
|
445
|
+
self._docstring_cache[target_line] = None
|
|
446
|
+
return None
|
|
447
|
+
|
|
448
|
+
except Exception as e:
|
|
449
|
+
log_debug(f"Failed to extract docstring: {e}")
|
|
450
|
+
return None
|
|
451
|
+
|
|
452
|
+
def _calculate_complexity_optimized(self, node: "tree_sitter.Node") -> int:
    """Estimate a complexity score for ``node`` from its text, memoized by node id.

    The score starts at 1 and gains one point per occurrence of a branching
    keyword in the lower-cased node text, counted where the keyword is
    followed by a space and preceded by a space or a newline. This is a
    textual approximation, not a syntax-tree analysis. If the text cannot be
    obtained the score stays at 1.
    """
    cache_key = id(node)
    try:
        return self._complexity_cache[cache_key]
    except KeyError:
        pass

    branch_keywords = (
        "if",
        "elif",
        "while",
        "for",
        "except",
        "and",
        "or",
        "with",
        "match",
        "case",
    )

    score = 1
    try:
        lowered = self._get_node_text_optimized(node).lower()
        score += sum(
            lowered.count(f" {word} ") + lowered.count(f"\n{word} ")
            for word in branch_keywords
        )
    except Exception as e:
        log_debug(f"Failed to calculate complexity: {e}")

    self._complexity_cache[cache_key] = score
    return score
|
|
482
|
+
|
|
483
|
+
def _extract_class_optimized(self, node: "tree_sitter.Node") -> Class | None:
    """Build a ``Class`` element from a ``class_definition`` node.

    Returns ``None`` when the node has no name identifier or when any step
    raises; the failure is logged at debug level.

    Notes:
        * The first base class becomes ``superclass`` and the remaining ones
          ``interfaces``. Both plain (``Base``) and dotted (``models.Model``)
          bases are recorded; keyword arguments such as ``metaclass=`` are not.
        * ``is_dataclass``, ``is_abstract`` and ``is_exception`` are
          name/text heuristics, not semantic checks.
    """
    try:
        # tree-sitter rows are 0-based; reported line numbers are 1-based.
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        class_name = None
        superclasses = []
        decorators = []

        # A decorated class is the child of a decorated_definition whose other
        # children are its decorators. Read only that wrapper; scanning all
        # siblings would attribute other definitions' decorators here.
        wrapper = node.parent
        if wrapper and wrapper.type == "decorated_definition":
            for child in wrapper.children:
                if child.type == "decorator":
                    decorator_text = self._get_node_text_optimized(child)
                    if decorator_text.startswith("@"):
                        decorator_text = decorator_text[1:].strip()
                    decorators.append(decorator_text)

        for child in node.children:
            if child.type == "identifier":
                class_name = child.text.decode("utf8") if child.text else None
            elif child.type == "argument_list":
                # Extract superclasses; "attribute" covers dotted bases
                for grandchild in child.children:
                    if grandchild.type in ("identifier", "attribute"):
                        superclasses.append(
                            self._get_node_text_optimized(grandchild)
                        )

        if not class_name:
            return None

        # Extract docstring
        docstring = self._extract_docstring_for_line(start_line)

        # Extract raw text
        raw_text = self._get_node_text_optimized(node)

        # Generate fully qualified name
        full_qualified_name = (
            f"{self.current_module}.{class_name}"
            if self.current_module
            else class_name
        )

        return Class(
            name=class_name,
            start_line=start_line,
            end_line=end_line,
            raw_text=raw_text,
            language="python",
            class_type="class",
            superclass=superclasses[0] if superclasses else None,
            interfaces=superclasses[1:] if len(superclasses) > 1 else [],
            docstring=docstring,
            modifiers=decorators,
            full_qualified_name=full_qualified_name,
            package_name=self.current_module,
            # Python-specific properties
            framework_type=self.framework_type,
            is_dataclass="dataclass" in decorators,
            is_abstract="ABC" in superclasses or "abstractmethod" in raw_text,
            is_exception=any(
                "Exception" in sc or "Error" in sc for sc in superclasses
            ),
        )
    except Exception as e:
        log_debug(f"Failed to extract class info: {e}")
        return None
|
|
555
|
+
|
|
556
|
+
def _is_framework_class(self, node: "tree_sitter.Node", class_name: str) -> bool:
    """Report whether a class looks like it belongs to the detected framework.

    For ``"django"`` the class's own text is searched for common Django
    base-class name fragments. For ``"flask"`` and ``"fastapi"`` the whole
    source is searched for framework entry-point names. Any other
    ``self.framework_type`` yields ``False``. ``class_name`` is accepted but
    not consulted.
    """
    framework = self.framework_type

    if framework == "django":
        # Django model, view, form, etc. are recognised by name fragment.
        class_source = self._get_node_text_optimized(node)
        django_markers = ("Model", "View", "Form", "Serializer", "TestCase")
        return any(marker in class_source for marker in django_markers)

    if framework == "flask":
        return any(marker in self.source_code for marker in ("Flask", "Blueprint"))

    if framework == "fastapi":
        return any(
            marker in self.source_code for marker in ("APIRouter", "BaseModel")
        )

    return False
|
|
572
|
+
|
|
155
573
|
def _extract_class_attributes(
|
|
156
574
|
self, class_body_node: "tree_sitter.Node", source_code: str
|
|
157
575
|
) -> list[Variable]:
|
|
@@ -626,7 +1044,24 @@ class PythonPlugin(LanguagePlugin):
|
|
|
626
1044
|
|
|
627
1045
|
def get_supported_queries(self) -> list[str]:
|
|
628
1046
|
"""Get list of supported query names for this language"""
|
|
629
|
-
return [
|
|
1047
|
+
return [
|
|
1048
|
+
"function",
|
|
1049
|
+
"class",
|
|
1050
|
+
"variable",
|
|
1051
|
+
"import",
|
|
1052
|
+
"async_function",
|
|
1053
|
+
"method",
|
|
1054
|
+
"decorator",
|
|
1055
|
+
"exception",
|
|
1056
|
+
"comprehension",
|
|
1057
|
+
"lambda",
|
|
1058
|
+
"context_manager",
|
|
1059
|
+
"type_hint",
|
|
1060
|
+
"docstring",
|
|
1061
|
+
"django_model",
|
|
1062
|
+
"flask_route",
|
|
1063
|
+
"fastapi_endpoint",
|
|
1064
|
+
]
|
|
630
1065
|
|
|
631
1066
|
def is_applicable(self, file_path: str) -> bool:
|
|
632
1067
|
"""Check if this plugin is applicable for the given file"""
|
|
@@ -643,90 +1078,88 @@ class PythonPlugin(LanguagePlugin):
|
|
|
643
1078
|
"extensions": self.get_file_extensions(),
|
|
644
1079
|
"version": "2.0.0",
|
|
645
1080
|
"supported_queries": self.get_supported_queries(),
|
|
1081
|
+
"features": [
|
|
1082
|
+
"Async/await functions",
|
|
1083
|
+
"Type hints support",
|
|
1084
|
+
"Decorators",
|
|
1085
|
+
"Context managers",
|
|
1086
|
+
"Comprehensions",
|
|
1087
|
+
"Lambda expressions",
|
|
1088
|
+
"Exception handling",
|
|
1089
|
+
"Docstring extraction",
|
|
1090
|
+
"Django framework support",
|
|
1091
|
+
"Flask framework support",
|
|
1092
|
+
"FastAPI framework support",
|
|
1093
|
+
"Dataclass support",
|
|
1094
|
+
"Abstract class detection",
|
|
1095
|
+
"Complexity analysis",
|
|
1096
|
+
],
|
|
646
1097
|
}
|
|
647
1098
|
|
|
648
1099
|
async def analyze_file(
|
|
649
|
-
self, file_path: str, request:
|
|
650
|
-
) ->
|
|
651
|
-
"""
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
1100
|
+
self, file_path: str, request: AnalysisRequest
|
|
1101
|
+
) -> AnalysisResult:
|
|
1102
|
+
"""Analyze a Python file and return the analysis results."""
|
|
1103
|
+
if not TREE_SITTER_AVAILABLE:
|
|
1104
|
+
return AnalysisResult(
|
|
1105
|
+
file_path=file_path,
|
|
1106
|
+
language=self.get_language_name(),
|
|
1107
|
+
success=False,
|
|
1108
|
+
error_message="Tree-sitter library not available.",
|
|
1109
|
+
)
|
|
1110
|
+
|
|
1111
|
+
language = self.get_tree_sitter_language()
|
|
1112
|
+
if not language:
|
|
1113
|
+
return AnalysisResult(
|
|
1114
|
+
file_path=file_path,
|
|
1115
|
+
language=self.get_language_name(),
|
|
1116
|
+
success=False,
|
|
1117
|
+
error_message="Could not load Python language for parsing.",
|
|
1118
|
+
)
|
|
664
1119
|
|
|
665
|
-
|
|
1120
|
+
try:
|
|
666
1121
|
with open(file_path, encoding="utf-8") as f:
|
|
667
1122
|
source_code = f.read()
|
|
668
1123
|
|
|
669
|
-
|
|
670
|
-
parser =
|
|
671
|
-
|
|
1124
|
+
parser = tree_sitter.Parser()
|
|
1125
|
+
parser.language = language
|
|
1126
|
+
tree = parser.parse(bytes(source_code, "utf8"))
|
|
672
1127
|
|
|
673
|
-
if not parse_result.success:
|
|
674
|
-
return AnalysisResult(
|
|
675
|
-
file_path=file_path,
|
|
676
|
-
language="python",
|
|
677
|
-
line_count=len(source_code.splitlines()),
|
|
678
|
-
elements=[],
|
|
679
|
-
node_count=0,
|
|
680
|
-
query_results={},
|
|
681
|
-
source_code=source_code,
|
|
682
|
-
success=False,
|
|
683
|
-
error_message=parse_result.error_message,
|
|
684
|
-
)
|
|
685
|
-
|
|
686
|
-
# Extract elements
|
|
687
1128
|
extractor = self.create_extractor()
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
1129
|
+
extractor.current_file = file_path # Set current file for context
|
|
1130
|
+
|
|
1131
|
+
elements: list[CodeElement] = []
|
|
1132
|
+
|
|
1133
|
+
# Extract all element types
|
|
1134
|
+
functions = extractor.extract_functions(tree, source_code)
|
|
1135
|
+
classes = extractor.extract_classes(tree, source_code)
|
|
1136
|
+
variables = extractor.extract_variables(tree, source_code)
|
|
1137
|
+
imports = extractor.extract_imports(tree, source_code)
|
|
1138
|
+
|
|
1139
|
+
elements.extend(functions)
|
|
1140
|
+
elements.extend(classes)
|
|
1141
|
+
elements.extend(variables)
|
|
1142
|
+
elements.extend(imports)
|
|
1143
|
+
|
|
1144
|
+
def count_nodes(node: "tree_sitter.Node") -> int:
|
|
1145
|
+
count = 1
|
|
1146
|
+
for child in node.children:
|
|
1147
|
+
count += count_nodes(child)
|
|
1148
|
+
return count
|
|
705
1149
|
|
|
706
1150
|
return AnalysisResult(
|
|
707
1151
|
file_path=file_path,
|
|
708
|
-
language=
|
|
709
|
-
line_count=len(source_code.splitlines()),
|
|
710
|
-
elements=all_elements,
|
|
711
|
-
node_count=(
|
|
712
|
-
parse_result.tree.root_node.child_count if parse_result.tree else 0
|
|
713
|
-
),
|
|
714
|
-
query_results={},
|
|
715
|
-
source_code=source_code,
|
|
1152
|
+
language=self.get_language_name(),
|
|
716
1153
|
success=True,
|
|
717
|
-
|
|
1154
|
+
elements=elements,
|
|
1155
|
+
line_count=len(source_code.splitlines()),
|
|
1156
|
+
node_count=count_nodes(tree.root_node),
|
|
718
1157
|
)
|
|
719
|
-
|
|
720
1158
|
except Exception as e:
|
|
721
|
-
log_error(f"
|
|
1159
|
+
log_error(f"Error analyzing Python file {file_path}: {e}")
|
|
722
1160
|
return AnalysisResult(
|
|
723
1161
|
file_path=file_path,
|
|
724
|
-
language=
|
|
725
|
-
line_count=0,
|
|
726
|
-
elements=[],
|
|
727
|
-
node_count=0,
|
|
728
|
-
query_results={},
|
|
729
|
-
source_code="",
|
|
1162
|
+
language=self.get_language_name(),
|
|
730
1163
|
success=False,
|
|
731
1164
|
error_message=str(e),
|
|
732
1165
|
)
|