tree-sitter-analyzer 1.9.17.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. tree_sitter_analyzer/__init__.py +132 -0
  2. tree_sitter_analyzer/__main__.py +11 -0
  3. tree_sitter_analyzer/api.py +853 -0
  4. tree_sitter_analyzer/cli/__init__.py +39 -0
  5. tree_sitter_analyzer/cli/__main__.py +12 -0
  6. tree_sitter_analyzer/cli/argument_validator.py +89 -0
  7. tree_sitter_analyzer/cli/commands/__init__.py +26 -0
  8. tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
  9. tree_sitter_analyzer/cli/commands/base_command.py +181 -0
  10. tree_sitter_analyzer/cli/commands/default_command.py +18 -0
  11. tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
  12. tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
  13. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
  14. tree_sitter_analyzer/cli/commands/query_command.py +109 -0
  15. tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
  16. tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
  17. tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
  18. tree_sitter_analyzer/cli/commands/table_command.py +414 -0
  19. tree_sitter_analyzer/cli/info_commands.py +124 -0
  20. tree_sitter_analyzer/cli_main.py +472 -0
  21. tree_sitter_analyzer/constants.py +85 -0
  22. tree_sitter_analyzer/core/__init__.py +15 -0
  23. tree_sitter_analyzer/core/analysis_engine.py +580 -0
  24. tree_sitter_analyzer/core/cache_service.py +333 -0
  25. tree_sitter_analyzer/core/engine.py +585 -0
  26. tree_sitter_analyzer/core/parser.py +293 -0
  27. tree_sitter_analyzer/core/query.py +605 -0
  28. tree_sitter_analyzer/core/query_filter.py +200 -0
  29. tree_sitter_analyzer/core/query_service.py +340 -0
  30. tree_sitter_analyzer/encoding_utils.py +530 -0
  31. tree_sitter_analyzer/exceptions.py +747 -0
  32. tree_sitter_analyzer/file_handler.py +246 -0
  33. tree_sitter_analyzer/formatters/__init__.py +1 -0
  34. tree_sitter_analyzer/formatters/base_formatter.py +201 -0
  35. tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
  36. tree_sitter_analyzer/formatters/formatter_config.py +197 -0
  37. tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
  38. tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
  39. tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
  40. tree_sitter_analyzer/formatters/go_formatter.py +368 -0
  41. tree_sitter_analyzer/formatters/html_formatter.py +498 -0
  42. tree_sitter_analyzer/formatters/java_formatter.py +423 -0
  43. tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
  44. tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
  45. tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
  46. tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
  47. tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
  48. tree_sitter_analyzer/formatters/php_formatter.py +301 -0
  49. tree_sitter_analyzer/formatters/python_formatter.py +830 -0
  50. tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
  51. tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
  52. tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
  53. tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
  54. tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
  55. tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
  56. tree_sitter_analyzer/interfaces/__init__.py +9 -0
  57. tree_sitter_analyzer/interfaces/cli.py +535 -0
  58. tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
  59. tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
  60. tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
  61. tree_sitter_analyzer/language_detector.py +553 -0
  62. tree_sitter_analyzer/language_loader.py +271 -0
  63. tree_sitter_analyzer/languages/__init__.py +10 -0
  64. tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
  65. tree_sitter_analyzer/languages/css_plugin.py +449 -0
  66. tree_sitter_analyzer/languages/go_plugin.py +836 -0
  67. tree_sitter_analyzer/languages/html_plugin.py +496 -0
  68. tree_sitter_analyzer/languages/java_plugin.py +1299 -0
  69. tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
  70. tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
  71. tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
  72. tree_sitter_analyzer/languages/php_plugin.py +862 -0
  73. tree_sitter_analyzer/languages/python_plugin.py +1636 -0
  74. tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
  75. tree_sitter_analyzer/languages/rust_plugin.py +673 -0
  76. tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
  77. tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
  78. tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
  79. tree_sitter_analyzer/legacy_table_formatter.py +860 -0
  80. tree_sitter_analyzer/mcp/__init__.py +34 -0
  81. tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
  82. tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
  83. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
  84. tree_sitter_analyzer/mcp/server.py +869 -0
  85. tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
  86. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
  87. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
  88. tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
  89. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
  90. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
  91. tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
  92. tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
  93. tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
  94. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
  95. tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
  96. tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
  97. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
  98. tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
  99. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
  100. tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
  101. tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
  102. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
  103. tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
  104. tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
  105. tree_sitter_analyzer/models.py +840 -0
  106. tree_sitter_analyzer/mypy_current_errors.txt +2 -0
  107. tree_sitter_analyzer/output_manager.py +255 -0
  108. tree_sitter_analyzer/platform_compat/__init__.py +3 -0
  109. tree_sitter_analyzer/platform_compat/adapter.py +324 -0
  110. tree_sitter_analyzer/platform_compat/compare.py +224 -0
  111. tree_sitter_analyzer/platform_compat/detector.py +67 -0
  112. tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
  113. tree_sitter_analyzer/platform_compat/profiles.py +217 -0
  114. tree_sitter_analyzer/platform_compat/record.py +55 -0
  115. tree_sitter_analyzer/platform_compat/recorder.py +155 -0
  116. tree_sitter_analyzer/platform_compat/report.py +92 -0
  117. tree_sitter_analyzer/plugins/__init__.py +280 -0
  118. tree_sitter_analyzer/plugins/base.py +647 -0
  119. tree_sitter_analyzer/plugins/manager.py +384 -0
  120. tree_sitter_analyzer/project_detector.py +328 -0
  121. tree_sitter_analyzer/queries/__init__.py +27 -0
  122. tree_sitter_analyzer/queries/csharp.py +216 -0
  123. tree_sitter_analyzer/queries/css.py +615 -0
  124. tree_sitter_analyzer/queries/go.py +275 -0
  125. tree_sitter_analyzer/queries/html.py +543 -0
  126. tree_sitter_analyzer/queries/java.py +402 -0
  127. tree_sitter_analyzer/queries/javascript.py +724 -0
  128. tree_sitter_analyzer/queries/kotlin.py +192 -0
  129. tree_sitter_analyzer/queries/markdown.py +258 -0
  130. tree_sitter_analyzer/queries/php.py +95 -0
  131. tree_sitter_analyzer/queries/python.py +859 -0
  132. tree_sitter_analyzer/queries/ruby.py +92 -0
  133. tree_sitter_analyzer/queries/rust.py +223 -0
  134. tree_sitter_analyzer/queries/sql.py +555 -0
  135. tree_sitter_analyzer/queries/typescript.py +871 -0
  136. tree_sitter_analyzer/queries/yaml.py +236 -0
  137. tree_sitter_analyzer/query_loader.py +272 -0
  138. tree_sitter_analyzer/security/__init__.py +22 -0
  139. tree_sitter_analyzer/security/boundary_manager.py +277 -0
  140. tree_sitter_analyzer/security/regex_checker.py +297 -0
  141. tree_sitter_analyzer/security/validator.py +599 -0
  142. tree_sitter_analyzer/table_formatter.py +782 -0
  143. tree_sitter_analyzer/utils/__init__.py +53 -0
  144. tree_sitter_analyzer/utils/logging.py +433 -0
  145. tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
  146. tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
  147. tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
  148. tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
  149. tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
@@ -0,0 +1,1636 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Python Language Plugin
4
+
5
+ Enhanced Python-specific parsing and element extraction functionality.
6
+ Provides comprehensive support for modern Python features including async/await,
7
+ decorators, type hints, context managers, and framework-specific patterns.
8
+ Equivalent to JavaScript plugin capabilities for consistent language support.
9
+ """
10
+
11
+ from typing import TYPE_CHECKING, Any, Optional
12
+
13
+ if TYPE_CHECKING:
14
+ import tree_sitter
15
+
16
+ try:
17
+ import tree_sitter
18
+
19
+ TREE_SITTER_AVAILABLE = True
20
+ except ImportError:
21
+ TREE_SITTER_AVAILABLE = False
22
+
23
+ from ..core.analysis_engine import AnalysisRequest
24
+ from ..encoding_utils import extract_text_slice, safe_encode
25
+ from ..models import AnalysisResult, Class, CodeElement, Function, Import, Variable
26
+ from ..plugins.base import ElementExtractor, LanguagePlugin
27
+ from ..utils import log_debug, log_error, log_warning
28
+ from ..utils.tree_sitter_compat import TreeSitterQueryCompat
29
+
30
+
31
+ class PythonElementExtractor(ElementExtractor):
32
+ """Enhanced Python-specific element extractor with comprehensive feature support"""
33
+
34
def __init__(self) -> None:
    """Create an extractor with empty per-file state, caches and flags."""
    # Python-specific tracking
    self.python_version: str = "3.8"  # default
    self.framework_type: str = ""  # django, flask, fastapi, etc.
    self.is_module: bool = False

    # Lookup caches used by the extraction helpers
    self._complexity_cache: dict[int, int] = {}
    self._docstring_cache: dict[int, str] = {}
    self._file_encoding: str | None = None
    self._element_cache: dict[tuple[int, str], Any] = {}
    self._processed_nodes: set[int] = set()
    self._node_text_cache: dict[int, str] = {}

    # State describing the file currently being analysed
    self.exports: list[dict[str, Any]] = []
    self.imports: list[str] = []
    self.content_lines: list[str] = []
    self.source_code: str = ""
    self.current_file: str = ""
    self.current_module: str = ""
55
+
56
def extract_functions(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Function]:
    """Collect every ``function_definition`` reachable from the tree root.

    Stores the source on the instance, clears the caches and re-detects the
    file characteristics before walking the tree.

    Args:
        tree: Parsed tree; ``None`` or a tree without a root yields ``[]``.
        source_code: Text the tree was parsed from (``None`` is treated as "").

    Returns:
        The extracted functions, or an empty list if the traversal raised.
    """
    self.source_code = source_code or ""
    self.content_lines = self.source_code.split("\n")
    self._reset_caches()
    self._detect_file_characteristics()

    collected: list[Function] = []

    # Nothing to walk without a parsed tree.
    if tree is None or tree.root_node is None:
        return collected

    # Node type -> extraction callback used by the generic traversal.
    handlers = {"function_definition": self._extract_function_optimized}

    try:
        self._traverse_and_extract_iterative(
            tree.root_node, handlers, collected, "function"
        )
        log_debug(f"Extracted {len(collected)} Python functions")
    except Exception as e:
        # A failed traversal discards partial results.
        log_debug(f"Error during function extraction: {e}")
        return []

    return collected
83
+
84
def extract_classes(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Class]:
    """Extract Python class definitions with detailed information.

    Stores the source on the instance, clears the caches and re-detects the
    file characteristics, then walks the tree for ``class_definition`` nodes.

    Args:
        tree: Parsed tree; ``None`` or a tree without a root yields ``[]``.
        source_code: Text the tree was parsed from (``None`` is treated as "").

    Returns:
        The extracted classes, or an empty list if the traversal raised.
    """
    self.source_code = source_code or ""
    self.content_lines = self.source_code.split("\n")
    self._reset_caches()
    # Refresh framework_type for *this* source. Without this the classes
    # built below carry whatever value a previous extract_functions() call
    # left behind (or "" on a fresh extractor).
    self._detect_file_characteristics()

    classes: list[Class] = []

    # Extract class declarations
    extractors = {
        "class_definition": self._extract_class_optimized,
    }

    if tree is not None and tree.root_node is not None:
        try:
            self._traverse_and_extract_iterative(
                tree.root_node, extractors, classes, "class"
            )
            log_debug(f"Extracted {len(classes)} Python classes")
        except Exception as e:
            # A failed traversal discards partial results.
            log_debug(f"Error during class extraction: {e}")
            return []

    return classes
110
+
111
def extract_variables(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Variable]:
    """Return the class-level attributes found in the tree.

    Only assignments directly inside a class body are reported; variables
    local to functions are not extracted.

    Args:
        tree: Parsed tree; its ``language`` attribute is needed to run the query.
        source_code: Text the tree was parsed from.

    Returns:
        The attributes found; empty when the tree has no language or on error.
    """
    found: list[Variable] = []

    try:
        # Captures the body block of every class definition.
        class_query = """
        (class_definition
            body: (block) @class.body) @class.definition
        """

        language = tree.language if hasattr(tree, "language") else None
        if not language:
            return found

        class_bodies = []
        try:
            captures = TreeSitterQueryCompat.safe_execute_query(
                language, class_query, tree.root_node, fallback_result=[]
            )
            class_bodies = [
                node
                for node, capture_name in captures
                if capture_name == "class.body"
            ]
        except Exception as e:
            log_debug(
                f"Could not extract Python class attributes using query: {e}"
            )
            class_bodies = []

        # Harvest the attribute assignments of each class body in turn.
        for body in class_bodies:
            found.extend(self._extract_class_attributes(body, source_code))

    except Exception as e:
        log_warning(f"Could not extract Python class attributes: {e}")

    return found
151
+
152
def _reset_caches(self) -> None:
    """Empty the node-text, processed-node, element, docstring and complexity caches."""
    for cache in (
        self._node_text_cache,
        self._processed_nodes,
        self._element_cache,
        self._docstring_cache,
        self._complexity_cache,
    ):
        cache.clear()
159
+
160
def _detect_file_characteristics(self) -> None:
    """Set ``is_module`` and ``framework_type`` from plain substring checks.

    ``is_module`` is true when the source contains ``"import "`` or
    ``"from "``. ``framework_type`` becomes the first of ``django``,
    ``flask``, ``fastapi`` whose lowercase name occurs anywhere in the source
    (the match is case-sensitive), or ``""`` when none does.
    """
    text = self.source_code

    self.is_module = any(marker in text for marker in ("import ", "from "))

    # First match wins, so the tuple order is the detection priority.
    self.framework_type = next(
        (name for name in ("django", "flask", "fastapi") if name in text),
        "",
    )
175
+
176
def _traverse_and_extract_iterative(
    self,
    root_node: Optional["tree_sitter.Node"],
    extractors: dict[str, Any],
    results: list[Any],
    element_type: str,
) -> None:
    """Walk the tree with an explicit stack and run extractors on target nodes.

    Only the root, target nodes and known container nodes are descended into;
    every other subtree is pruned. Extracted elements are appended to
    ``results`` and memoised per ``(id(node), element_type)``.

    Args:
        root_node: Node to start from; a falsy value makes this a no-op.
        extractors: Maps a node type to a callable taking the node and
            returning an element, a list of elements, or a falsy value.
        results: Output list, extended in place.
        element_type: Label used as part of the element cache key.
    """
    if not root_node:
        return

    target_node_types = set(extractors.keys())
    # Node types whose children may hold definitions. Decorated definitions
    # and the else/elif/except/finally/case clauses are included because the
    # definitions they wrap would otherwise be pruned and never extracted.
    container_node_types = {
        "module",
        "class_definition",
        "function_definition",
        "decorated_definition",
        "if_statement",
        "elif_clause",
        "else_clause",
        "for_statement",
        "while_statement",
        "with_statement",
        "try_statement",
        "except_clause",
        "except_group_clause",
        "finally_clause",
        "match_statement",
        "case_clause",
        "block",
    }

    node_stack = [(root_node, 0)]
    processed_nodes = 0
    max_depth = 50

    while node_stack:
        current_node, depth = node_stack.pop()

        if depth > max_depth:
            log_warning(f"Maximum traversal depth ({max_depth}) exceeded")
            continue

        processed_nodes += 1
        node_type = current_node.type

        # Early termination for irrelevant nodes (the root is always visited)
        if (
            depth > 0
            and node_type not in target_node_types
            and node_type not in container_node_types
        ):
            continue

        # Process target nodes
        if node_type in target_node_types:
            node_id = id(current_node)

            if node_id in self._processed_nodes:
                continue

            cache_key = (node_id, element_type)
            if cache_key in self._element_cache:
                element = self._element_cache[cache_key]
                if element:
                    if isinstance(element, list):
                        results.extend(element)
                    else:
                        results.append(element)
                self._processed_nodes.add(node_id)
                continue

            # Extract and cache
            extractor = extractors.get(node_type)
            if extractor:
                try:
                    element = extractor(current_node)
                    self._element_cache[cache_key] = element
                    if element:
                        if isinstance(element, list):
                            results.extend(element)
                        else:
                            results.append(element)
                    self._processed_nodes.add(node_id)
                except Exception as e:
                    # Best effort: skip the node but leave a trace of why.
                    log_debug(
                        f"Skipping {node_type} node after extraction error: {e}"
                    )
                    self._processed_nodes.add(node_id)

        # Push children in reverse so they are popped in source order.
        children = current_node.children
        if children:
            try:
                ordered_children = list(children)
            except TypeError:
                # Non-iterable children (e.g. Mock objects): nothing to descend into.
                ordered_children = []

            for child in reversed(ordered_children):
                node_stack.append((child, depth + 1))

    log_debug(f"Iterative traversal processed {processed_nodes} nodes")
275
+
276
def _get_node_text_optimized(self, node: "tree_sitter.Node") -> str:
    """Return the source text covered by ``node``, memoised by ``id(node)``.

    The byte range of the node is tried first; if that yields nothing or
    raises, the text is rebuilt from the node's start/end points against
    ``content_lines``. Returns ``""`` when the points fall outside the file
    or when both strategies fail.
    """
    cache_key = id(node)

    try:
        return self._node_text_cache[cache_key]
    except KeyError:
        pass

    # Strategy 1: slice the encoded source by byte offsets.
    try:
        first_byte = node.start_byte
        last_byte = node.end_byte

        encoding = self._file_encoding or "utf-8"
        encoded = safe_encode("\n".join(self.content_lines), encoding)
        sliced = extract_text_slice(encoded, first_byte, last_byte, encoding)

        # An empty slice is not trusted; fall through to the line-based path.
        if sliced:
            self._node_text_cache[cache_key] = sliced
            return sliced
    except Exception as e:
        log_error(f"Error in _get_node_text_optimized: {e}")

    # Strategy 2: rebuild the text from (row, column) points.
    try:
        start_point = node.start_point
        end_point = node.end_point
        line_total = len(self.content_lines)

        # Rows outside the file cannot be resolved.
        if not (0 <= start_point[0] < line_total):
            return ""
        if not (0 <= end_point[0] < line_total):
            return ""

        first_row = start_point[0]
        last_row = end_point[0]

        if first_row == last_row:
            row_text = self.content_lines[first_row]
            # Clamp both columns into the line, keeping end >= start.
            left = max(0, min(start_point[1], len(row_text)))
            right = max(left, min(end_point[1], len(row_text)))
            snippet: str = row_text[left:right]
        else:
            pieces = []
            for row in range(first_row, last_row + 1):
                if row >= len(self.content_lines):
                    continue
                row_text = self.content_lines[row]
                if row == first_row:
                    left = max(0, min(start_point[1], len(row_text)))
                    pieces.append(row_text[left:])
                elif row == last_row:
                    right = max(0, min(end_point[1], len(row_text)))
                    pieces.append(row_text[:right])
                else:
                    pieces.append(row_text)
            snippet = "\n".join(pieces)

        self._node_text_cache[cache_key] = snippet
        return snippet
    except Exception as fallback_error:
        log_error(f"Fallback text extraction also failed: {fallback_error}")
        return ""
337
+
338
def _extract_function_optimized(self, node: "tree_sitter.Node") -> Function | None:
    """Build a ``Function`` model from a ``function_definition`` node.

    Args:
        node: The function definition node to describe.

    Returns:
        The populated ``Function``, or ``None`` when the signature could not
        be parsed or any step raised.
    """
    import re
    import traceback

    try:
        # tree-sitter rows are 0-based; the models use 1-based line numbers.
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        # Extract function details
        function_info = self._parse_function_signature_optimized(node)
        if not function_info:
            return None

        name, parameters, is_async, decorators, return_type = function_info

        # Extract docstring
        docstring = self._extract_docstring_for_line(start_line)

        # Calculate complexity
        complexity_score = self._calculate_complexity_optimized(node)

        # Extract raw text (whole lines from the first to the last line)
        start_line_idx = max(0, start_line - 1)
        end_line_idx = min(len(self.content_lines), end_line)
        raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])

        # Determine visibility (Python conventions)
        visibility = "public"
        if name.startswith("__") and name.endswith("__"):
            visibility = "magic"  # Magic methods
        elif name.startswith("_"):
            visibility = "private"

        # Match "yield" as a whole word so identifiers such as
        # ``get_yield_rate`` do not mark the function as a generator.
        is_generator = re.search(r"\byield\b", raw_text) is not None
        is_static = "staticmethod" in decorators

        return Function(
            name=name,
            start_line=start_line,
            end_line=end_line,
            raw_text=raw_text,
            language="python",
            parameters=parameters,
            return_type=return_type or "Any",
            is_async=is_async,
            is_generator=is_generator,
            docstring=docstring,
            complexity_score=complexity_score,
            modifiers=decorators,
            is_static=is_static,
            is_staticmethod=is_static,
            is_private=visibility == "private",
            is_public=visibility == "public",
            # Python-specific properties
            framework_type=self.framework_type,
            is_property="property" in decorators,
            is_classmethod="classmethod" in decorators,
        )
    except Exception as e:
        log_error(f"Failed to extract function info: {e}")
        # Route the traceback through the logger instead of printing to stderr.
        log_debug(traceback.format_exc())
        return None
397
+
398
+ def _parse_function_signature_optimized(
399
+ self, node: "tree_sitter.Node"
400
+ ) -> tuple[str, list[str], bool, list[str], str | None] | None:
401
+ """Parse function signature for Python functions"""
402
+ try:
403
+ name = None
404
+ parameters = []
405
+ is_async = False
406
+ decorators = []
407
+ return_type = None
408
+
409
+ # Check for async keyword
410
+ node_text = self._get_node_text_optimized(node)
411
+ is_async = node_text.strip().startswith("async def")
412
+
413
+ # Extract return type from function signature text
414
+ if "->" in node_text:
415
+ # Split by '->' and extract return type
416
+ parts = node_text.split("->")
417
+ if len(parts) > 1:
418
+ # Get everything after '->' and before ':'
419
+ return_part = parts[1].split(":")[0].strip()
420
+ # Clean up the return type
421
+ return_type = return_part.replace("\n", " ").strip()
422
+ # Don't use decorator names as return types
423
+ if (
424
+ return_type
425
+ and not return_type.startswith("@")
426
+ and return_type != "dataclass"
427
+ ):
428
+ # Additional validation - ensure it's a valid type annotation
429
+ if not any(
430
+ invalid in return_type
431
+ for invalid in ["def ", "class ", "import "]
432
+ ):
433
+ pass # Keep the return_type
434
+ else:
435
+ return_type = None
436
+
437
+ # Extract decorators from preceding siblings
438
+ if node.parent:
439
+ for sibling in node.parent.children:
440
+ if sibling.type == "decorated_definition":
441
+ for child in sibling.children:
442
+ if child.type == "decorator":
443
+ decorator_text = self._get_node_text_optimized(child)
444
+ if decorator_text.startswith("@"):
445
+ decorator_text = decorator_text[1:].strip()
446
+ decorators.append(decorator_text)
447
+
448
+ for child in node.children:
449
+ if child.type == "identifier":
450
+ name = child.text.decode("utf8") if child.text else None
451
+ elif child.type == "parameters":
452
+ parameters = self._extract_parameters_from_node_optimized(child)
453
+ elif child.type == "type" and not return_type:
454
+ # Only use this if we didn't extract from text
455
+ type_text = self._get_node_text_optimized(child)
456
+ if (
457
+ type_text
458
+ and not type_text.startswith("@")
459
+ and type_text != "dataclass"
460
+ ):
461
+ return_type = type_text
462
+
463
+ return name or "", parameters, is_async, decorators, return_type
464
+ except Exception:
465
+ return None
466
+
467
def _extract_parameters_from_node_optimized(
    self, params_node: "tree_sitter.Node"
) -> list[str]:
    """Return the source text of each parameter, in declaration order.

    Covers plain, typed, default, typed-default (``x: int = 1``), ``*args``
    and ``**kwargs`` parameters. Punctuation and other child nodes are
    skipped.

    Args:
        params_node: The ``parameters`` node of a function definition.

    Returns:
        One string per parameter, including any annotation and default.
    """
    parameter_node_types = {
        "identifier",
        "typed_parameter",
        "default_parameter",
        # Annotation plus default; previously dropped from the result.
        "typed_default_parameter",
        "list_splat_pattern",  # *args
        "dictionary_splat_pattern",  # **kwargs
    }

    return [
        self._get_node_text_optimized(child)
        for child in params_node.children
        if child.type in parameter_node_types
    ]
495
+
496
+ def _extract_docstring_for_line(self, target_line: int) -> str | None:
497
+ """Extract docstring for the specified line"""
498
+ if target_line in self._docstring_cache:
499
+ return self._docstring_cache[target_line]
500
+
501
+ try:
502
+ if not self.content_lines or target_line >= len(self.content_lines):
503
+ return None
504
+
505
+ # Look for docstring in the next few lines after function definition
506
+ for i in range(target_line, min(target_line + 5, len(self.content_lines))):
507
+ line = self.content_lines[i].strip()
508
+ if line.startswith('"""') or line.startswith("'''"):
509
+ # Found docstring start
510
+ quote_type = '"""' if line.startswith('"""') else "'''"
511
+ docstring_lines = []
512
+
513
+ # Single line docstring
514
+ if line.count(quote_type) >= 2:
515
+ docstring = line.replace(quote_type, "").strip()
516
+ self._docstring_cache[target_line] = docstring
517
+ return docstring
518
+
519
+ # Multi-line docstring
520
+ docstring_lines.append(line.replace(quote_type, ""))
521
+ found_closing_quote = False
522
+ for j in range(i + 1, len(self.content_lines)):
523
+ next_line = self.content_lines[j]
524
+ if quote_type in next_line:
525
+ docstring_lines.append(next_line.replace(quote_type, ""))
526
+ found_closing_quote = True
527
+ break
528
+ docstring_lines.append(next_line)
529
+
530
+ if not found_closing_quote:
531
+ self._docstring_cache[target_line] = ""
532
+ return None
533
+
534
+ # Join preserving formatting and add leading newline for multi-line
535
+ docstring = "\n".join(docstring_lines)
536
+ # Add leading newline for multi-line docstrings to match expected format
537
+ if not docstring.startswith("\n"):
538
+ docstring = "\n" + docstring
539
+ self._docstring_cache[target_line] = docstring
540
+ return docstring
541
+
542
+ self._docstring_cache[target_line] = ""
543
+ return None
544
+
545
+ except Exception as e:
546
+ log_debug(f"Failed to extract docstring: {e}")
547
+ return None
548
+
549
def _calculate_complexity_optimized(self, node: "tree_sitter.Node") -> int:
    """Estimate complexity as 1 plus the number of branching keywords.

    Keywords are counted as whole words in the lower-cased node text. The
    result is memoised by ``id(node)``; if reading the text fails the score
    stays at 1.
    """
    import re

    cache_key = id(node)
    if cache_key in self._complexity_cache:
        return self._complexity_cache[cache_key]

    branching_keywords = (
        "if",
        "elif",
        "while",
        "for",
        "except",
        "and",
        "or",
        "with",
        "match",
        "case",
    )

    score = 1
    try:
        lowered = self._get_node_text_optimized(node).lower()
        score += sum(
            len(re.findall(rf"\b{keyword}\b", lowered))
            for keyword in branching_keywords
        )
    except Exception as e:
        log_debug(f"Failed to calculate complexity: {e}")

    self._complexity_cache[cache_key] = score
    return score
582
+
583
def _extract_class_optimized(self, node: "tree_sitter.Node") -> Class | None:
    """Build a ``Class`` model from a ``class_definition`` node.

    Args:
        node: The class definition node to describe.

    Returns:
        The populated ``Class``, or ``None`` when the node has no name or any
        step raised.
    """
    try:
        # tree-sitter rows are 0-based; the models use 1-based line numbers.
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        class_name = None
        superclasses: list[str] = []
        decorators: list[str] = []

        # A decorated class is wrapped by a decorated_definition node whose
        # decorator children belong to this class. Decorators of sibling
        # definitions must not be picked up.
        parent = node.parent
        if parent and parent.type == "decorated_definition":
            for sibling in parent.children:
                if sibling.type == "decorator":
                    decorator_text = self._get_node_text_optimized(sibling)
                    if decorator_text.startswith("@"):
                        decorator_text = decorator_text[1:].strip()
                    decorators.append(decorator_text)

        for child in node.children:
            if child.type == "identifier":
                class_name = child.text.decode("utf8") if child.text else None
            elif child.type == "argument_list" and child.children:
                # Bases are plain names or dotted names (e.g. models.Model);
                # keyword arguments such as metaclass=... are not bases.
                for base in child.children:
                    if base.type in ("identifier", "attribute") and base.text:
                        superclasses.append(base.text.decode("utf8"))

        if not class_name:
            return None

        # Extract docstring
        docstring = self._extract_docstring_for_line(start_line)

        # Extract raw text
        raw_text = self._get_node_text_optimized(node)

        # Generate fully qualified name
        full_qualified_name = (
            f"{self.current_module}.{class_name}"
            if self.current_module
            else class_name
        )

        # Compare on the final dotted component so that abc.ABC and
        # dataclasses.dataclass(frozen=True) are recognised as well.
        base_names = [base.rsplit(".", 1)[-1] for base in superclasses]
        decorator_names = [
            text.split("(", 1)[0].strip().rsplit(".", 1)[-1] for text in decorators
        ]

        return Class(
            name=class_name,
            start_line=start_line,
            end_line=end_line,
            raw_text=raw_text,
            language="python",
            class_type="class",
            superclass=superclasses[0] if superclasses else None,
            interfaces=superclasses[1:] if len(superclasses) > 1 else [],
            docstring=docstring,
            modifiers=decorators,
            full_qualified_name=full_qualified_name,
            package_name=self.current_module,
            # Python-specific properties
            framework_type=self.framework_type,
            is_dataclass="dataclass" in decorator_names,
            is_abstract="ABC" in base_names or "abstractmethod" in raw_text,
            is_exception=any(
                "Exception" in sc or "Error" in sc for sc in superclasses
            ),
        )
    except Exception as e:
        log_debug(f"Failed to extract class info: {e}")
        return None
661
+
662
def _is_framework_class(self, node: "tree_sitter.Node", class_name: str) -> bool:
    """Report whether the class looks framework-specific for ``framework_type``.

    For Django the class text is searched for typical base-class names; for
    Flask and FastAPI the whole source is searched for framework symbols.
    ``class_name`` is accepted but not consulted.
    """
    framework = self.framework_type

    if framework == "django":
        # Check for Django model, view, form, etc.
        class_text = self._get_node_text_optimized(node)
        for marker in ["Model", "View", "Form", "Serializer", "TestCase"]:
            if marker in class_text:
                return True
        return False

    if framework == "flask":
        # Check for Flask patterns
        if "Flask" in self.source_code:
            return True
        return "Blueprint" in self.source_code

    if framework == "fastapi":
        # Check for FastAPI patterns
        if "APIRouter" in self.source_code:
            return True
        return "BaseModel" in self.source_code

    return False
678
+
679
+ def _extract_class_attributes(
680
+ self, class_body_node: "tree_sitter.Node", source_code: str
681
+ ) -> list[Variable]:
682
+ """Extract class-level attribute assignments"""
683
+ attributes: list[Variable] = []
684
+
685
+ try:
686
+ # Look for assignments directly under class body
687
+ for child in class_body_node.children:
688
+ if child.type == "expression_statement":
689
+ # Check if it's an assignment
690
+ for grandchild in child.children:
691
+ if grandchild.type == "assignment":
692
+ attribute = self._extract_class_attribute_info(
693
+ grandchild, source_code
694
+ )
695
+ if attribute:
696
+ attributes.append(attribute)
697
+ elif child.type == "assignment":
698
+ attribute = self._extract_class_attribute_info(child, source_code)
699
+ if attribute:
700
+ attributes.append(attribute)
701
+
702
+ except Exception as e:
703
+ log_warning(f"Could not extract class attributes: {e}")
704
+
705
+ return attributes
706
+
707
+ def _extract_class_attribute_info(
708
+ self, node: "tree_sitter.Node", source_code: str
709
+ ) -> Variable | None:
710
+ """Extract class attribute information from assignment node"""
711
+ try:
712
+ # Get the full assignment text
713
+ assignment_text = source_code[node.start_byte : node.end_byte]
714
+
715
+ # Extract attribute name and type annotation
716
+ if "=" in assignment_text:
717
+ left_part = assignment_text.split("=")[0].strip()
718
+
719
+ # Handle type annotations (e.g., "name: str = ...")
720
+ if ":" in left_part:
721
+ name_part, type_part = left_part.split(":", 1)
722
+ attr_name = name_part.strip()
723
+ attr_type = type_part.strip()
724
+ else:
725
+ attr_name = left_part
726
+ attr_type = None
727
+
728
+ return Variable(
729
+ name=attr_name,
730
+ start_line=node.start_point[0] + 1,
731
+ end_line=node.end_point[0] + 1,
732
+ raw_text=assignment_text,
733
+ language="python",
734
+ variable_type=attr_type,
735
+ )
736
+
737
+ except Exception as e:
738
+ log_warning(f"Could not extract class attribute info: {e}")
739
+
740
+ return None
741
+
742
def extract_imports(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Import]:
    """Extract Python import statements.

    Whole ``import`` / ``from ... import`` statements are captured with
    a tree-sitter query and converted by ``_extract_import_info``. If
    the query step raises, ``_extract_imports_manual`` walks the tree
    instead. When ``tree`` exposes no usable ``language`` nothing is
    extracted.

    Statements are de-duplicated by their byte span. The previous key
    was the (start row, end row) pair, which collapsed distinct
    statements sharing a line such as ``import os; import sys``.

    Args:
        tree: Parsed tree; ``tree.language`` and ``tree.root_node`` are used.
        source_code: Source text the tree was parsed from.

    Returns:
        The extracted ``Import`` elements, possibly empty.
    """
    imports: list[Import] = []

    # Only whole statements are captured, not their individual parts.
    import_query = """
    (import_statement) @import_stmt
    (import_from_statement) @from_import_stmt
    """

    try:
        language = getattr(tree, "language", None)
        if language:
            try:
                captures = TreeSitterQueryCompat.safe_execute_query(
                    language, import_query, tree.root_node, fallback_result=[]
                )

                # Byte spans uniquely identify a statement, even when
                # several statements share one source line.
                processed_spans: set[tuple[int, int]] = set()

                for node, capture_name in captures:
                    span_key = (node.start_byte, node.end_byte)
                    if span_key in processed_spans:
                        continue
                    processed_spans.add(span_key)

                    # The capture name tells the two statement kinds apart.
                    import_type = "from_import" if "from" in capture_name else "import"

                    imp = self._extract_import_info(node, source_code, import_type)
                    if imp:
                        imports.append(imp)

            except Exception as query_error:
                # Fallback to manual extraction for tree-sitter compatibility
                log_debug(
                    f"Query execution failed, using manual extraction: {query_error}"
                )
                imports.extend(
                    self._extract_imports_manual(tree.root_node, source_code)
                )

    except Exception as e:
        log_warning(f"Could not extract Python imports: {e}")
        # Final fallback
        imports.extend(self._extract_imports_manual(tree.root_node, source_code))

    return imports
798
+
799
+ def _extract_imports_manual(
800
+ self, root_node: "tree_sitter.Node", source_code: str
801
+ ) -> list[Import]:
802
+ """Manual import extraction for tree-sitter 0.25.x compatibility"""
803
+ imports = []
804
+
805
+ def walk_tree(node: "tree_sitter.Node") -> None:
806
+ if node.type in ["import_statement", "import_from_statement"]:
807
+ try:
808
+ start_line = node.start_point[0] + 1
809
+ end_line = node.end_point[0] + 1
810
+ raw_text = (
811
+ source_code[node.start_byte : node.end_byte]
812
+ if hasattr(node, "start_byte")
813
+ else ""
814
+ )
815
+
816
+ # Parse the import statement correctly
817
+ if node.type == "import_statement":
818
+ # Simple import: import os, sys, json
819
+ # Extract all imported modules
820
+ for child in node.children:
821
+ if (
822
+ child.type == "dotted_name"
823
+ or child.type == "identifier"
824
+ ):
825
+ module_name = (
826
+ source_code[child.start_byte : child.end_byte]
827
+ if hasattr(child, "start_byte")
828
+ else ""
829
+ )
830
+ if module_name and module_name != "import":
831
+ import_obj = Import(
832
+ name=module_name,
833
+ start_line=start_line,
834
+ end_line=end_line,
835
+ raw_text=raw_text,
836
+ module_name=module_name,
837
+ imported_names=[module_name],
838
+ element_type="import",
839
+ )
840
+ imports.append(import_obj)
841
+ elif node.type == "import_from_statement":
842
+ # From import: from abc import ABC, abstractmethod
843
+ module_name = ""
844
+ imported_items = []
845
+
846
+ # Find the module name (after 'from')
847
+ for child in node.children:
848
+ if child.type == "dotted_name" and not module_name:
849
+ module_name = (
850
+ source_code[child.start_byte : child.end_byte]
851
+ if hasattr(child, "start_byte")
852
+ else ""
853
+ )
854
+ elif child.type == "import_list":
855
+ # Extract items from import list
856
+ for grandchild in child.children:
857
+ if (
858
+ grandchild.type == "dotted_name"
859
+ or grandchild.type == "identifier"
860
+ ):
861
+ item_name = (
862
+ source_code[
863
+ grandchild.start_byte : grandchild.end_byte
864
+ ]
865
+ if hasattr(grandchild, "start_byte")
866
+ else ""
867
+ )
868
+ if item_name and item_name not in [
869
+ ",",
870
+ "(",
871
+ ")",
872
+ ]:
873
+ imported_items.append(item_name)
874
+ elif child.type == "dotted_name" and module_name:
875
+ # Single import item (not in a list)
876
+ item_name = (
877
+ source_code[child.start_byte : child.end_byte]
878
+ if hasattr(child, "start_byte")
879
+ else ""
880
+ )
881
+ if item_name:
882
+ imported_items.append(item_name)
883
+
884
+ # Create import object for from import
885
+ if module_name:
886
+ import_obj = Import(
887
+ name=(
888
+ f"from {module_name} import {', '.join(imported_items)}"
889
+ if imported_items
890
+ else f"from {module_name}"
891
+ ),
892
+ start_line=start_line,
893
+ end_line=end_line,
894
+ raw_text=raw_text,
895
+ module_name=module_name,
896
+ imported_names=imported_items,
897
+ element_type="import",
898
+ )
899
+ imports.append(import_obj)
900
+
901
+ except Exception as e:
902
+ log_warning(f"Failed to extract import manually: {e}")
903
+
904
+ # Recursively process children
905
+ for child in node.children:
906
+ walk_tree(child)
907
+
908
+ walk_tree(root_node)
909
+ return imports
910
+
911
def extract_packages(self, tree: "tree_sitter.Tree", source_code: str) -> list:
    """Infer the Python package of ``self.current_file`` from the file system.

    Starting at the file's directory, parent directories are climbed
    for as long as each contains an ``__init__.py``; their base names
    form the dotted package name. When a package is found,
    ``self.current_module`` is set to that name and a single
    ``Package`` element is returned. ``tree`` and ``source_code`` are
    not consulted.
    """
    import os

    from ..models import Package

    packages: list[Package] = []

    if not self.current_file:
        return packages

    directory = os.path.dirname(os.path.abspath(self.current_file))
    parts: list[str] = []

    # Climb while the directory is itself a package.
    while directory and os.path.exists(os.path.join(directory, "__init__.py")):
        parts.insert(0, os.path.basename(directory))
        parent = os.path.dirname(directory)
        if parent == directory:  # Reached root
            break
        directory = parent

    if not parts:
        return packages

    dotted_name = ".".join(parts)
    self.current_module = dotted_name
    packages.append(
        Package(
            name=dotted_name,
            start_line=1,
            end_line=1,
            raw_text=f"# Package: {dotted_name}",
            language="python",
        )
    )
    return packages
958
+
959
+ def _extract_detailed_function_info(
960
+ self, node: "tree_sitter.Node", source_code: str, is_async: bool = False
961
+ ) -> Function | None:
962
+ """Extract comprehensive function information from AST node"""
963
+ try:
964
+ # Extract basic information
965
+ name = self._extract_name_from_node(node, source_code)
966
+ if not name:
967
+ return None
968
+
969
+ # Extract parameters
970
+ parameters = self._extract_parameters_from_node(node, source_code)
971
+
972
+ # Extract decorators
973
+ decorators = self._extract_decorators_from_node(node, source_code)
974
+
975
+ # Extract return type hint
976
+ return_type = self._extract_return_type_from_node(node, source_code)
977
+
978
+ # Extract docstring
979
+ # docstring = self._extract_docstring_from_node(node, source_code) # Not used currently
980
+
981
+ # Extract function body
982
+ # body = self._extract_function_body(node, source_code) # Not used currently
983
+
984
+ # Calculate complexity (simplified)
985
+ # complexity_score = self._calculate_complexity(body) # Not used currently
986
+
987
+ # Determine visibility (Python conventions)
988
+ visibility = "public"
989
+ if name.startswith("__") and name.endswith("__"):
990
+ visibility = "magic" # Magic methods
991
+ elif name.startswith("_"):
992
+ visibility = "private"
993
+
994
+ # Safely extract raw text, avoiding index out of bounds
995
+ start_byte = min(node.start_byte, len(source_code))
996
+ end_byte = min(node.end_byte, len(source_code))
997
+ raw_text = (
998
+ source_code[start_byte:end_byte]
999
+ if start_byte < end_byte
1000
+ else source_code
1001
+ )
1002
+
1003
+ return Function(
1004
+ name=name,
1005
+ start_line=node.start_point[0] + 1,
1006
+ end_line=node.end_point[0] + 1,
1007
+ raw_text=raw_text,
1008
+ language="python",
1009
+ parameters=parameters,
1010
+ return_type=return_type or "Any",
1011
+ modifiers=decorators,
1012
+ is_static="staticmethod" in decorators,
1013
+ is_private=visibility == "private",
1014
+ is_public=visibility == "public",
1015
+ )
1016
+
1017
+ except Exception as e:
1018
+ log_warning(f"Could not extract detailed function info: {e}")
1019
+ return None
1020
+
1021
+ def _extract_detailed_class_info(
1022
+ self, node: "tree_sitter.Node", source_code: str
1023
+ ) -> Class | None:
1024
+ """Extract comprehensive class information from AST node"""
1025
+ try:
1026
+ # Extract basic information
1027
+ name = self._extract_name_from_node(node, source_code)
1028
+ if not name:
1029
+ return None
1030
+
1031
+ # Extract superclasses
1032
+ superclasses = self._extract_superclasses_from_node(node, source_code)
1033
+
1034
+ # Extract decorators
1035
+ decorators = self._extract_decorators_from_node(node, source_code)
1036
+
1037
+ # Extract docstring
1038
+ # docstring = self._extract_docstring_from_node(node, source_code) # Not used currently
1039
+
1040
+ # Generate fully qualified name
1041
+ full_qualified_name = (
1042
+ f"{self.current_module}.{name}" if self.current_module else name
1043
+ )
1044
+
1045
+ # Determine visibility
1046
+ # visibility = "public"
1047
+ # if name.startswith("_"):
1048
+ # visibility = "private" # Not used currently
1049
+
1050
+ return Class(
1051
+ name=name,
1052
+ start_line=node.start_point[0] + 1,
1053
+ end_line=node.end_point[0] + 1,
1054
+ raw_text=source_code[node.start_byte : node.end_byte],
1055
+ language="python",
1056
+ class_type="class",
1057
+ full_qualified_name=full_qualified_name,
1058
+ package_name=self.current_module,
1059
+ superclass=superclasses[0] if superclasses else None,
1060
+ interfaces=superclasses[1:] if len(superclasses) > 1 else [],
1061
+ modifiers=decorators,
1062
+ )
1063
+
1064
+ except Exception as e:
1065
+ log_warning(f"Could not extract detailed class info: {e}")
1066
+ return None
1067
+
1068
+ def _extract_variable_info(
1069
+ self, node: "tree_sitter.Node", source_code: str, assignment_type: str
1070
+ ) -> Variable | None:
1071
+ """Extract detailed variable information from AST node"""
1072
+ try:
1073
+ if not self._validate_node(node):
1074
+ return None
1075
+
1076
+ # Extract variable text
1077
+ variable_text = source_code[node.start_byte : node.end_byte]
1078
+
1079
+ # Extract variable name (simplified)
1080
+ if "=" in variable_text:
1081
+ name_part = variable_text.split("=")[0].strip()
1082
+ if assignment_type == "multiple_assignment" and "," in name_part:
1083
+ name = name_part.split(",")[0].strip()
1084
+ else:
1085
+ name = name_part
1086
+ else:
1087
+ name = "variable"
1088
+
1089
+ return Variable(
1090
+ name=name,
1091
+ start_line=node.start_point[0] + 1,
1092
+ end_line=node.end_point[0] + 1,
1093
+ raw_text=variable_text,
1094
+ language="python",
1095
+ variable_type=assignment_type,
1096
+ )
1097
+
1098
+ except Exception as e:
1099
+ log_warning(f"Could not extract variable info: {e}")
1100
+ return None
1101
+
1102
+ def _extract_import_info(
1103
+ self, node: "tree_sitter.Node", source_code: str, import_type: str
1104
+ ) -> Import | None:
1105
+ """Extract detailed import information from AST node"""
1106
+ try:
1107
+ if not self._validate_node(node):
1108
+ return None
1109
+
1110
+ # Safely extract import text, avoiding index out of bounds
1111
+ start_byte = min(node.start_byte, len(source_code))
1112
+ end_byte = min(node.end_byte, len(source_code))
1113
+ import_text = (
1114
+ source_code[start_byte:end_byte]
1115
+ if start_byte < end_byte
1116
+ else source_code
1117
+ )
1118
+
1119
+ # Extract import name and module name (simplified)
1120
+ if import_type == "from_import":
1121
+ if "from" in import_text and "import" in import_text:
1122
+ parts = import_text.split("import")
1123
+ module_name = parts[0].replace("from", "").strip()
1124
+ import_name = parts[1].strip()
1125
+ else:
1126
+ module_name = ""
1127
+ import_name = import_text
1128
+ elif import_type == "aliased_import":
1129
+ module_name = ""
1130
+ import_name = import_text
1131
+ else:
1132
+ module_name = ""
1133
+ import_name = import_text.replace("import", "").strip()
1134
+
1135
+ return Import(
1136
+ name=import_name,
1137
+ start_line=node.start_point[0] + 1,
1138
+ end_line=node.end_point[0] + 1,
1139
+ raw_text=import_text,
1140
+ language="python",
1141
+ module_name=module_name,
1142
+ )
1143
+
1144
+ except Exception as e:
1145
+ log_warning(f"Could not extract import info: {e}")
1146
+ return None
1147
+
1148
+ # Helper methods
1149
+ def _validate_node(self, node: "tree_sitter.Node") -> bool:
1150
+ """Validate that a node has required attributes"""
1151
+ required_attrs = ["start_byte", "end_byte", "start_point", "end_point"]
1152
+ for attr in required_attrs:
1153
+ if not hasattr(node, attr) or getattr(node, attr) is None:
1154
+ return False
1155
+ return True
1156
+
1157
+ def _extract_name_from_node(
1158
+ self, node: "tree_sitter.Node", source_code: str
1159
+ ) -> str | None:
1160
+ """Extract name from AST node"""
1161
+ for child in node.children:
1162
+ if child.type == "identifier":
1163
+ return source_code[child.start_byte : child.end_byte]
1164
+ return None
1165
+
1166
+ def _extract_parameters_from_node(
1167
+ self, node: "tree_sitter.Node", source_code: str
1168
+ ) -> list[str]:
1169
+ """Extract parameters from function node"""
1170
+ parameters: list[str] = []
1171
+ for child in node.children:
1172
+ if child.type == "parameters":
1173
+ for param_child in child.children:
1174
+ if param_child.type in [
1175
+ "identifier",
1176
+ "typed_parameter",
1177
+ "default_parameter",
1178
+ ]:
1179
+ param_text = source_code[
1180
+ param_child.start_byte : param_child.end_byte
1181
+ ]
1182
+ parameters.append(param_text)
1183
+ return parameters
1184
+
1185
+ def _extract_decorators_from_node(
1186
+ self, node: "tree_sitter.Node", source_code: str
1187
+ ) -> list[str]:
1188
+ """Extract decorators from node"""
1189
+ decorators: list[str] = []
1190
+
1191
+ # Decorators are before function/class definitions
1192
+ if hasattr(node, "parent") and node.parent:
1193
+ for sibling in node.parent.children:
1194
+ if (
1195
+ sibling.type == "decorator"
1196
+ and sibling.end_point[0] < node.start_point[0]
1197
+ ):
1198
+ decorator_text = source_code[sibling.start_byte : sibling.end_byte]
1199
+ # Remove @
1200
+ if decorator_text.startswith("@"):
1201
+ decorator_text = decorator_text[1:].strip()
1202
+ decorators.append(decorator_text)
1203
+
1204
+ return decorators
1205
+
1206
+ def _extract_return_type_from_node(
1207
+ self, node: "tree_sitter.Node", source_code: str
1208
+ ) -> str | None:
1209
+ """Extract return type annotation from function node"""
1210
+ # Look for return type annotation after '->'
1211
+ node_text = self._get_node_text_optimized(node)
1212
+ if "->" in node_text:
1213
+ # Extract everything after '->' and before ':'
1214
+ parts = node_text.split("->")
1215
+ if len(parts) > 1:
1216
+ return_part = parts[1].split(":")[0].strip()
1217
+ # Clean up the return type (remove whitespace and newlines)
1218
+ return_type = return_part.replace("\n", " ").strip()
1219
+ # Don't return decorator names as return types
1220
+ if return_type and not return_type.startswith("@"):
1221
+ return return_type
1222
+
1223
+ # Fallback to original method
1224
+ for child in node.children:
1225
+ if child.type == "type":
1226
+ type_text = source_code[child.start_byte : child.end_byte]
1227
+ # Don't return decorator names as return types
1228
+ if type_text and not type_text.startswith("@"):
1229
+ return type_text
1230
+ return None
1231
+
1232
+ def _extract_docstring_from_node(
1233
+ self, node: "tree_sitter.Node", source_code: str
1234
+ ) -> str | None:
1235
+ """Extract docstring from function/class node"""
1236
+ for child in node.children:
1237
+ if child.type == "block":
1238
+ # Check if the first statement in the block is a docstring
1239
+ for stmt in child.children:
1240
+ if stmt.type == "expression_statement":
1241
+ for expr in stmt.children:
1242
+ if expr.type == "string":
1243
+ if self._validate_node(expr):
1244
+ docstring = source_code[
1245
+ expr.start_byte : expr.end_byte
1246
+ ]
1247
+ # Remove quotes
1248
+ if docstring.startswith(
1249
+ '"""'
1250
+ ) or docstring.startswith("'''"):
1251
+ return docstring[3:-3].strip()
1252
+ elif docstring.startswith(
1253
+ '"'
1254
+ ) or docstring.startswith("'"):
1255
+ return docstring[1:-1].strip()
1256
+ return docstring
1257
+ break
1258
+ break
1259
+ return None
1260
+
1261
+ def _extract_function_body(self, node: "tree_sitter.Node", source_code: str) -> str:
1262
+ """Extract function body"""
1263
+ for child in node.children:
1264
+ if child.type == "block":
1265
+ return source_code[child.start_byte : child.end_byte]
1266
+ return ""
1267
+
1268
+ def _extract_superclasses_from_node(
1269
+ self, node: "tree_sitter.Node", source_code: str
1270
+ ) -> list[str]:
1271
+ """Extract superclasses from class node"""
1272
+ superclasses: list[str] = []
1273
+ for child in node.children:
1274
+ if child.type == "argument_list":
1275
+ for arg in child.children:
1276
+ if arg.type == "identifier":
1277
+ superclasses.append(source_code[arg.start_byte : arg.end_byte])
1278
+ return superclasses
1279
+
1280
+ def _calculate_complexity(self, body: str) -> int:
1281
+ """Calculate cyclomatic complexity (simplified)"""
1282
+ complexity = 1 # Base complexity
1283
+ keywords = ["if", "elif", "for", "while", "try", "except", "with", "and", "or"]
1284
+ for keyword in keywords:
1285
+ complexity += body.count(f" {keyword} ") + body.count(f"\n{keyword} ")
1286
+ return complexity
1287
+
1288
+
1289
+ class PythonPlugin(LanguagePlugin):
1290
+ """Python language plugin for the new architecture"""
1291
+
1292
def __init__(self) -> None:
    """Set up the plugin's caches and the legacy attributes used by tests."""
    super().__init__()

    # Legacy compatibility attribute for tests
    self.language = "python"

    # Lazily populated caches.
    self._language_cache: tree_sitter.Language | None = None
    self._extractor: PythonElementExtractor | None = None

    # Legacy attribute; this call also fills ``self._extractor``.
    self.extractor = self.get_extractor()
1301
+
1302
def get_language_name(self) -> str:
    """Name of the language handled by this plugin (always ``"python"``)."""
    language_name = "python"
    return language_name
1305
+
1306
def get_file_extensions(self) -> list[str]:
    """File extensions handled by this plugin, as a fresh list per call."""
    extensions = [".py"]
    extensions += [".pyw", ".pyi"]
    return extensions
1309
+
1310
def create_extractor(self) -> ElementExtractor:
    """Build a brand-new ``PythonElementExtractor`` (never the cached one)."""
    fresh_extractor = PythonElementExtractor()
    return fresh_extractor
1313
+
1314
def get_extractor(self) -> ElementExtractor:
    """Return the shared extractor, creating and caching it on first use."""
    if self._extractor is not None:
        return self._extractor
    self._extractor = PythonElementExtractor()
    return self._extractor
1319
+
1320
def get_language(self) -> str:
    """Legacy accessor for the language name (always ``"python"``)."""
    language_name = "python"
    return language_name
1323
+
1324
def extract_functions(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Function]:
    """Legacy entry point: delegate function extraction to the cached extractor."""
    return self.get_extractor().extract_functions(tree, source_code)
1330
+
1331
def extract_classes(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Class]:
    """Legacy entry point: delegate class extraction to the cached extractor."""
    return self.get_extractor().extract_classes(tree, source_code)
1337
+
1338
def extract_variables(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Variable]:
    """Legacy entry point: delegate variable extraction to the cached extractor."""
    return self.get_extractor().extract_variables(tree, source_code)
1344
+
1345
def extract_imports(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[Import]:
    """Legacy entry point: delegate import extraction to the cached extractor."""
    return self.get_extractor().extract_imports(tree, source_code)
1351
+
1352
def get_tree_sitter_language(self) -> Optional["tree_sitter.Language"]:
    """Return the tree-sitter ``Language`` for Python, loading it once.

    The loaded object is cached in ``self._language_cache``. ``None`` is
    returned, after logging an error, when the required packages cannot
    be imported or the language cannot be constructed.
    """
    if self._language_cache is not None:
        return self._language_cache

    try:
        import tree_sitter
        import tree_sitter_python as tspython

        # Convert the PyCapsule object into a Language object
        capsule = tspython.language()
        self._language_cache = tree_sitter.Language(capsule)
    except ImportError:
        log_error("tree-sitter-python not available")
        return None
    except Exception as e:
        log_error(f"Failed to load Python language: {e}")
        return None

    return self._language_cache
1369
+
1370
def get_supported_queries(self) -> list[str]:
    """Names of the queries this plugin supports, as a fresh list per call."""
    core_queries = ("function", "class", "variable", "import")
    python_queries = (
        "async_function",
        "method",
        "decorator",
        "exception",
        "comprehension",
        "lambda",
        "context_manager",
        "type_hint",
        "docstring",
    )
    framework_queries = ("django_model", "flask_route", "fastapi_endpoint")
    return [*core_queries, *python_queries, *framework_queries]
1390
+
1391
def is_applicable(self, file_path: str) -> bool:
    """Tell whether ``file_path`` ends with a supported extension.

    The comparison is case-insensitive on both the path and the
    extensions returned by ``get_file_extensions``.
    """
    lowered_path = file_path.lower()
    for extension in self.get_file_extensions():
        if lowered_path.endswith(extension.lower()):
            return True
    return False
1397
+
1398
def get_plugin_info(self) -> dict:
    """Describe the plugin: name, language, extensions, version, queries, features."""
    info: dict = {"name": "Python Plugin"}
    info["language"] = self.get_language_name()
    info["extensions"] = self.get_file_extensions()
    info["version"] = "2.0.0"
    info["supported_queries"] = self.get_supported_queries()
    info["features"] = [
        "Async/await functions",
        "Type hints support",
        "Decorators",
        "Context managers",
        "Comprehensions",
        "Lambda expressions",
        "Exception handling",
        "Docstring extraction",
        "Django framework support",
        "Flask framework support",
        "FastAPI framework support",
        "Dataclass support",
        "Abstract class detection",
        "Complexity analysis",
    ]
    return info
1423
+
1424
+ def execute_query_strategy(
1425
+ self, query_key: str | None, language: str
1426
+ ) -> str | None:
1427
+ """Execute query strategy for Python language"""
1428
+ queries = self.get_queries()
1429
+ return queries.get(query_key) if query_key else None
1430
+
1431
def _get_node_type_for_element(self, element: Any) -> str:
    """Map a model element to the tree-sitter node type that represents it.

    Elements that are not a ``Function``, ``Class``, ``Variable`` or
    ``Import`` map to ``"unknown"``.
    """
    from ..models import Class, Function, Import, Variable

    # Checked in this order; the first matching model type wins.
    node_type_by_model = (
        (Function, "function_definition"),
        (Class, "class_definition"),
        (Variable, "assignment"),
        (Import, "import_statement"),
    )
    for model_type, node_type in node_type_by_model:
        if isinstance(element, model_type):
            return node_type
    return "unknown"
1445
+
1446
def get_element_categories(self) -> dict[str, list[str]]:
    """
    Get element categories mapping query keys to node types

    Every category is registered under its singular and plural key,
    each with its own list, followed by the ``all_elements`` entry.

    Returns:
        Dictionary mapping query keys to lists of node types
    """
    comprehension_kinds = (
        "list_comprehension",
        "set_comprehension",
        "dictionary_comprehension",
        "generator_expression",
    )
    singular_categories = (
        # Function-related queries
        ("function", ("function_definition",)),
        ("async_function", ("function_definition",)),
        ("method", ("function_definition",)),
        ("lambda", ("lambda",)),
        # Class-related queries
        ("class", ("class_definition",)),
        # Import-related queries
        ("import", ("import_statement", "import_from_statement")),
        ("from_import", ("import_from_statement",)),
        # Variable-related queries
        ("variable", ("assignment",)),
        # Decorator-related queries
        ("decorator", ("decorator",)),
        # Exception-related queries
        ("exception", ("raise_statement", "except_clause")),
        # Comprehension-related queries
        ("comprehension", comprehension_kinds),
        # Context manager queries
        ("context_manager", ("with_statement",)),
        # Type hint queries
        ("type_hint", ("type",)),
        # Docstring queries
        ("docstring", ("string",)),
        # Framework-specific queries
        ("django_model", ("class_definition",)),
        ("flask_route", ("decorator",)),
        ("fastapi_endpoint", ("function_definition",)),
    )

    categories: dict[str, list[str]] = {}
    for singular, node_kinds in singular_categories:
        # "class" -> "classes"; every other key simply takes an "s".
        plural = singular + ("es" if singular.endswith("s") else "s")
        categories[singular] = list(node_kinds)
        categories[plural] = list(node_kinds)

    # Generic queries
    categories["all_elements"] = [
        "function_definition",
        "class_definition",
        "import_statement",
        "import_from_statement",
        "assignment",
        "decorator",
        "raise_statement",
        "except_clause",
        *comprehension_kinds,
        "with_statement",
        "type",
        "string",
        "lambda",
    ]
    return categories
1529
+
1530
async def analyze_file(
    self, file_path: str, request: AnalysisRequest
) -> AnalysisResult:
    """Analyze a Python file and return the analysis results.

    The file is read, parsed with tree-sitter, and its functions,
    classes, variables and imports are collected with a fresh
    extractor.

    Nodes are counted with an explicit stack. The previous recursive
    counter could raise ``RecursionError`` on deeply nested trees, which
    the broad ``except`` turned into a failed analysis of an otherwise
    readable file.

    Args:
        file_path: Path of the file to analyze.
        request: Analysis request; not consulted by this implementation.

    Returns:
        An ``AnalysisResult`` with ``success=True`` and the extracted
        elements, or ``success=False`` and an ``error_message`` when
        tree-sitter or the Python language is unavailable or any step
        raises.
    """
    if not TREE_SITTER_AVAILABLE:
        return AnalysisResult(
            file_path=file_path,
            language=self.get_language_name(),
            success=False,
            error_message="Tree-sitter library not available.",
        )

    language = self.get_tree_sitter_language()
    if not language:
        return AnalysisResult(
            file_path=file_path,
            language=self.get_language_name(),
            success=False,
            error_message="Could not load Python language for parsing.",
        )

    try:
        from ..encoding_utils import read_file_safe

        source_code, _ = read_file_safe(file_path)

        parser = tree_sitter.Parser()
        parser.language = language
        tree = parser.parse(bytes(source_code, "utf8"))

        extractor = self.create_extractor()
        extractor.current_file = file_path  # Set current file for context

        elements: list[CodeElement] = []

        # Extract all element types
        functions = extractor.extract_functions(tree, source_code)
        classes = extractor.extract_classes(tree, source_code)
        variables = extractor.extract_variables(tree, source_code)
        imports = extractor.extract_imports(tree, source_code)

        elements.extend(functions)
        elements.extend(classes)
        elements.extend(variables)
        elements.extend(imports)

        # Iterative traversal: tree depth is not bounded by the
        # interpreter's recursion limit.
        node_count = 0
        pending = [tree.root_node]
        while pending:
            current = pending.pop()
            node_count += 1
            pending.extend(current.children)

        return AnalysisResult(
            file_path=file_path,
            language=self.get_language_name(),
            success=True,
            elements=elements,
            line_count=len(source_code.splitlines()),
            node_count=node_count,
        )
    except Exception as e:
        log_error(f"Error analyzing Python file {file_path}: {e}")
        return AnalysisResult(
            file_path=file_path,
            language=self.get_language_name(),
            success=False,
            error_message=str(e),
        )
1598
+
1599
def execute_query(self, tree: "tree_sitter.Tree", query_name: str) -> dict:
    """Run one of the built-in queries (``"function"`` or ``"class"``) on a tree.

    Returns ``{"captures": ..., "query": ...}`` on success and
    ``{"error": ...}`` when the language is unavailable, the query name
    is unknown, or execution raises (the latter is also logged).
    """
    try:
        language = self.get_tree_sitter_language()
        if not language:
            return {"error": "Language not available"}

        # Simple query execution for testing
        if query_name not in ("function", "class"):
            return {"error": f"Unknown query: {query_name}"}
        query_string = (
            "(function_definition) @function"
            if query_name == "function"
            else "(class_definition) @class"
        )

        captures = TreeSitterQueryCompat.safe_execute_query(
            language, query_string, tree.root_node, fallback_result=[]
        )
        result = {"captures": captures}
        result["query"] = query_string
        return result

    except Exception as e:
        log_error(f"Query execution failed: {e}")
        return {"error": str(e)}
1622
+
1623
def extract_elements(self, tree: "tree_sitter.Tree", source_code: str) -> list:
    """Gather functions, classes, variables and imports into one list.

    The cached extractor is used. If an extraction step raises, the
    error is logged and the elements gathered up to that point are
    returned.
    """
    extractor = self.get_extractor()
    elements = []

    try:
        # Methods are resolved one at a time, so a failure leaves the
        # results of the earlier steps in place.
        for method_name in (
            "extract_functions",
            "extract_classes",
            "extract_variables",
            "extract_imports",
        ):
            extract = getattr(extractor, method_name)
            elements.extend(extract(tree, source_code))  # type: ignore
    except Exception as e:
        log_error(f"Failed to extract elements: {e}")

    return elements