tree-sitter-analyzer 1.9.17.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. tree_sitter_analyzer/__init__.py +132 -0
  2. tree_sitter_analyzer/__main__.py +11 -0
  3. tree_sitter_analyzer/api.py +853 -0
  4. tree_sitter_analyzer/cli/__init__.py +39 -0
  5. tree_sitter_analyzer/cli/__main__.py +12 -0
  6. tree_sitter_analyzer/cli/argument_validator.py +89 -0
  7. tree_sitter_analyzer/cli/commands/__init__.py +26 -0
  8. tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
  9. tree_sitter_analyzer/cli/commands/base_command.py +181 -0
  10. tree_sitter_analyzer/cli/commands/default_command.py +18 -0
  11. tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
  12. tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
  13. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
  14. tree_sitter_analyzer/cli/commands/query_command.py +109 -0
  15. tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
  16. tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
  17. tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
  18. tree_sitter_analyzer/cli/commands/table_command.py +414 -0
  19. tree_sitter_analyzer/cli/info_commands.py +124 -0
  20. tree_sitter_analyzer/cli_main.py +472 -0
  21. tree_sitter_analyzer/constants.py +85 -0
  22. tree_sitter_analyzer/core/__init__.py +15 -0
  23. tree_sitter_analyzer/core/analysis_engine.py +580 -0
  24. tree_sitter_analyzer/core/cache_service.py +333 -0
  25. tree_sitter_analyzer/core/engine.py +585 -0
  26. tree_sitter_analyzer/core/parser.py +293 -0
  27. tree_sitter_analyzer/core/query.py +605 -0
  28. tree_sitter_analyzer/core/query_filter.py +200 -0
  29. tree_sitter_analyzer/core/query_service.py +340 -0
  30. tree_sitter_analyzer/encoding_utils.py +530 -0
  31. tree_sitter_analyzer/exceptions.py +747 -0
  32. tree_sitter_analyzer/file_handler.py +246 -0
  33. tree_sitter_analyzer/formatters/__init__.py +1 -0
  34. tree_sitter_analyzer/formatters/base_formatter.py +201 -0
  35. tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
  36. tree_sitter_analyzer/formatters/formatter_config.py +197 -0
  37. tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
  38. tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
  39. tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
  40. tree_sitter_analyzer/formatters/go_formatter.py +368 -0
  41. tree_sitter_analyzer/formatters/html_formatter.py +498 -0
  42. tree_sitter_analyzer/formatters/java_formatter.py +423 -0
  43. tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
  44. tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
  45. tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
  46. tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
  47. tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
  48. tree_sitter_analyzer/formatters/php_formatter.py +301 -0
  49. tree_sitter_analyzer/formatters/python_formatter.py +830 -0
  50. tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
  51. tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
  52. tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
  53. tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
  54. tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
  55. tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
  56. tree_sitter_analyzer/interfaces/__init__.py +9 -0
  57. tree_sitter_analyzer/interfaces/cli.py +535 -0
  58. tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
  59. tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
  60. tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
  61. tree_sitter_analyzer/language_detector.py +553 -0
  62. tree_sitter_analyzer/language_loader.py +271 -0
  63. tree_sitter_analyzer/languages/__init__.py +10 -0
  64. tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
  65. tree_sitter_analyzer/languages/css_plugin.py +449 -0
  66. tree_sitter_analyzer/languages/go_plugin.py +836 -0
  67. tree_sitter_analyzer/languages/html_plugin.py +496 -0
  68. tree_sitter_analyzer/languages/java_plugin.py +1299 -0
  69. tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
  70. tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
  71. tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
  72. tree_sitter_analyzer/languages/php_plugin.py +862 -0
  73. tree_sitter_analyzer/languages/python_plugin.py +1636 -0
  74. tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
  75. tree_sitter_analyzer/languages/rust_plugin.py +673 -0
  76. tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
  77. tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
  78. tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
  79. tree_sitter_analyzer/legacy_table_formatter.py +860 -0
  80. tree_sitter_analyzer/mcp/__init__.py +34 -0
  81. tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
  82. tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
  83. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
  84. tree_sitter_analyzer/mcp/server.py +869 -0
  85. tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
  86. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
  87. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
  88. tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
  89. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
  90. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
  91. tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
  92. tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
  93. tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
  94. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
  95. tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
  96. tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
  97. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
  98. tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
  99. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
  100. tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
  101. tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
  102. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
  103. tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
  104. tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
  105. tree_sitter_analyzer/models.py +840 -0
  106. tree_sitter_analyzer/mypy_current_errors.txt +2 -0
  107. tree_sitter_analyzer/output_manager.py +255 -0
  108. tree_sitter_analyzer/platform_compat/__init__.py +3 -0
  109. tree_sitter_analyzer/platform_compat/adapter.py +324 -0
  110. tree_sitter_analyzer/platform_compat/compare.py +224 -0
  111. tree_sitter_analyzer/platform_compat/detector.py +67 -0
  112. tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
  113. tree_sitter_analyzer/platform_compat/profiles.py +217 -0
  114. tree_sitter_analyzer/platform_compat/record.py +55 -0
  115. tree_sitter_analyzer/platform_compat/recorder.py +155 -0
  116. tree_sitter_analyzer/platform_compat/report.py +92 -0
  117. tree_sitter_analyzer/plugins/__init__.py +280 -0
  118. tree_sitter_analyzer/plugins/base.py +647 -0
  119. tree_sitter_analyzer/plugins/manager.py +384 -0
  120. tree_sitter_analyzer/project_detector.py +328 -0
  121. tree_sitter_analyzer/queries/__init__.py +27 -0
  122. tree_sitter_analyzer/queries/csharp.py +216 -0
  123. tree_sitter_analyzer/queries/css.py +615 -0
  124. tree_sitter_analyzer/queries/go.py +275 -0
  125. tree_sitter_analyzer/queries/html.py +543 -0
  126. tree_sitter_analyzer/queries/java.py +402 -0
  127. tree_sitter_analyzer/queries/javascript.py +724 -0
  128. tree_sitter_analyzer/queries/kotlin.py +192 -0
  129. tree_sitter_analyzer/queries/markdown.py +258 -0
  130. tree_sitter_analyzer/queries/php.py +95 -0
  131. tree_sitter_analyzer/queries/python.py +859 -0
  132. tree_sitter_analyzer/queries/ruby.py +92 -0
  133. tree_sitter_analyzer/queries/rust.py +223 -0
  134. tree_sitter_analyzer/queries/sql.py +555 -0
  135. tree_sitter_analyzer/queries/typescript.py +871 -0
  136. tree_sitter_analyzer/queries/yaml.py +236 -0
  137. tree_sitter_analyzer/query_loader.py +272 -0
  138. tree_sitter_analyzer/security/__init__.py +22 -0
  139. tree_sitter_analyzer/security/boundary_manager.py +277 -0
  140. tree_sitter_analyzer/security/regex_checker.py +297 -0
  141. tree_sitter_analyzer/security/validator.py +599 -0
  142. tree_sitter_analyzer/table_formatter.py +782 -0
  143. tree_sitter_analyzer/utils/__init__.py +53 -0
  144. tree_sitter_analyzer/utils/logging.py +433 -0
  145. tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
  146. tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
  147. tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
  148. tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
  149. tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
@@ -0,0 +1,1299 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Java Language Plugin
4
+
5
+ Provides Java-specific parsing and element extraction functionality.
6
+ Migrated from AdvancedAnalyzer implementation for future independence.
7
+ """
8
+
9
+ import re
10
+ from typing import TYPE_CHECKING, Any
11
+
12
+ if TYPE_CHECKING:
13
+ import tree_sitter
14
+
15
+ from ..core.analysis_engine import AnalysisRequest
16
+ from ..models import AnalysisResult
17
+
18
+ from ..encoding_utils import extract_text_slice, safe_encode
19
+ from ..models import Class, Function, Import, Package, Variable
20
+ from ..plugins.base import ElementExtractor, LanguagePlugin
21
+ from ..utils import log_debug, log_error, log_warning
22
+
23
+
24
class JavaElementExtractor(ElementExtractor):
    """Java-specific element extractor with AdvancedAnalyzer implementation"""

    def __init__(self) -> None:
        """Initialize the Java element extractor.

        Sets up per-file parsing state plus the memoization caches ported
        from AdvancedAnalyzer; all of this is cleared by ``_reset_caches()``
        at the start of each extraction pass.
        """
        # Per-file state, (re)populated by the extract_* entry points.
        self.current_package: str = ""
        self.current_file: str = ""
        self.source_code: str = ""
        self.content_lines: list[str] = []
        self.imports: list[str] = []

        # Performance optimization caches (from AdvancedAnalyzer), keyed by
        # id(node) so repeated visits to the same AST node stay cheap.
        self._node_text_cache: dict[int, str] = {}
        self._processed_nodes: set[int] = set()
        self._element_cache: dict[tuple[int, str], Any] = {}
        self._file_encoding: str | None = None  # None -> fall back to UTF-8
        self._annotation_cache: dict[int, list[dict[str, Any]]] = {}
        self._signature_cache: dict[int, str] = {}

        # Extracted annotations for cross-referencing
        self.annotations: list[dict[str, Any]] = []
45
+
46
+ def extract_annotations(
47
+ self, tree: "tree_sitter.Tree", source_code: str
48
+ ) -> list[dict[str, Any]]:
49
+ """Extract Java annotations using AdvancedAnalyzer implementation"""
50
+ self.source_code = source_code
51
+ self.content_lines = source_code.split("\n")
52
+ self._reset_caches()
53
+
54
+ annotations: list[dict[str, Any]] = []
55
+
56
+ # Use AdvancedAnalyzer's optimized traversal for annotations
57
+ extractors = {
58
+ "annotation": self._extract_annotation_optimized,
59
+ "marker_annotation": self._extract_annotation_optimized,
60
+ }
61
+
62
+ self._traverse_and_extract_iterative(
63
+ tree.root_node, extractors, annotations, "annotation"
64
+ )
65
+
66
+ # Store annotations for cross-referencing
67
+ self.annotations = annotations
68
+
69
+ log_debug(f"Extracted {len(annotations)} annotations")
70
+ return annotations
71
+
72
+ def extract_functions(
73
+ self, tree: "tree_sitter.Tree", source_code: str
74
+ ) -> list[Function]:
75
+ """Extract Java method definitions using AdvancedAnalyzer implementation"""
76
+ self.source_code = source_code
77
+ self.content_lines = source_code.split("\n")
78
+ self._reset_caches()
79
+
80
+ functions: list[Function] = []
81
+
82
+ # Use AdvancedAnalyzer's optimized traversal
83
+ extractors = {
84
+ "method_declaration": self._extract_method_optimized,
85
+ "constructor_declaration": self._extract_method_optimized,
86
+ }
87
+
88
+ self._traverse_and_extract_iterative(
89
+ tree.root_node, extractors, functions, "method"
90
+ )
91
+
92
+ log_debug(f"Extracted {len(functions)} methods")
93
+ return functions
94
+
95
+ def extract_classes(
96
+ self, tree: "tree_sitter.Tree", source_code: str
97
+ ) -> list[Class]:
98
+ """Extract Java class definitions using AdvancedAnalyzer implementation"""
99
+ self.source_code = source_code
100
+ self.content_lines = source_code.split("\n")
101
+ self._reset_caches()
102
+
103
+ # Ensure package information is extracted before processing classes
104
+ # This fixes the issue where current_package is empty when extract_classes
105
+ # is called independently or before extract_imports
106
+ if not self.current_package:
107
+ self._extract_package_from_tree(tree)
108
+
109
+ classes: list[Class] = []
110
+
111
+ # Use AdvancedAnalyzer's optimized traversal
112
+ extractors = {
113
+ "class_declaration": self._extract_class_optimized,
114
+ "interface_declaration": self._extract_class_optimized,
115
+ "enum_declaration": self._extract_class_optimized,
116
+ }
117
+
118
+ self._traverse_and_extract_iterative(
119
+ tree.root_node, extractors, classes, "class"
120
+ )
121
+
122
+ log_debug(f"Extracted {len(classes)} classes")
123
+ return classes
124
+
125
+ def extract_variables(
126
+ self, tree: "tree_sitter.Tree", source_code: str
127
+ ) -> list[Variable]:
128
+ """Extract Java field definitions using AdvancedAnalyzer implementation"""
129
+ self.source_code = source_code
130
+ self.content_lines = source_code.split("\n")
131
+ self._reset_caches()
132
+
133
+ variables: list[Variable] = []
134
+
135
+ # Use AdvancedAnalyzer's optimized traversal
136
+ extractors = {
137
+ "field_declaration": self._extract_field_optimized,
138
+ }
139
+
140
+ log_debug("Starting field extraction with iterative traversal")
141
+ self._traverse_and_extract_iterative(
142
+ tree.root_node, extractors, variables, "field"
143
+ )
144
+
145
+ log_debug(f"Extracted {len(variables)} fields")
146
+ for i, var in enumerate(variables[:3]):
147
+ log_debug(f"Field {i}: {var.name} ({var.variable_type})")
148
+ return variables
149
+
150
+ def extract_imports(
151
+ self, tree: "tree_sitter.Tree", source_code: str
152
+ ) -> list[Import]:
153
+ """Extract Java import statements with enhanced robustness"""
154
+ self.source_code = source_code
155
+ self.content_lines = source_code.split("\n")
156
+
157
+ imports: list[Import] = []
158
+
159
+ # Extract package and imports efficiently (from AdvancedAnalyzer)
160
+ for child in tree.root_node.children:
161
+ if child.type == "package_declaration":
162
+ self._extract_package_info(child)
163
+ elif child.type == "import_declaration":
164
+ import_info = self._extract_import_info(child, source_code)
165
+ if import_info:
166
+ imports.append(import_info)
167
+ elif child.type in [
168
+ "class_declaration",
169
+ "interface_declaration",
170
+ "enum_declaration",
171
+ ]:
172
+ # After package and imports come class declarations, so stop
173
+ break
174
+
175
+ # Fallback: if no imports found via tree-sitter, try regex-based extraction
176
+ if not imports and "import" in source_code:
177
+ log_debug("No imports found via tree-sitter, trying regex fallback")
178
+ fallback_imports = self._extract_imports_fallback(source_code)
179
+ imports.extend(fallback_imports)
180
+
181
+ log_debug(f"Extracted {len(imports)} imports")
182
+ return imports
183
+
184
+ def _extract_imports_fallback(self, source_code: str) -> list[Import]:
185
+ """Fallback import extraction using regex when tree-sitter fails"""
186
+ imports = []
187
+ lines = source_code.split("\n")
188
+
189
+ for line_num, line in enumerate(lines, 1):
190
+ line = line.strip()
191
+ if line.startswith("import ") and line.endswith(";"):
192
+ # Extract import statement
193
+ import_content = line[:-1] # Remove semicolon
194
+
195
+ if "static" in import_content:
196
+ # Static import
197
+ static_match = re.search(
198
+ r"import\s+static\s+([\w.]+)", import_content
199
+ )
200
+ if static_match:
201
+ import_name = static_match.group(1)
202
+ if import_content.endswith(".*"):
203
+ import_name = import_name.replace(".*", "")
204
+
205
+ # For static imports, extract the class name (remove method/field name)
206
+ parts = import_name.split(".")
207
+ if len(parts) > 1:
208
+ # Remove the last part (method/field name) to get class name
209
+ import_name = ".".join(parts[:-1])
210
+
211
+ imports.append(
212
+ Import(
213
+ name=import_name,
214
+ start_line=line_num,
215
+ end_line=line_num,
216
+ raw_text=line,
217
+ language="java",
218
+ module_name=import_name,
219
+ is_static=True,
220
+ is_wildcard=import_content.endswith(".*"),
221
+ import_statement=import_content,
222
+ )
223
+ )
224
+ else:
225
+ # Normal import
226
+ normal_match = re.search(r"import\s+([\w.]+)", import_content)
227
+ if normal_match:
228
+ import_name = normal_match.group(1)
229
+ if import_content.endswith(".*"):
230
+ if import_name.endswith(".*"):
231
+ import_name = import_name[:-2]
232
+ elif import_name.endswith("."):
233
+ import_name = import_name[:-1]
234
+
235
+ imports.append(
236
+ Import(
237
+ name=import_name,
238
+ start_line=line_num,
239
+ end_line=line_num,
240
+ raw_text=line,
241
+ language="java",
242
+ module_name=import_name,
243
+ is_static=False,
244
+ is_wildcard=import_content.endswith(".*"),
245
+ import_statement=import_content,
246
+ )
247
+ )
248
+
249
+ return imports
250
+
251
+ def extract_packages(
252
+ self, tree: "tree_sitter.Tree", source_code: str
253
+ ) -> list[Package]:
254
+ """Extract Java package declarations"""
255
+ self.source_code = source_code
256
+ self.content_lines = source_code.split("\n")
257
+
258
+ packages: list[Package] = []
259
+
260
+ # Extract package declaration from AST
261
+ if tree and tree.root_node:
262
+ for child in tree.root_node.children:
263
+ if child.type == "package_declaration":
264
+ package_info = self._extract_package_element(child)
265
+ if package_info:
266
+ packages.append(package_info)
267
+ # Also set current_package for use by other extractors
268
+ self.current_package = package_info.name
269
+ break # Only one package declaration per file
270
+
271
+ # Fallback: Parse package from source code if AST parsing failed
272
+ if not packages:
273
+ import re
274
+
275
+ # Find package declaration with line number
276
+ lines = source_code.split("\n")
277
+ for line_num, line in enumerate(lines, start=1):
278
+ match = re.search(r"^\s*package\s+([\w.]+)\s*;", line)
279
+ if match:
280
+ package_name = match.group(1).strip()
281
+ packages.append(
282
+ Package(
283
+ name=package_name,
284
+ start_line=line_num,
285
+ end_line=line_num,
286
+ raw_text=line.strip(),
287
+ language="java",
288
+ )
289
+ )
290
+ self.current_package = package_name
291
+ log_debug(f"Package extracted via fallback: {package_name}")
292
+ break
293
+
294
+ log_debug(f"Extracted {len(packages)} packages")
295
+ return packages
296
+
297
+ def _reset_caches(self) -> None:
298
+ """Reset performance caches"""
299
+ self._node_text_cache.clear()
300
+ self._processed_nodes.clear()
301
+ self._element_cache.clear()
302
+ self._annotation_cache.clear()
303
+ self._signature_cache.clear()
304
+ self.annotations.clear()
305
+ self.current_package = (
306
+ "" # Reset package state to avoid cross-test contamination
307
+ )
308
+
309
+ def _traverse_and_extract_iterative(
310
+ self,
311
+ root_node: "tree_sitter.Node",
312
+ extractors: dict[str, Any],
313
+ results: list[Any],
314
+ element_type: str,
315
+ ) -> None:
316
+ """
317
+ Iterative node traversal and extraction (from AdvancedAnalyzer)
318
+ Uses batch processing for optimal performance
319
+ """
320
+ if not root_node:
321
+ return # type: ignore[unreachable]
322
+
323
+ # Target node types for extraction
324
+ target_node_types = set(extractors.keys())
325
+
326
+ # Container node types that may contain target nodes (from AdvancedAnalyzer)
327
+ container_node_types = {
328
+ "program",
329
+ "class_body",
330
+ "interface_body",
331
+ "enum_body",
332
+ "enum_body_declarations", # Required for enum methods/fields/constructors
333
+ "class_declaration",
334
+ "interface_declaration",
335
+ "enum_declaration",
336
+ "method_declaration",
337
+ "constructor_declaration",
338
+ "block",
339
+ "modifiers", # Annotation nodes can appear inside modifiers
340
+ }
341
+
342
+ # Iterative DFS stack: (node, depth)
343
+ node_stack = [(root_node, 0)]
344
+ processed_nodes = 0
345
+ max_depth = 50 # Prevent infinite loops
346
+
347
+ # Batch processing containers (from AdvancedAnalyzer)
348
+ field_batch = []
349
+
350
+ while node_stack:
351
+ current_node, depth = node_stack.pop()
352
+
353
+ # Safety check for maximum depth
354
+ if depth > max_depth:
355
+ log_warning(f"Maximum traversal depth ({max_depth}) exceeded")
356
+ continue
357
+
358
+ processed_nodes += 1
359
+ node_type = current_node.type
360
+
361
+ # Early termination: skip nodes that don't contain target elements
362
+ if (
363
+ depth > 0
364
+ and node_type not in target_node_types
365
+ and node_type not in container_node_types
366
+ ):
367
+ continue
368
+
369
+ # Collect target nodes for batch processing (from AdvancedAnalyzer)
370
+ if node_type in target_node_types:
371
+ if element_type == "field" and node_type == "field_declaration":
372
+ field_batch.append(current_node)
373
+ else:
374
+ # Process non-field elements immediately
375
+ node_id = id(current_node)
376
+
377
+ # Skip if already processed
378
+ if node_id in self._processed_nodes:
379
+ continue
380
+
381
+ # Check element cache first
382
+ cache_key = (node_id, element_type)
383
+ if cache_key in self._element_cache:
384
+ element = self._element_cache[cache_key]
385
+ if element:
386
+ if isinstance(element, list):
387
+ results.extend(element)
388
+ else:
389
+ results.append(element)
390
+ self._processed_nodes.add(node_id)
391
+ continue
392
+
393
+ # Extract and cache
394
+ extractor = extractors.get(node_type)
395
+ if extractor:
396
+ element = extractor(current_node)
397
+ self._element_cache[cache_key] = element
398
+ if element:
399
+ if isinstance(element, list):
400
+ results.extend(element)
401
+ else:
402
+ results.append(element)
403
+ self._processed_nodes.add(node_id)
404
+
405
+ # Add children to stack (reversed for correct DFS traversal)
406
+ if current_node.children:
407
+ for child in reversed(current_node.children):
408
+ node_stack.append((child, depth + 1))
409
+
410
+ # Process field batch when it reaches optimal size (from AdvancedAnalyzer)
411
+ if len(field_batch) >= 10:
412
+ self._process_field_batch(field_batch, extractors, results)
413
+ field_batch.clear()
414
+
415
+ # Process remaining field batch (from AdvancedAnalyzer)
416
+ if field_batch:
417
+ self._process_field_batch(field_batch, extractors, results)
418
+
419
+ log_debug(f"Iterative traversal processed {processed_nodes} nodes")
420
+
421
+ def _process_field_batch(
422
+ self, batch: list["tree_sitter.Node"], extractors: dict, results: list[Any]
423
+ ) -> None:
424
+ """Process field nodes with caching (from AdvancedAnalyzer)"""
425
+ for node in batch:
426
+ node_id = id(node)
427
+
428
+ # Skip if already processed
429
+ if node_id in self._processed_nodes:
430
+ continue
431
+
432
+ # Check element cache first
433
+ cache_key = (node_id, "field")
434
+ if cache_key in self._element_cache:
435
+ elements = self._element_cache[cache_key]
436
+ if elements:
437
+ if isinstance(elements, list):
438
+ results.extend(elements)
439
+ else:
440
+ results.append(elements)
441
+ self._processed_nodes.add(node_id)
442
+ continue
443
+
444
+ # Extract and cache
445
+ extractor = extractors.get(node.type)
446
+ if extractor:
447
+ elements = extractor(node)
448
+ self._element_cache[cache_key] = elements
449
+ if elements:
450
+ if isinstance(elements, list):
451
+ results.extend(elements)
452
+ else:
453
+ results.append(elements)
454
+ self._processed_nodes.add(node_id)
455
+
456
+ def _get_node_text_optimized(self, node: "tree_sitter.Node") -> str:
457
+ """Get node text with optimized caching (from AdvancedAnalyzer)"""
458
+ node_id = id(node)
459
+
460
+ # Check cache first
461
+ if node_id in self._node_text_cache:
462
+ return self._node_text_cache[node_id]
463
+
464
+ try:
465
+ # Use encoding utilities for text extraction
466
+ start_byte = node.start_byte
467
+ end_byte = node.end_byte
468
+
469
+ encoding = self._file_encoding or "utf-8"
470
+ content_bytes = safe_encode("\n".join(self.content_lines), encoding)
471
+ text = extract_text_slice(content_bytes, start_byte, end_byte, encoding)
472
+
473
+ self._node_text_cache[node_id] = text
474
+ return text
475
+ except Exception as e:
476
+ log_error(f"Error in _get_node_text_optimized: {e}")
477
+ # Fallback to simple text extraction
478
+ try:
479
+ start_point = node.start_point
480
+ end_point = node.end_point
481
+
482
+ if start_point[0] == end_point[0]:
483
+ # Single line
484
+ line = self.content_lines[start_point[0]]
485
+ result: str = line[start_point[1] : end_point[1]]
486
+ return result
487
+ else:
488
+ # Multiple lines
489
+ lines = []
490
+ for i in range(start_point[0], end_point[0] + 1):
491
+ if i < len(self.content_lines):
492
+ line = self.content_lines[i]
493
+ if i == start_point[0]:
494
+ lines.append(line[start_point[1] :])
495
+ elif i == end_point[0]:
496
+ lines.append(line[: end_point[1]])
497
+ else:
498
+ lines.append(line)
499
+ return "\n".join(lines)
500
+ except Exception as fallback_error:
501
+ log_error(f"Fallback text extraction also failed: {fallback_error}")
502
+ return ""
503
+
504
+ def _extract_class_optimized(self, node: "tree_sitter.Node") -> Class | None:
505
+ """Extract class information optimized (from AdvancedAnalyzer)"""
506
+ try:
507
+ start_line = node.start_point[0] + 1
508
+ end_line = node.end_point[0] + 1
509
+
510
+ # Extract class name efficiently
511
+ class_name = None
512
+ for child in node.children:
513
+ if child.type == "identifier":
514
+ class_name = self._get_node_text_optimized(child)
515
+ break
516
+
517
+ if not class_name:
518
+ return None
519
+
520
+ # Determine package name
521
+ package_name = self.current_package
522
+ full_qualified_name = (
523
+ f"{package_name}.{class_name}" if package_name else class_name
524
+ )
525
+
526
+ # Determine class type (optimized: dictionary lookup)
527
+ class_type_map = {
528
+ "class_declaration": "class",
529
+ "interface_declaration": "interface",
530
+ "enum_declaration": "enum",
531
+ }
532
+ class_type = class_type_map.get(node.type, "class")
533
+
534
+ # Extract modifiers efficiently
535
+ modifiers = self._extract_modifiers_optimized(node)
536
+ visibility = self._determine_visibility(modifiers)
537
+
538
+ # Extract superclass and interfaces (optimized: single pass)
539
+ extends_class = None
540
+ implements_interfaces = []
541
+
542
+ for child in node.children:
543
+ if child.type == "superclass":
544
+ extends_text = self._get_node_text_optimized(child)
545
+ match = re.search(r"\b[A-Z]\w*", extends_text)
546
+ if match:
547
+ extends_class = match.group(0)
548
+ elif child.type == "super_interfaces":
549
+ implements_text = self._get_node_text_optimized(child)
550
+ implements_interfaces = re.findall(r"\b[A-Z]\w*", implements_text)
551
+
552
+ # Extract annotations for this class
553
+ class_annotations = self._find_annotations_for_line_cached(start_line)
554
+
555
+ # Check if this is a nested class
556
+ is_nested = self._is_nested_class(node)
557
+ parent_class = self._find_parent_class(node) if is_nested else None
558
+
559
+ # Extract raw text
560
+ start_line_idx = max(0, start_line - 1)
561
+ end_line_idx = min(len(self.content_lines), end_line)
562
+ raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
563
+
564
+ return Class(
565
+ name=class_name,
566
+ start_line=start_line,
567
+ end_line=end_line,
568
+ raw_text=raw_text,
569
+ language="java",
570
+ class_type=class_type,
571
+ full_qualified_name=full_qualified_name,
572
+ package_name=package_name,
573
+ superclass=extends_class,
574
+ interfaces=implements_interfaces,
575
+ modifiers=modifiers,
576
+ visibility=visibility,
577
+ # Java-specific detailed information
578
+ annotations=class_annotations,
579
+ is_nested=is_nested,
580
+ parent_class=parent_class,
581
+ extends_class=extends_class, # Alias for superclass
582
+ implements_interfaces=implements_interfaces, # Alias for interfaces
583
+ )
584
+ except (AttributeError, ValueError, TypeError) as e:
585
+ log_debug(f"Failed to extract class info: {e}")
586
+ return None
587
+ except Exception as e:
588
+ log_error(f"Unexpected error in class extraction: {e}")
589
+ return None
590
+
591
+ def _extract_method_optimized(self, node: "tree_sitter.Node") -> Function | None:
592
+ """Extract method information optimized (from AdvancedAnalyzer)"""
593
+ try:
594
+ start_line = node.start_point[0] + 1
595
+ end_line = node.end_point[0] + 1
596
+
597
+ # Extract method information efficiently
598
+ method_info = self._parse_method_signature_optimized(node)
599
+ if not method_info:
600
+ return None
601
+
602
+ method_name, return_type, parameters, modifiers, throws = method_info
603
+ is_constructor = node.type == "constructor_declaration"
604
+ visibility = self._determine_visibility(modifiers)
605
+
606
+ # Extract annotations for this method
607
+ method_annotations = self._find_annotations_for_line_cached(start_line)
608
+
609
+ # Calculate complexity score
610
+ complexity_score = self._calculate_complexity_optimized(node)
611
+
612
+ # Extract JavaDoc
613
+ javadoc = self._extract_javadoc_for_line(start_line)
614
+
615
+ # Extract raw text
616
+ start_line_idx = max(0, start_line - 1)
617
+ end_line_idx = min(len(self.content_lines), end_line)
618
+ raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
619
+
620
+ return Function(
621
+ name=method_name,
622
+ start_line=start_line,
623
+ end_line=end_line,
624
+ raw_text=raw_text,
625
+ language="java",
626
+ parameters=parameters,
627
+ return_type=return_type if not is_constructor else "void",
628
+ modifiers=modifiers,
629
+ is_static="static" in modifiers,
630
+ is_private="private" in modifiers,
631
+ is_public="public" in modifiers,
632
+ is_constructor=is_constructor,
633
+ visibility=visibility,
634
+ docstring=javadoc,
635
+ # Java-specific detailed information
636
+ annotations=method_annotations,
637
+ throws=throws,
638
+ complexity_score=complexity_score,
639
+ is_abstract="abstract" in modifiers,
640
+ is_final="final" in modifiers,
641
+ )
642
+ except (AttributeError, ValueError, TypeError) as e:
643
+ log_debug(f"Failed to extract method info: {e}")
644
+ return None
645
+ except Exception as e:
646
+ log_error(f"Unexpected error in method extraction: {e}")
647
+ return None
648
+
649
+ def _extract_field_optimized(self, node: "tree_sitter.Node") -> list[Variable]:
650
+ """Extract field information optimized (from AdvancedAnalyzer)"""
651
+ fields: list[Variable] = []
652
+ try:
653
+ start_line = node.start_point[0] + 1
654
+ end_line = node.end_point[0] + 1
655
+
656
+ # Parse field declaration using AdvancedAnalyzer method
657
+ field_info = self._parse_field_declaration_optimized(node)
658
+ if not field_info:
659
+ return fields
660
+
661
+ field_type, variable_names, modifiers = field_info
662
+ visibility = self._determine_visibility(modifiers)
663
+
664
+ # Extract annotations for this field
665
+ field_annotations = self._find_annotations_for_line_cached(start_line)
666
+
667
+ # Extract JavaDoc for this field
668
+ field_javadoc = self._extract_javadoc_for_line(start_line)
669
+
670
+ # Create Variable object for each variable (matching AdvancedAnalyzer structure)
671
+ for var_name in variable_names:
672
+ # Extract raw text
673
+ start_line_idx = max(0, start_line - 1)
674
+ end_line_idx = min(len(self.content_lines), end_line)
675
+ raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
676
+
677
+ field = Variable(
678
+ name=var_name,
679
+ start_line=start_line,
680
+ end_line=end_line,
681
+ raw_text=raw_text,
682
+ language="java",
683
+ variable_type=field_type,
684
+ modifiers=modifiers,
685
+ is_static="static" in modifiers,
686
+ is_constant="final" in modifiers,
687
+ visibility=visibility,
688
+ docstring=field_javadoc,
689
+ # Java-specific detailed information
690
+ annotations=field_annotations,
691
+ is_final="final" in modifiers,
692
+ field_type=field_type, # Alias for variable_type
693
+ )
694
+ fields.append(field)
695
+ except (AttributeError, ValueError, TypeError) as e:
696
+ log_debug(f"Failed to extract field info: {e}")
697
+ except Exception as e:
698
+ log_error(f"Unexpected error in field extraction: {e}")
699
+
700
+ return fields
701
+
702
+ def _parse_method_signature_optimized(
703
+ self, node: "tree_sitter.Node"
704
+ ) -> tuple[str, str, list[str], list[str], list[str]] | None:
705
+ """Parse method signature optimized (from AdvancedAnalyzer)"""
706
+ try:
707
+ # Extract method name
708
+ method_name = None
709
+ for child in node.children:
710
+ if child.type == "identifier":
711
+ method_name = self._get_node_text_optimized(child)
712
+ break
713
+
714
+ if not method_name:
715
+ return None
716
+
717
+ # Extract return type
718
+ return_type = "void"
719
+ for child in node.children:
720
+ if (
721
+ child.type
722
+ in [
723
+ "type_identifier",
724
+ "void_type",
725
+ "primitive_type",
726
+ "integral_type",
727
+ "boolean_type",
728
+ "floating_point_type",
729
+ "array_type",
730
+ ]
731
+ or child.type == "generic_type"
732
+ ):
733
+ return_type = self._get_node_text_optimized(child)
734
+ break
735
+
736
+ # Extract parameters
737
+ parameters = []
738
+ for child in node.children:
739
+ if child.type == "formal_parameters":
740
+ for param in child.children:
741
+ if param.type == "formal_parameter":
742
+ param_text = self._get_node_text_optimized(param)
743
+ parameters.append(param_text)
744
+
745
+ # Extract modifiers
746
+ modifiers = self._extract_modifiers_optimized(node)
747
+
748
+ # Extract throws clause
749
+ throws = []
750
+ for child in node.children:
751
+ if child.type == "throws":
752
+ throws_text = self._get_node_text_optimized(child)
753
+ exceptions = re.findall(r"\b[A-Z]\w*Exception\b", throws_text)
754
+ throws.extend(exceptions)
755
+
756
+ return method_name, return_type, parameters, modifiers, throws
757
+ except Exception:
758
+ return None
759
+
760
+ def _parse_field_declaration_optimized(
761
+ self, node: "tree_sitter.Node"
762
+ ) -> tuple[str, list[str], list[str]] | None:
763
+ """Parse field declaration optimized (from AdvancedAnalyzer)"""
764
+ try:
765
+ # Extract type (exactly as in AdvancedAnalyzer)
766
+ field_type = None
767
+ for child in node.children:
768
+ if child.type in [
769
+ "type_identifier",
770
+ "primitive_type",
771
+ "integral_type",
772
+ "generic_type",
773
+ "boolean_type",
774
+ "floating_point_type",
775
+ "array_type",
776
+ ]:
777
+ field_type = self._get_node_text_optimized(child)
778
+ break
779
+
780
+ if not field_type:
781
+ return None
782
+
783
+ # Extract variable names (exactly as in AdvancedAnalyzer)
784
+ variable_names = []
785
+ for child in node.children:
786
+ if child.type == "variable_declarator":
787
+ for grandchild in child.children:
788
+ if grandchild.type == "identifier":
789
+ var_name = self._get_node_text_optimized(grandchild)
790
+ variable_names.append(var_name)
791
+
792
+ if not variable_names:
793
+ return None
794
+
795
+ # Extract modifiers (exactly as in AdvancedAnalyzer)
796
+ modifiers = self._extract_modifiers_optimized(node)
797
+
798
+ return field_type, variable_names, modifiers
799
+ except Exception:
800
+ return None
801
+
802
+ def _extract_modifiers_optimized(self, node: "tree_sitter.Node") -> list[str]:
803
+ """Extract modifiers efficiently (from AdvancedAnalyzer)"""
804
+ modifiers = []
805
+ for child in node.children:
806
+ if child.type == "modifiers":
807
+ for mod_child in child.children:
808
+ if mod_child.type in [
809
+ "public",
810
+ "private",
811
+ "protected",
812
+ "static",
813
+ "final",
814
+ "abstract",
815
+ "synchronized",
816
+ "volatile",
817
+ "transient",
818
+ ]:
819
+ modifiers.append(mod_child.type)
820
+ elif mod_child.type not in ["marker_annotation"]:
821
+ mod_text = self._get_node_text_optimized(mod_child)
822
+ if mod_text in [
823
+ "public",
824
+ "private",
825
+ "protected",
826
+ "static",
827
+ "final",
828
+ "abstract",
829
+ "synchronized",
830
+ "volatile",
831
+ "transient",
832
+ ]:
833
+ modifiers.append(mod_text)
834
+ return modifiers
835
+
836
+ def _extract_package_info(self, node: "tree_sitter.Node") -> None:
837
+ """Extract package information (from AdvancedAnalyzer)"""
838
+ try:
839
+ package_text = self._get_node_text_optimized(node)
840
+ match = re.search(r"package\s+([\w.]+)", package_text)
841
+ if match:
842
+ self.current_package = match.group(1)
843
+ except (AttributeError, ValueError, IndexError) as e:
844
+ log_debug(f"Failed to extract package info: {e}")
845
+ except Exception as e:
846
+ log_error(f"Unexpected error in package extraction: {e}")
847
+
848
+ def _extract_package_element(self, node: "tree_sitter.Node") -> Package | None:
849
+ """Extract package element for inclusion in results"""
850
+ try:
851
+ package_text = self._get_node_text_optimized(node)
852
+ match = re.search(r"package\s+([\w.]+)", package_text)
853
+ if match:
854
+ package_name = match.group(1)
855
+ return Package(
856
+ name=package_name,
857
+ start_line=node.start_point[0] + 1,
858
+ end_line=node.end_point[0] + 1,
859
+ raw_text=package_text,
860
+ language="java",
861
+ )
862
+ except (AttributeError, ValueError, IndexError) as e:
863
+ log_debug(f"Failed to extract package element: {e}")
864
+ except Exception as e:
865
+ log_error(f"Unexpected error in package element extraction: {e}")
866
+
867
+ return None
868
+
869
+ def _extract_package_from_tree(self, tree: "tree_sitter.Tree") -> None:
870
+ """Extract package information from tree when needed"""
871
+ if tree and tree.root_node:
872
+ for child in tree.root_node.children:
873
+ if child.type == "package_declaration":
874
+ self._extract_package_info(child)
875
+ break
876
+
877
+ def _extract_import_info(
878
+ self, node: "tree_sitter.Node", source_code: str
879
+ ) -> Import | None:
880
+ """Extract import information from import declaration node"""
881
+ try:
882
+ import_text = self._get_node_text_optimized(node)
883
+ line_num = node.start_point[0] + 1
884
+
885
+ # Parse import statement
886
+ if "static" in import_text:
887
+ # Static import
888
+ static_match = re.search(r"import\s+static\s+([\w.]+)", import_text)
889
+ if static_match:
890
+ import_name = static_match.group(1)
891
+ if import_text.endswith(".*"):
892
+ import_name = import_name.replace(".*", "")
893
+
894
+ # For static imports, extract the class name
895
+ parts = import_name.split(".")
896
+ if len(parts) > 1:
897
+ import_name = ".".join(parts[:-1])
898
+
899
+ return Import(
900
+ name=import_name,
901
+ start_line=line_num,
902
+ end_line=line_num,
903
+ raw_text=import_text,
904
+ language="java",
905
+ module_name=import_name,
906
+ is_static=True,
907
+ is_wildcard=import_text.endswith(".*"),
908
+ import_statement=import_text,
909
+ )
910
+ else:
911
+ # Normal import
912
+ normal_match = re.search(r"import\s+([\w.]+)", import_text)
913
+ if normal_match:
914
+ import_name = normal_match.group(1)
915
+ if import_text.endswith(".*"):
916
+ if import_name.endswith(".*"):
917
+ import_name = import_name[:-2]
918
+ elif import_name.endswith("."):
919
+ import_name = import_name[:-1]
920
+
921
+ return Import(
922
+ name=import_name,
923
+ start_line=line_num,
924
+ end_line=line_num,
925
+ raw_text=import_text,
926
+ language="java",
927
+ module_name=import_name,
928
+ is_static=False,
929
+ is_wildcard=import_text.endswith(".*"),
930
+ import_statement=import_text,
931
+ )
932
+ except Exception as e:
933
+ log_debug(f"Failed to extract import info: {e}")
934
+
935
+ return None
936
+
937
+ def _extract_annotation_optimized(
938
+ self, node: "tree_sitter.Node"
939
+ ) -> dict[str, Any] | None:
940
+ """Extract annotation information optimized"""
941
+ try:
942
+ annotation_text = self._get_node_text_optimized(node)
943
+ start_line = node.start_point[0] + 1
944
+
945
+ # Extract annotation name
946
+ annotation_name = None
947
+ for child in node.children:
948
+ if child.type == "identifier":
949
+ annotation_name = self._get_node_text_optimized(child)
950
+ break
951
+
952
+ if not annotation_name:
953
+ # Try to extract from text
954
+ match = re.search(r"@(\w+)", annotation_text)
955
+ if match:
956
+ annotation_name = match.group(1)
957
+
958
+ if annotation_name:
959
+ return {
960
+ "name": annotation_name,
961
+ "line": start_line,
962
+ "text": annotation_text,
963
+ "type": "annotation",
964
+ }
965
+ except Exception as e:
966
+ log_debug(f"Failed to extract annotation: {e}")
967
+
968
+ return None
969
+
970
+ def _determine_visibility(self, modifiers: list[str]) -> str:
971
+ """Determine visibility from modifiers"""
972
+ if "public" in modifiers:
973
+ return "public"
974
+ elif "private" in modifiers:
975
+ return "private"
976
+ elif "protected" in modifiers:
977
+ return "protected"
978
+ else:
979
+ return "package"
980
+
981
+ def _find_annotations_for_line_cached(self, line: int) -> list[dict[str, Any]]:
982
+ """Find annotations for a specific line with caching"""
983
+ if line in self._annotation_cache:
984
+ return self._annotation_cache[line]
985
+
986
+ # Find annotations near this line
987
+ annotations = []
988
+ for annotation in self.annotations:
989
+ if abs(annotation.get("line", 0) - line) <= 2:
990
+ annotations.append(annotation)
991
+
992
+ self._annotation_cache[line] = annotations
993
+ return annotations
994
+
995
+ def _is_nested_class(self, node: "tree_sitter.Node") -> bool:
996
+ """Check if this is a nested class"""
997
+ parent = node.parent
998
+ while parent:
999
+ if parent.type in [
1000
+ "class_declaration",
1001
+ "interface_declaration",
1002
+ "enum_declaration",
1003
+ ]:
1004
+ return True
1005
+ parent = parent.parent
1006
+ return False
1007
+
1008
+ def _find_parent_class(self, node: "tree_sitter.Node") -> str | None:
1009
+ """Find parent class name for nested classes"""
1010
+ parent = node.parent
1011
+ while parent:
1012
+ if parent.type in [
1013
+ "class_declaration",
1014
+ "interface_declaration",
1015
+ "enum_declaration",
1016
+ ]:
1017
+ for child in parent.children:
1018
+ if child.type == "identifier":
1019
+ return self._get_node_text_optimized(child)
1020
+ parent = parent.parent
1021
+ return None
1022
+
1023
+ def _calculate_complexity_optimized(self, node: "tree_sitter.Node") -> int:
1024
+ """Calculate cyclomatic complexity optimized"""
1025
+ complexity = 1 # Base complexity
1026
+
1027
+ # Count decision points
1028
+ decision_nodes = [
1029
+ "if_statement",
1030
+ "while_statement",
1031
+ "for_statement",
1032
+ "switch_statement",
1033
+ "catch_clause",
1034
+ "conditional_expression",
1035
+ "enhanced_for_statement",
1036
+ ]
1037
+
1038
+ def count_decisions(n: "tree_sitter.Node") -> int:
1039
+ count = 0
1040
+ if hasattr(n, "type") and n.type in decision_nodes:
1041
+ count += 1
1042
+ if hasattr(n, "children"):
1043
+ try:
1044
+ for child in n.children:
1045
+ count += count_decisions(child)
1046
+ except (TypeError, AttributeError):
1047
+ # Handle Mock objects or other non-iterable children
1048
+ pass
1049
+ return count
1050
+
1051
+ complexity += count_decisions(node)
1052
+ return complexity
1053
+
1054
+ def _extract_javadoc_for_line(self, line: int) -> str | None:
1055
+ """Extract JavaDoc comment for a specific line"""
1056
+ try:
1057
+ # Look for JavaDoc comment before the line
1058
+ for i in range(max(0, line - 10), line):
1059
+ if i < len(self.content_lines):
1060
+ line_content = self.content_lines[i].strip()
1061
+ if line_content.startswith("/**"):
1062
+ # Found start of JavaDoc, collect until */
1063
+ javadoc_lines = []
1064
+ for j in range(i, min(len(self.content_lines), line)):
1065
+ doc_line = self.content_lines[j].strip()
1066
+ javadoc_lines.append(doc_line)
1067
+ if doc_line.endswith("*/"):
1068
+ break
1069
+ return "\n".join(javadoc_lines)
1070
+ except Exception as e:
1071
+ log_debug(f"Failed to extract JavaDoc: {e}")
1072
+
1073
+ return None
1074
+
1075
+ def _extract_class_name(self, node: "tree_sitter.Node") -> str | None:
1076
+ """Extract class name from a class declaration node."""
1077
+ try:
1078
+ for child in node.children:
1079
+ if child.type == "identifier":
1080
+ return self._get_node_text_optimized(child)
1081
+ return None
1082
+ except Exception as e:
1083
+ log_debug(f"Failed to extract class name: {e}")
1084
+ return None
1085
+
1086
+
1087
+ class JavaPlugin(LanguagePlugin):
1088
+ """Java language plugin implementation"""
1089
+
1090
+ def __init__(self) -> None:
1091
+ """Initialize the Java language plugin."""
1092
+ super().__init__()
1093
+ self.extractor = JavaElementExtractor()
1094
+ self.language = "java" # Add language property for test compatibility
1095
+ self.supported_extensions = (
1096
+ self.get_file_extensions()
1097
+ ) # Add for test compatibility
1098
+ self._cached_language: Any | None = None # Cache for tree-sitter language
1099
+
1100
+ def get_language_name(self) -> str:
1101
+ """Get the language name."""
1102
+ return "java"
1103
+
1104
+ def get_file_extensions(self) -> list[str]:
1105
+ """Get supported file extensions."""
1106
+ return [".java", ".jsp", ".jspx"]
1107
+
1108
+ def create_extractor(self) -> ElementExtractor:
1109
+ """Create a new element extractor instance."""
1110
+ return JavaElementExtractor()
1111
+
1112
    async def analyze_file(
        self, file_path: str, request: "AnalysisRequest"
    ) -> "AnalysisResult":
        """Analyze Java code and return structured results.

        Reads *file_path* with safe encoding detection, parses it with the
        tree-sitter Java grammar and flattens all extracted elements into a
        single AnalysisResult.  Never raises: parser-setup or analysis
        failures return a result with ``success=False`` and an
        ``error_message`` instead.

        Args:
            file_path: Path of the Java source file to analyze.
            request: Analysis request options (not consulted in this body).

        Returns:
            AnalysisResult describing the file's elements.
        """

        from ..models import AnalysisResult

        try:
            # Read the file content using safe encoding detection
            from ..encoding_utils import read_file_safe

            file_content, detected_encoding = read_file_safe(file_path)

            # Get tree-sitter language and parse
            language = self.get_tree_sitter_language()
            if language is None:
                # Return empty (but successful) result if language loading fails
                return AnalysisResult(
                    file_path=file_path,
                    language="java",
                    line_count=len(file_content.split("\n")),
                    elements=[],
                    source_code=file_content,
                )

            # Parse the code
            import tree_sitter

            parser = tree_sitter.Parser()

            # Set language using the appropriate method; the tree-sitter
            # Python bindings changed this API across versions.
            if hasattr(parser, "set_language"):
                parser.set_language(language)
            elif hasattr(parser, "language"):
                parser.language = language
            else:
                # Try constructor approach as last resort
                try:
                    parser = tree_sitter.Parser(language)
                except Exception as e:
                    log_error(f"Failed to create parser with language: {e}")
                    return AnalysisResult(
                        file_path=file_path,
                        language="java",
                        line_count=len(file_content.split("\n")),
                        elements=[],
                        source_code=file_content,
                        error_message=f"Parser creation failed: {e}",
                        success=False,
                    )

            tree = parser.parse(file_content.encode("utf-8"))

            # Extract elements using our extractor
            elements_dict = self.extract_elements(tree, file_content)

            # Combine all elements into a single list.
            # NOTE: the "annotations" category from extract_elements is not
            # flattened in here.
            all_elements = []
            all_elements.extend(elements_dict.get("functions", []))
            all_elements.extend(elements_dict.get("classes", []))
            all_elements.extend(elements_dict.get("variables", []))
            all_elements.extend(elements_dict.get("imports", []))
            all_elements.extend(elements_dict.get("packages", []))

            # Get package info if available (first extracted package is used)
            packages = elements_dict.get("packages", [])
            package = packages[0] if packages else None

            # Count nodes in the AST tree
            node_count = (
                self._count_tree_nodes(tree.root_node) if tree and tree.root_node else 0
            )

            return AnalysisResult(
                file_path=file_path,
                language="java",
                line_count=len(file_content.split("\n")),
                elements=all_elements,
                node_count=node_count,
                source_code=file_content,
                package=package,
            )

        except Exception as e:
            log_error(f"Error analyzing Java file {file_path}: {e}")
            # Return empty result on error
            return AnalysisResult(
                file_path=file_path,
                language="java",
                line_count=0,
                elements=[],
                source_code="",
                error_message=str(e),
                success=False,
            )
1207
+
1208
+ def _count_tree_nodes(self, node: Any) -> int:
1209
+ """
1210
+ Recursively count nodes in the AST tree.
1211
+
1212
+ Args:
1213
+ node: Tree-sitter node
1214
+
1215
+ Returns:
1216
+ Total number of nodes
1217
+ """
1218
+ if node is None:
1219
+ return 0
1220
+
1221
+ count = 1 # Count current node
1222
+ if hasattr(node, "children"):
1223
+ for child in node.children:
1224
+ count += self._count_tree_nodes(child)
1225
+ return count
1226
+
1227
+ def get_tree_sitter_language(self) -> Any | None:
1228
+ """Get the tree-sitter language for Java."""
1229
+ if self._cached_language is not None:
1230
+ return self._cached_language
1231
+
1232
+ try:
1233
+ import tree_sitter
1234
+ import tree_sitter_java
1235
+
1236
+ # Get the language function result
1237
+ caps_or_lang = tree_sitter_java.language()
1238
+
1239
+ # Convert to proper Language object if needed
1240
+ if hasattr(caps_or_lang, "__class__") and "Language" in str(
1241
+ type(caps_or_lang)
1242
+ ):
1243
+ # Already a Language object
1244
+ self._cached_language = caps_or_lang
1245
+ else:
1246
+ # PyCapsule - convert to Language object
1247
+ try:
1248
+ # Use modern tree-sitter API - PyCapsule should be passed to Language constructor
1249
+ self._cached_language = tree_sitter.Language(caps_or_lang)
1250
+ except Exception as e:
1251
+ log_error(f"Failed to create Language object from PyCapsule: {e}")
1252
+ return None
1253
+
1254
+ return self._cached_language
1255
+ except ImportError as e:
1256
+ log_error(f"tree-sitter-java not available: {e}")
1257
+ return None
1258
+ except Exception as e:
1259
+ log_error(f"Failed to load tree-sitter language for Java: {e}")
1260
+ return None
1261
+
1262
+ def extract_elements(self, tree: Any | None, source_code: str) -> dict[str, Any]:
1263
+ """Extract all elements from Java code for test compatibility."""
1264
+ if tree is None:
1265
+ return {
1266
+ "functions": [],
1267
+ "classes": [],
1268
+ "variables": [],
1269
+ "imports": [],
1270
+ "packages": [],
1271
+ "annotations": [],
1272
+ }
1273
+
1274
+ try:
1275
+ extractor = self.create_extractor()
1276
+ return {
1277
+ "functions": extractor.extract_functions(tree, source_code),
1278
+ "classes": extractor.extract_classes(tree, source_code),
1279
+ "variables": extractor.extract_variables(tree, source_code),
1280
+ "imports": extractor.extract_imports(tree, source_code),
1281
+ "packages": extractor.extract_packages(tree, source_code),
1282
+ "annotations": extractor.extract_annotations(tree, source_code),
1283
+ }
1284
+ except Exception as e:
1285
+ log_error(f"Error extracting elements: {e}")
1286
+ return {
1287
+ "functions": [],
1288
+ "classes": [],
1289
+ "variables": [],
1290
+ "imports": [],
1291
+ "packages": [],
1292
+ "annotations": [],
1293
+ }
1294
+
1295
+ def supports_file(self, file_path: str) -> bool:
1296
+ """Check if this plugin supports the given file."""
1297
+ return any(
1298
+ file_path.lower().endswith(ext) for ext in self.get_file_extensions()
1299
+ )