tree-sitter-analyzer 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer has been flagged as potentially problematic; see the registry's advisory page for details.

Files changed (78)
  1. tree_sitter_analyzer/__init__.py +134 -121
  2. tree_sitter_analyzer/__main__.py +11 -12
  3. tree_sitter_analyzer/api.py +533 -539
  4. tree_sitter_analyzer/cli/__init__.py +39 -39
  5. tree_sitter_analyzer/cli/__main__.py +12 -13
  6. tree_sitter_analyzer/cli/commands/__init__.py +26 -27
  7. tree_sitter_analyzer/cli/commands/advanced_command.py +88 -88
  8. tree_sitter_analyzer/cli/commands/base_command.py +160 -155
  9. tree_sitter_analyzer/cli/commands/default_command.py +18 -19
  10. tree_sitter_analyzer/cli/commands/partial_read_command.py +141 -133
  11. tree_sitter_analyzer/cli/commands/query_command.py +81 -82
  12. tree_sitter_analyzer/cli/commands/structure_command.py +138 -121
  13. tree_sitter_analyzer/cli/commands/summary_command.py +101 -93
  14. tree_sitter_analyzer/cli/commands/table_command.py +235 -233
  15. tree_sitter_analyzer/cli/info_commands.py +120 -121
  16. tree_sitter_analyzer/cli_main.py +278 -276
  17. tree_sitter_analyzer/core/__init__.py +15 -20
  18. tree_sitter_analyzer/core/analysis_engine.py +555 -574
  19. tree_sitter_analyzer/core/cache_service.py +320 -330
  20. tree_sitter_analyzer/core/engine.py +559 -560
  21. tree_sitter_analyzer/core/parser.py +293 -288
  22. tree_sitter_analyzer/core/query.py +502 -502
  23. tree_sitter_analyzer/encoding_utils.py +456 -460
  24. tree_sitter_analyzer/exceptions.py +337 -340
  25. tree_sitter_analyzer/file_handler.py +210 -222
  26. tree_sitter_analyzer/formatters/__init__.py +1 -1
  27. tree_sitter_analyzer/formatters/base_formatter.py +167 -168
  28. tree_sitter_analyzer/formatters/formatter_factory.py +78 -74
  29. tree_sitter_analyzer/formatters/java_formatter.py +291 -270
  30. tree_sitter_analyzer/formatters/python_formatter.py +259 -235
  31. tree_sitter_analyzer/interfaces/__init__.py +9 -10
  32. tree_sitter_analyzer/interfaces/cli.py +528 -557
  33. tree_sitter_analyzer/interfaces/cli_adapter.py +343 -319
  34. tree_sitter_analyzer/interfaces/mcp_adapter.py +206 -170
  35. tree_sitter_analyzer/interfaces/mcp_server.py +405 -416
  36. tree_sitter_analyzer/java_analyzer.py +187 -219
  37. tree_sitter_analyzer/language_detector.py +398 -400
  38. tree_sitter_analyzer/language_loader.py +224 -228
  39. tree_sitter_analyzer/languages/__init__.py +10 -11
  40. tree_sitter_analyzer/languages/java_plugin.py +1174 -1113
  41. tree_sitter_analyzer/{plugins → languages}/javascript_plugin.py +446 -439
  42. tree_sitter_analyzer/languages/python_plugin.py +747 -712
  43. tree_sitter_analyzer/mcp/__init__.py +31 -32
  44. tree_sitter_analyzer/mcp/resources/__init__.py +44 -47
  45. tree_sitter_analyzer/mcp/resources/code_file_resource.py +209 -213
  46. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +555 -550
  47. tree_sitter_analyzer/mcp/server.py +333 -345
  48. tree_sitter_analyzer/mcp/tools/__init__.py +30 -31
  49. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +654 -557
  50. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +247 -245
  51. tree_sitter_analyzer/mcp/tools/base_tool.py +54 -55
  52. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +300 -302
  53. tree_sitter_analyzer/mcp/tools/table_format_tool.py +362 -359
  54. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +543 -476
  55. tree_sitter_analyzer/mcp/utils/__init__.py +107 -106
  56. tree_sitter_analyzer/mcp/utils/error_handler.py +549 -549
  57. tree_sitter_analyzer/models.py +470 -481
  58. tree_sitter_analyzer/output_manager.py +255 -264
  59. tree_sitter_analyzer/plugins/__init__.py +280 -334
  60. tree_sitter_analyzer/plugins/base.py +496 -446
  61. tree_sitter_analyzer/plugins/manager.py +379 -355
  62. tree_sitter_analyzer/queries/__init__.py +26 -27
  63. tree_sitter_analyzer/queries/java.py +391 -394
  64. tree_sitter_analyzer/queries/javascript.py +148 -149
  65. tree_sitter_analyzer/queries/python.py +285 -286
  66. tree_sitter_analyzer/queries/typescript.py +229 -230
  67. tree_sitter_analyzer/query_loader.py +257 -260
  68. tree_sitter_analyzer/table_formatter.py +471 -448
  69. tree_sitter_analyzer/utils.py +277 -277
  70. {tree_sitter_analyzer-0.2.0.dist-info → tree_sitter_analyzer-0.4.0.dist-info}/METADATA +23 -8
  71. tree_sitter_analyzer-0.4.0.dist-info/RECORD +73 -0
  72. {tree_sitter_analyzer-0.2.0.dist-info → tree_sitter_analyzer-0.4.0.dist-info}/entry_points.txt +2 -1
  73. tree_sitter_analyzer/plugins/java_plugin.py +0 -625
  74. tree_sitter_analyzer/plugins/plugin_loader.py +0 -83
  75. tree_sitter_analyzer/plugins/python_plugin.py +0 -598
  76. tree_sitter_analyzer/plugins/registry.py +0 -366
  77. tree_sitter_analyzer-0.2.0.dist-info/RECORD +0 -77
  78. {tree_sitter_analyzer-0.2.0.dist-info → tree_sitter_analyzer-0.4.0.dist-info}/WHEEL +0 -0
@@ -1,1113 +1,1174 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- Java Language Plugin
5
-
6
- Provides Java-specific parsing and element extraction functionality.
7
- Migrated from AdvancedAnalyzer implementation for future independence.
8
- """
9
-
10
- import re
11
- from functools import lru_cache
12
- from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Any
13
-
14
- if TYPE_CHECKING:
15
- import tree_sitter
16
- from ..models import AnalysisRequest, AnalysisResult
17
-
18
- from ..plugins.base import LanguagePlugin, ElementExtractor
19
- from ..models import (
20
- Class,
21
- Function,
22
- Import,
23
- Variable,
24
- Package,
25
- )
26
- from ..utils import log_debug, log_error, log_warning
27
- from ..encoding_utils import extract_text_slice, safe_encode
28
-
29
-
30
- class JavaElementExtractor(ElementExtractor):
31
- """Java-specific element extractor with AdvancedAnalyzer implementation"""
32
-
33
def __init__(self) -> None:
    """Set up per-file analysis state and memoization caches."""
    # Context for the file currently being analyzed.
    self.current_package: str = ""
    self.current_file: str = ""
    self.source_code: str = ""
    self.content_lines: List[str] = []
    self.imports: List[str] = []

    # Memoization caches, keyed by node identity or line number
    # (carried over from the AdvancedAnalyzer implementation).
    self._node_text_cache: Dict[int, str] = {}
    self._processed_nodes: Set[int] = set()
    self._element_cache: Dict[Tuple[int, str], Any] = {}
    self._file_encoding: Optional[str] = None
    self._annotation_cache: Dict[int, List[Dict[str, Any]]] = {}
    self._signature_cache: Dict[int, str] = {}

    # Annotations found in the current file, kept for cross-referencing
    # against classes/methods/fields extracted later.
    self.annotations: List[Dict[str, Any]] = []
def extract_annotations(
    self, tree: "tree_sitter.Tree", source_code: str
) -> List[Dict[str, Any]]:
    """Collect all annotation nodes and remember them for cross-referencing."""
    self.source_code = source_code
    self.content_lines = source_code.split("\n")
    self._reset_caches()

    annotations: List[Dict[str, Any]] = []
    # Both full and marker annotations are handled by the same extractor.
    handlers = {
        "annotation": self._extract_annotation_optimized,
        "marker_annotation": self._extract_annotation_optimized,
    }
    self._traverse_and_extract_iterative(tree.root_node, handlers, annotations, "annotation")

    # Keep a copy so later element extraction can attach nearby annotations.
    self.annotations = annotations

    log_debug(f"Extracted {len(annotations)} annotations")
    return annotations
def extract_functions(
    self, tree: "tree_sitter.Tree", source_code: str
) -> List[Function]:
    """Extract Java methods and constructors as Function models."""
    self.source_code = source_code
    self.content_lines = source_code.split("\n")
    self._reset_caches()

    functions: List[Function] = []
    # Methods and constructors share the same extraction routine.
    handlers = {
        "method_declaration": self._extract_method_optimized,
        "constructor_declaration": self._extract_method_optimized,
    }
    self._traverse_and_extract_iterative(tree.root_node, handlers, functions, "method")

    log_debug(f"Extracted {len(functions)} methods")
    return functions
def extract_classes(
    self, tree: "tree_sitter.Tree", source_code: str
) -> List[Class]:
    """Extract Java classes, interfaces and enums as Class models."""
    self.source_code = source_code
    self.content_lines = source_code.split("\n")
    self._reset_caches()

    classes: List[Class] = []
    # All class-like declarations funnel into one extractor.
    handlers = {
        "class_declaration": self._extract_class_optimized,
        "interface_declaration": self._extract_class_optimized,
        "enum_declaration": self._extract_class_optimized,
    }
    self._traverse_and_extract_iterative(tree.root_node, handlers, classes, "class")

    log_debug(f"Extracted {len(classes)} classes")
    return classes
def extract_variables(
    self, tree: "tree_sitter.Tree", source_code: str
) -> List[Variable]:
    """Extract Java field declarations as Variable models."""
    self.source_code = source_code
    self.content_lines = source_code.split("\n")
    self._reset_caches()

    variables: List[Variable] = []
    handlers = {
        "field_declaration": self._extract_field_optimized,
    }

    log_debug("Starting field extraction with iterative traversal")
    self._traverse_and_extract_iterative(tree.root_node, handlers, variables, "field")

    log_debug(f"Extracted {len(variables)} fields")
    # Trace a small sample of the extracted fields for debugging.
    for i, var in enumerate(variables[:3]):
        log_debug(f"Field {i}: {var.name} ({var.variable_type})")
    return variables
def extract_imports(
    self, tree: "tree_sitter.Tree", source_code: str
) -> List[Import]:
    """Extract import statements (and record the package) from the file header."""
    self.source_code = source_code
    self.content_lines = source_code.split("\n")

    imports: List[Import] = []

    # package/import declarations always precede type declarations, so a
    # single pass over the root's children suffices.
    for child in tree.root_node.children:
        child_type = child.type
        if child_type == "package_declaration":
            self._extract_package_info(child)
        elif child_type == "import_declaration":
            import_info = self._extract_import_info(child, source_code)
            if import_info:
                imports.append(import_info)
        elif child_type in (
            "class_declaration",
            "interface_declaration",
            "enum_declaration",
        ):
            # First type declaration reached: nothing left to scan.
            break

    log_debug(f"Extracted {len(imports)} imports")
    return imports
def extract_packages(
    self, tree: "tree_sitter.Tree", source_code: str
) -> List[Package]:
    """Extract the file's package declaration (Java allows at most one)."""
    self.source_code = source_code
    self.content_lines = source_code.split("\n")

    packages: List[Package] = []
    for child in tree.root_node.children:
        if child.type == "package_declaration":
            package_info = self._extract_package_element(child)
            if package_info:
                packages.append(package_info)
            # A Java file has a single package declaration at most.
            break

    log_debug(f"Extracted {len(packages)} packages")
    return packages
def _reset_caches(self) -> None:
    """Drop all per-file memoization state before analyzing a new tree."""
    # Note: self._file_encoding is deliberately left untouched.
    for cache in (
        self._node_text_cache,
        self._processed_nodes,
        self._element_cache,
        self._annotation_cache,
        self._signature_cache,
        self.annotations,
    ):
        cache.clear()
def _traverse_and_extract_iterative(
    self,
    root_node: "tree_sitter.Node",
    extractors: Dict[str, Any],
    results: List[Any],
    element_type: str
) -> None:
    """Iteratively walk the tree and run *extractors* on matching nodes.

    Uses an explicit DFS stack (no recursion), prunes subtrees that cannot
    contain targets, de-duplicates per node via ``_processed_nodes``,
    memoizes per-node results in ``_element_cache``, and flushes
    ``field_declaration`` nodes in batches of 10.
    """
    if not root_node:
        return

    target_node_types = set(extractors.keys())

    # Only these node kinds may contain extraction targets; all other
    # subtrees are pruned from the walk.
    container_node_types = {
        "program",
        "class_body",
        "interface_body",
        "enum_body",
        "class_declaration",
        "interface_declaration",
        "enum_declaration",
        "method_declaration",
        "constructor_declaration",
        "block",
        "modifiers",  # annotations may be nested inside the modifier list
    }

    def _emit(found: Any) -> None:
        # Extractors may return one element or a list of them.
        if isinstance(found, list):
            results.extend(found)
        else:
            results.append(found)

    node_stack = [(root_node, 0)]
    processed_nodes = 0
    max_depth = 50  # safety ceiling against runaway traversal
    field_batch: List[Any] = []

    while node_stack:
        current_node, depth = node_stack.pop()

        if depth > max_depth:
            log_warning(f"Maximum traversal depth ({max_depth}) exceeded")
            continue

        processed_nodes += 1
        node_type = current_node.type

        # Prune: below the root, skip anything that is neither a target
        # nor a container of targets.
        if (
            depth > 0
            and node_type not in target_node_types
            and node_type not in container_node_types
        ):
            continue

        if node_type in target_node_types:
            if element_type == "field" and node_type == "field_declaration":
                # Fields are batched; children are still pushed below.
                field_batch.append(current_node)
            else:
                node_id = id(current_node)

                if node_id in self._processed_nodes:
                    # Already handled: do not descend again.
                    continue

                cache_key = (node_id, element_type)
                if cache_key in self._element_cache:
                    cached = self._element_cache[cache_key]
                    if cached:
                        _emit(cached)
                    self._processed_nodes.add(node_id)
                    # Cache hit: this subtree was walked before.
                    continue

                extractor = extractors.get(node_type)
                if extractor:
                    extracted = extractor(current_node)
                    self._element_cache[cache_key] = extracted
                    if extracted:
                        _emit(extracted)
                    self._processed_nodes.add(node_id)

        # Depth-first order: push children reversed so they pop in order.
        if current_node.children:
            for child in reversed(current_node.children):
                node_stack.append((child, depth + 1))

        # Flush fields in small batches for cache-friendly processing.
        if len(field_batch) >= 10:
            self._process_field_batch(field_batch, extractors, results)
            field_batch.clear()

    # Flush whatever fields remain after the walk.
    if field_batch:
        self._process_field_batch(field_batch, extractors, results)

    log_debug(f"Iterative traversal processed {processed_nodes} nodes")
def _process_field_batch(self, batch: List["tree_sitter.Node"], extractors: Dict, results: List[Any]) -> None:
    """Extract field elements from *batch*, honoring the per-node caches."""

    def _emit(found: Any) -> None:
        # Field extractors may return a single element or a list.
        if isinstance(found, list):
            results.extend(found)
        else:
            results.append(found)

    for field_node in batch:
        node_id = id(field_node)

        if node_id in self._processed_nodes:
            # Already emitted for this node.
            continue

        cache_key = (node_id, "field")
        if cache_key in self._element_cache:
            cached = self._element_cache[cache_key]
            if cached:
                _emit(cached)
            self._processed_nodes.add(node_id)
            continue

        extractor = extractors.get(field_node.type)
        if extractor:
            extracted = extractor(field_node)
            self._element_cache[cache_key] = extracted
            if extracted:
                _emit(extracted)
            self._processed_nodes.add(node_id)
def _get_node_text_optimized(self, node: "tree_sitter.Node") -> str:
    """Return the source text for *node*, with two levels of caching.

    Per-node results are memoized in ``_node_text_cache``.  The encoded
    source bytes are additionally memoized (keyed on the identity of
    ``content_lines`` plus the encoding), fixing a performance defect:
    the original re-encoded the ENTIRE file on every call, an O(file)
    cost per node and quadratic overall.  Falls back to line/column
    slicing when byte slicing fails.
    """
    node_id = id(node)

    if node_id in self._node_text_cache:
        return self._node_text_cache[node_id]

    try:
        start_byte = node.start_byte
        end_byte = node.end_byte
        encoding = self._file_encoding or "utf-8"

        # Memoize the encoded source.  Holding a reference to the exact
        # content_lines list makes the identity check reliable: a new
        # file assigns a new list, invalidating this cache.
        if (
            getattr(self, "_encoded_source_lines", None) is not self.content_lines
            or getattr(self, "_encoded_source_encoding", None) != encoding
        ):
            self._encoded_source_bytes = safe_encode(
                "\n".join(self.content_lines), encoding
            )
            self._encoded_source_lines = self.content_lines
            self._encoded_source_encoding = encoding

        text = extract_text_slice(
            self._encoded_source_bytes, start_byte, end_byte, encoding
        )

        self._node_text_cache[node_id] = text
        return text
    except Exception as e:
        log_error(f"Error in _get_node_text_optimized: {e}")
        # Fallback: slice by (row, column) points instead of bytes.
        # Intentionally not cached, mirroring the original behavior.
        try:
            start_point = node.start_point
            end_point = node.end_point

            if start_point[0] == end_point[0]:
                # Single-line node: slice within the one line.
                line = self.content_lines[start_point[0]]
                return line[start_point[1] : end_point[1]]
            else:
                # Multi-line node: trim first and last lines to columns.
                lines = []
                for i in range(start_point[0], end_point[0] + 1):
                    if i < len(self.content_lines):
                        line = self.content_lines[i]
                        if i == start_point[0]:
                            lines.append(line[start_point[1] :])
                        elif i == end_point[0]:
                            lines.append(line[: end_point[1]])
                        else:
                            lines.append(line)
                return "\n".join(lines)
        except Exception as fallback_error:
            log_error(f"Fallback text extraction also failed: {fallback_error}")
            return ""
def _extract_class_optimized(self, node: "tree_sitter.Node") -> Optional[Class]:
    """Build a Class model from a class/interface/enum declaration node.

    Returns None when the declaration has no identifier or extraction fails.
    """
    try:
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        # The first identifier child is the declared name.
        class_name = next(
            (
                self._get_node_text_optimized(child)
                for child in node.children
                if child.type == "identifier"
            ),
            None,
        )
        if not class_name:
            return None

        package_name = self.current_package
        full_qualified_name = (
            f"{package_name}.{class_name}" if package_name else class_name
        )

        # Node type → logical class kind (defaults to "class").
        class_type = {
            "class_declaration": "class",
            "interface_declaration": "interface",
            "enum_declaration": "enum",
        }.get(node.type, "class")

        modifiers = self._extract_modifiers_optimized(node)
        visibility = self._determine_visibility(modifiers)

        # Single pass over children for the extends/implements clauses.
        extends_class = None
        implements_interfaces: List[str] = []
        for child in node.children:
            if child.type == "superclass":
                match = re.search(
                    r"\b[A-Z]\w*", self._get_node_text_optimized(child)
                )
                if match:
                    extends_class = match.group(0)
            elif child.type == "super_interfaces":
                implements_interfaces = re.findall(
                    r"\b[A-Z]\w*", self._get_node_text_optimized(child)
                )

        class_annotations = self._find_annotations_for_line_cached(start_line)

        is_nested = self._is_nested_class(node)
        parent_class = self._find_parent_class(node) if is_nested else None

        # Raw text spans the declaration's full line range.
        raw_text = "\n".join(
            self.content_lines[
                max(0, start_line - 1) : min(len(self.content_lines), end_line)
            ]
        )

        return Class(
            name=class_name,
            start_line=start_line,
            end_line=end_line,
            raw_text=raw_text,
            language="java",
            class_type=class_type,
            full_qualified_name=full_qualified_name,
            package_name=package_name,
            superclass=extends_class,
            interfaces=implements_interfaces,
            modifiers=modifiers,
            visibility=visibility,
            # Java-specific detailed information
            annotations=class_annotations,
            is_nested=is_nested,
            parent_class=parent_class,
            extends_class=extends_class,  # Alias for superclass
            implements_interfaces=implements_interfaces,  # Alias for interfaces
        )
    except (AttributeError, ValueError, TypeError) as e:
        log_debug(f"Failed to extract class info: {e}")
        return None
    except Exception as e:
        log_error(f"Unexpected error in class extraction: {e}")
        return None
def _extract_method_optimized(self, node: "tree_sitter.Node") -> Optional[Function]:
    """Build a Function model from a method/constructor declaration node.

    Returns None when the signature cannot be parsed or extraction fails.
    """
    try:
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        signature = self._parse_method_signature_optimized(node)
        if not signature:
            return None
        method_name, return_type, parameters, modifiers, throws = signature

        is_constructor = node.type == "constructor_declaration"
        visibility = self._determine_visibility(modifiers)

        method_annotations = self._find_annotations_for_line_cached(start_line)
        complexity_score = self._calculate_complexity_optimized(node)
        javadoc = self._extract_javadoc_for_line(start_line)

        # Raw text spans the declaration's full line range.
        raw_text = "\n".join(
            self.content_lines[
                max(0, start_line - 1) : min(len(self.content_lines), end_line)
            ]
        )

        return Function(
            name=method_name,
            start_line=start_line,
            end_line=end_line,
            raw_text=raw_text,
            language="java",
            parameters=parameters,
            # Constructors are normalized to a "void" return type.
            return_type="void" if is_constructor else return_type,
            modifiers=modifiers,
            is_static="static" in modifiers,
            is_private="private" in modifiers,
            is_public="public" in modifiers,
            is_constructor=is_constructor,
            visibility=visibility,
            docstring=javadoc,
            # Java-specific detailed information
            annotations=method_annotations,
            throws=throws,
            complexity_score=complexity_score,
            is_abstract="abstract" in modifiers,
            is_final="final" in modifiers,
        )
    except (AttributeError, ValueError, TypeError) as e:
        log_debug(f"Failed to extract method info: {e}")
        return None
    except Exception as e:
        log_error(f"Unexpected error in method extraction: {e}")
        return None
def _extract_field_optimized(self, node: "tree_sitter.Node") -> List[Variable]:
    """Build Variable models for every declarator in a field declaration.

    A single declaration such as ``int a, b;`` yields one Variable per
    declared name.  Returns an empty list when parsing fails.
    """
    fields: List[Variable] = []
    try:
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        field_info = self._parse_field_declaration_optimized(node)
        if not field_info:
            return fields
        field_type, variable_names, modifiers = field_info

        visibility = self._determine_visibility(modifiers)
        field_annotations = self._find_annotations_for_line_cached(start_line)
        field_javadoc = self._extract_javadoc_for_line(start_line)

        # Hoisted out of the per-variable loop (fix): the raw text is the
        # whole declaration and is identical for every declarator.
        start_line_idx = max(0, start_line - 1)
        end_line_idx = min(len(self.content_lines), end_line)
        raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])

        for var_name in variable_names:
            fields.append(
                Variable(
                    name=var_name,
                    start_line=start_line,
                    end_line=end_line,
                    raw_text=raw_text,
                    language="java",
                    variable_type=field_type,
                    modifiers=modifiers,
                    is_static="static" in modifiers,
                    is_constant="final" in modifiers,
                    visibility=visibility,
                    docstring=field_javadoc,
                    # Java-specific detailed information
                    annotations=field_annotations,
                    is_final="final" in modifiers,
                    field_type=field_type,  # Alias for variable_type
                )
            )
    except (AttributeError, ValueError, TypeError) as e:
        log_debug(f"Failed to extract field info: {e}")
    except Exception as e:
        log_error(f"Unexpected error in field extraction: {e}")

    return fields
def _parse_method_signature_optimized(
    self, node: "tree_sitter.Node"
) -> Optional[Tuple[str, str, List[str], List[str], List[str]]]:
    """Parse a method/constructor node into its signature parts.

    Returns ``(name, return_type, parameters, modifiers, throws)`` or
    None when no identifier is found or parsing fails.
    """
    try:
        # The first identifier child is the method name.
        method_name = None
        for child in node.children:
            if child.type == "identifier":
                method_name = self._get_node_text_optimized(child)
                break

        if not method_name:
            return None

        # Return type: first type-like child wins; constructors have
        # none, so the "void" default stands.  (The original had a
        # redundant elif branch for "generic_type" with an identical
        # body — merged into one membership test.)
        return_type = "void"
        for child in node.children:
            if child.type in (
                "type_identifier",
                "void_type",
                "primitive_type",
                "integral_type",
                "generic_type",
            ):
                return_type = self._get_node_text_optimized(child)
                break

        # Parameters: full text of each formal parameter.
        parameters = [
            self._get_node_text_optimized(param)
            for child in node.children
            if child.type == "formal_parameters"
            for param in child.children
            if param.type == "formal_parameter"
        ]

        modifiers = self._extract_modifiers_optimized(node)

        # Throws clause: collect exception type names by convention
        # (identifiers ending in "Exception").
        throws: List[str] = []
        for child in node.children:
            if child.type == "throws":
                throws_text = self._get_node_text_optimized(child)
                throws.extend(re.findall(r"\b[A-Z]\w*Exception\b", throws_text))

        return method_name, return_type, parameters, modifiers, throws
    except Exception:
        return None
def _parse_field_declaration_optimized(
    self, node: "tree_sitter.Node"
) -> Optional[Tuple[str, List[str], List[str]]]:
    """Parse a field declaration into (type, names, modifiers), or None."""
    try:
        # First type-like child is the field's declared type.
        field_type = None
        for child in node.children:
            if child.type in ("type_identifier", "primitive_type", "integral_type"):
                field_type = self._get_node_text_optimized(child)
                break

        if not field_type:
            return None

        # One declaration may introduce several names (e.g. "int a, b;").
        variable_names = [
            self._get_node_text_optimized(grandchild)
            for child in node.children
            if child.type == "variable_declarator"
            for grandchild in child.children
            if grandchild.type == "identifier"
        ]

        if not variable_names:
            return None

        return field_type, variable_names, self._extract_modifiers_optimized(node)
    except Exception:
        return None
def _extract_modifiers_optimized(self, node: "tree_sitter.Node") -> List[str]:
    """Collect Java modifier keywords (public, static, ...) from *node*.

    A keyword is recognized either directly from the child node's type
    or, for wrapped tokens, from the node's text; annotations are
    skipped.  (Fix: the original duplicated the keyword list verbatim
    for the type check and the text check — now a single frozenset.)
    """
    keywords = frozenset(
        {
            "public",
            "private",
            "protected",
            "static",
            "final",
            "abstract",
            "synchronized",
            "volatile",
            "transient",
        }
    )
    modifiers: List[str] = []
    for child in node.children:
        if child.type != "modifiers":
            continue
        for mod_child in child.children:
            if mod_child.type in keywords:
                modifiers.append(mod_child.type)
            elif mod_child.type != "marker_annotation":
                # Wrapped token: fall back to its text.
                mod_text = self._get_node_text_optimized(mod_child)
                if mod_text in keywords:
                    modifiers.append(mod_text)
    return modifiers
def _extract_package_info(self, node: "tree_sitter.Node") -> None:
    """Record the file's package name in ``self.current_package``."""
    try:
        match = re.search(
            r"package\s+([\w.]+)", self._get_node_text_optimized(node)
        )
        if match:
            self.current_package = match.group(1)
    except (AttributeError, ValueError, IndexError) as e:
        log_debug(f"Failed to extract package info: {e}")
    except Exception as e:
        log_error(f"Unexpected error in package extraction: {e}")
def _extract_package_element(self, node: "tree_sitter.Node") -> Optional[Package]:
    """Build a Package model from a package declaration node, or None."""
    try:
        package_text = self._get_node_text_optimized(node)
        match = re.search(r"package\s+([\w.]+)", package_text)
        if match:
            return Package(
                name=match.group(1),
                start_line=node.start_point[0] + 1,
                end_line=node.end_point[0] + 1,
                raw_text=package_text,
                language="java",
            )
    except (AttributeError, ValueError, IndexError) as e:
        log_debug(f"Failed to extract package element: {e}")
    except Exception as e:
        log_error(f"Unexpected error in package element extraction: {e}")
    return None
def _determine_visibility(self, modifiers: List[str]) -> str:
    """Map Java modifiers to a visibility string.

    Checks public, then private, then protected; anything else is
    Java's implicit package-private visibility.
    """
    for level in ("public", "private", "protected"):
        if level in modifiers:
            return level
    return "package"  # Default package visibility
def _find_annotations_for_line_cached(self, target_line: int) -> List[Dict[str, Any]]:
    """Return annotations ending 1-5 lines above *target_line* (memoized).

    The 1-5 line window is the heuristic for "this annotation decorates
    the declaration on target_line".
    """
    cached = self._annotation_cache.get(target_line)
    if cached is not None:
        return cached

    nearby = [
        annotation
        for annotation in self.annotations
        if 1 <= target_line - annotation.get("end_line", 0) <= 5
    ]

    self._annotation_cache[target_line] = nearby
    return nearby
def _calculate_complexity_optimized(self, node: "tree_sitter.Node") -> int:
    """Approximate cyclomatic complexity by counting branch keywords.

    NOTE: this is a plain substring count over the lowered node text, so
    it can over-count (e.g. "if" inside an identifier); kept for parity
    with the original heuristic.  Base complexity is 1.
    """
    complexity = 1
    try:
        node_text = self._get_node_text_optimized(node).lower()
        complexity += sum(
            node_text.count(keyword)
            for keyword in ("if", "while", "for", "catch", "case", "switch")
        )
    except (AttributeError, TypeError) as e:
        log_debug(f"Failed to calculate complexity: {e}")
    except Exception as e:
        log_error(f"Unexpected error in complexity calculation: {e}")
    return complexity
def _extract_javadoc_for_line(self, target_line: int) -> Optional[str]:
    """Return the JavaDoc block immediately above *target_line*, or None.

    Fixes two defects in the original: (1) when no ``*/`` terminator sat
    directly above the declaration, the upward scan still collected
    intervening CODE lines until any earlier ``/**`` and returned them
    as "JavaDoc"; (2) a single-line ``/** ... */`` comment triggered the
    same runaway scan.  Now a terminator is required before scanning,
    and single-line JavaDoc is returned directly.
    """
    try:
        line_no = target_line - 1

        # Skip blank lines directly above the declaration.
        while 0 < line_no <= len(self.content_lines):
            if self.content_lines[line_no - 1].strip():
                break
            line_no -= 1

        if not (0 < line_no <= len(self.content_lines)):
            return None

        last = self.content_lines[line_no - 1].strip()
        if not last.endswith("*/"):
            # No comment terminator above: there is no JavaDoc here.
            return None
        if last.startswith("/**"):
            # Single-line JavaDoc: /** ... */
            return last

        # Collect upwards until the opening /** is found.
        javadoc_lines = [last]
        line_no -= 1
        while line_no > 0:
            stripped = self.content_lines[line_no - 1].strip()
            javadoc_lines.append(stripped)
            if stripped.startswith("/**"):
                javadoc_lines.reverse()
                return "\n".join(javadoc_lines)
            line_no -= 1

        # Terminator without a /** opener: not a JavaDoc block.
        return None

    except Exception as e:
        log_debug(f"Failed to extract JavaDoc: {e}")
        return None
- def _is_nested_class(self, node: "tree_sitter.Node") -> bool:
822
- """Check if this is a nested class (from AdvancedAnalyzer)"""
823
- current = node.parent
824
- while current:
825
- if current.type in [
826
- "class_declaration",
827
- "interface_declaration",
828
- "enum_declaration",
829
- ]:
830
- return True
831
- current = current.parent
832
- return False
833
-
834
- def _find_parent_class(self, node: "tree_sitter.Node") -> Optional[str]:
835
- """Find parent class name (from AdvancedAnalyzer)"""
836
- current = node.parent
837
- while current:
838
- if current.type in [
839
- "class_declaration",
840
- "interface_declaration",
841
- "enum_declaration",
842
- ]:
843
- return self._extract_class_name(current)
844
- current = current.parent
845
- return None
846
-
847
- def _extract_class_name(self, node: "tree_sitter.Node") -> Optional[str]:
848
- """Extract class name from node (from AdvancedAnalyzer)"""
849
- for child in node.children:
850
- if child.type == "identifier":
851
- return self._get_node_text_optimized(child)
852
- return None
853
-
854
- def _extract_annotation_optimized(self, node: "tree_sitter.Node") -> Optional[Dict[str, Any]]:
855
- """Extract annotation information optimized (from AdvancedAnalyzer)"""
856
- try:
857
- start_line = node.start_point[0] + 1
858
- end_line = node.end_point[0] + 1
859
- raw_text = self._get_node_text_optimized(node)
860
-
861
- # Extract annotation name efficiently
862
- name_match = re.search(r"@(\w+)", raw_text)
863
- if not name_match:
864
- return None
865
-
866
- annotation_name = name_match.group(1)
867
-
868
- # Extract parameters efficiently
869
- parameters = []
870
- param_match = re.search(r"\((.*?)\)", raw_text, re.DOTALL)
871
- if param_match:
872
- param_text = param_match.group(1).strip()
873
- if param_text:
874
- # Simple parameter parsing
875
- if "=" in param_text:
876
- parameters = [
877
- p.strip() for p in re.split(r",(?![^()]*\))", param_text)
878
- ]
879
- else:
880
- parameters = [param_text]
881
-
882
- return {
883
- "name": annotation_name,
884
- "parameters": parameters,
885
- "start_line": start_line,
886
- "end_line": end_line,
887
- "raw_text": raw_text,
888
- }
889
- except (AttributeError, IndexError, ValueError) as e:
890
- log_debug(f"Failed to extract annotation from node: {e}")
891
- return None
892
- except Exception as e:
893
- log_error(f"Unexpected exception in annotation extraction: {e}")
894
- return None
895
-
896
- def _extract_import_info(
897
- self, node: "tree_sitter.Node", source_code: str
898
- ) -> Optional[Import]:
899
- """Extract import information (from AdvancedAnalyzer)"""
900
- try:
901
- import_text = self._get_node_text_optimized(node)
902
- # Simple approach: get everything until semicolon then process
903
- import_content = import_text.strip()
904
- if import_content.endswith(";"):
905
- import_content = import_content[:-1]
906
-
907
- if "static" in import_content:
908
- # Static import
909
- static_match = re.search(r"import\s+static\s+([\w.]+)", import_content)
910
- if static_match:
911
- import_name = static_match.group(1)
912
- # Handle wildcard case
913
- if import_content.endswith(".*"):
914
- import_name = import_name.replace(".*", "")
915
- # For static wildcard, remove last element
916
- parts = import_name.split(".")
917
- if len(parts) > 1:
918
- import_name = ".".join(parts[:-1])
919
-
920
- return Import(
921
- name=import_name,
922
- start_line=node.start_point[0] + 1,
923
- end_line=node.end_point[0] + 1,
924
- raw_text=import_text,
925
- language="java",
926
- module_name=import_name,
927
- is_static=True,
928
- is_wildcard=import_content.endswith(".*"),
929
- import_statement=import_content,
930
- )
931
- else:
932
- # Normal import
933
- normal_match = re.search(r"import\s+([\w.]+)", import_content)
934
- if normal_match:
935
- import_name = normal_match.group(1)
936
- # Handle wildcard case
937
- if import_content.endswith(".*"):
938
- if import_name.endswith(".*"):
939
- import_name = import_name[:-2] # Remove trailing .*
940
- elif import_name.endswith("."):
941
- import_name = import_name[:-1] # Remove trailing .
942
-
943
- return Import(
944
- name=import_name,
945
- start_line=node.start_point[0] + 1,
946
- end_line=node.end_point[0] + 1,
947
- raw_text=import_text,
948
- language="java",
949
- module_name=import_name,
950
- is_static=False,
951
- is_wildcard=import_content.endswith(".*"),
952
- import_statement=import_content,
953
- )
954
- except (AttributeError, ValueError, IndexError) as e:
955
- log_debug(f"Failed to extract import info: {e}")
956
- except Exception as e:
957
- log_error(f"Unexpected error in import extraction: {e}")
958
- return None
959
-
960
-
961
- class JavaPlugin(LanguagePlugin):
962
- """Java language plugin for the new architecture"""
963
-
964
- def __init__(self) -> None:
965
- """Initialize the Java plugin"""
966
- super().__init__()
967
- self._language_cache: Optional["tree_sitter.Language"] = None
968
-
969
- def get_language_name(self) -> str:
970
- """Return the name of the programming language this plugin supports"""
971
- return "java"
972
-
973
- def get_file_extensions(self) -> List[str]:
974
- """Return list of file extensions this plugin supports"""
975
- return [".java", ".jsp", ".jspx"]
976
-
977
- def create_extractor(self) -> ElementExtractor:
978
- """Create and return an element extractor for this language"""
979
- return JavaElementExtractor()
980
-
981
- def get_tree_sitter_language(self) -> Optional["tree_sitter.Language"]:
982
- """Get the Tree-sitter language object for Java"""
983
- if self._language_cache is None:
984
- try:
985
- import tree_sitter_java as tsjava
986
- self._language_cache = tsjava.language()
987
- except ImportError:
988
- log_error("tree-sitter-java not available")
989
- return None
990
- except Exception as e:
991
- log_error(f"Failed to load Java language: {e}")
992
- return None
993
- return self._language_cache
994
-
995
- def get_supported_queries(self) -> List[str]:
996
- """Get list of supported query names for this language"""
997
- return ["class", "method", "field", "import"]
998
-
999
- def is_applicable(self, file_path: str) -> bool:
1000
- """Check if this plugin is applicable for the given file"""
1001
- return any(file_path.lower().endswith(ext.lower()) for ext in self.get_file_extensions())
1002
-
1003
- def get_plugin_info(self) -> dict:
1004
- """Get information about this plugin"""
1005
- return {
1006
- "name": "Java Plugin",
1007
- "language": self.get_language_name(),
1008
- "extensions": self.get_file_extensions(),
1009
- "version": "2.0.0",
1010
- "supported_queries": self.get_supported_queries()
1011
- }
1012
-
1013
- async def analyze_file(self, file_path: str, request: 'AnalysisRequest') -> 'AnalysisResult':
1014
- """
1015
- Analyze a Java file and return analysis results.
1016
-
1017
- Args:
1018
- file_path: Path to the Java file to analyze
1019
- request: Analysis request object
1020
-
1021
- Returns:
1022
- AnalysisResult object containing the analysis results
1023
- """
1024
- try:
1025
- from ..models import AnalysisResult
1026
- from ..core.parser import Parser
1027
-
1028
- log_debug(f"Java Plugin: Starting analysis of {file_path}")
1029
-
1030
- # Read file content
1031
- with open(file_path, 'r', encoding='utf-8') as f:
1032
- source_code = f.read()
1033
-
1034
- log_debug(f"Java Plugin: Read {len(source_code)} characters from file")
1035
-
1036
- # Parse the file
1037
- parser = Parser()
1038
- parse_result = parser.parse_code(source_code, "java")
1039
-
1040
- log_debug(f"Java Plugin: Parse result success: {parse_result.success}")
1041
-
1042
- if not parse_result.success:
1043
- log_error(f"Java Plugin: Parse failed: {parse_result.error_message}")
1044
- return AnalysisResult(
1045
- file_path=file_path,
1046
- language="java",
1047
- line_count=len(source_code.splitlines()),
1048
- elements=[],
1049
- node_count=0,
1050
- query_results={},
1051
- source_code=source_code,
1052
- success=False,
1053
- error_message=parse_result.error_message
1054
- )
1055
-
1056
- # Extract elements
1057
- extractor = self.create_extractor()
1058
-
1059
- log_debug("Java Plugin: Extracting annotations...")
1060
- annotations = extractor.extract_annotations(parse_result.tree, source_code)
1061
- log_debug(f"Java Plugin: Found {len(annotations)} annotations")
1062
-
1063
- log_debug("Java Plugin: Extracting packages...")
1064
- packages = extractor.extract_packages(parse_result.tree, source_code)
1065
- log_debug(f"Java Plugin: Found {len(packages)} packages")
1066
-
1067
- log_debug("Java Plugin: Extracting functions...")
1068
- functions = extractor.extract_functions(parse_result.tree, source_code)
1069
- log_debug(f"Java Plugin: Found {len(functions)} functions")
1070
-
1071
- log_debug("Java Plugin: Extracting classes...")
1072
- classes = extractor.extract_classes(parse_result.tree, source_code)
1073
- log_debug(f"Java Plugin: Found {len(classes)} classes")
1074
-
1075
- log_debug("Java Plugin: Extracting variables...")
1076
- variables = extractor.extract_variables(parse_result.tree, source_code)
1077
- log_debug(f"Java Plugin: Found {len(variables)} variables")
1078
-
1079
- log_debug("Java Plugin: Extracting imports...")
1080
- imports = extractor.extract_imports(parse_result.tree, source_code)
1081
- log_debug(f"Java Plugin: Found {len(imports)} imports")
1082
-
1083
- # Combine all elements (annotations are stored in extractor for cross-referencing)
1084
- all_elements = packages + functions + classes + variables + imports
1085
- log_debug(f"Java Plugin: Total elements: {len(all_elements)}")
1086
-
1087
- return AnalysisResult(
1088
- file_path=file_path,
1089
- language="java",
1090
- line_count=len(source_code.splitlines()),
1091
- elements=all_elements,
1092
- node_count=parse_result.tree.root_node.child_count if parse_result.tree else 0,
1093
- query_results={},
1094
- source_code=source_code,
1095
- success=True,
1096
- error_message=None
1097
- )
1098
-
1099
- except Exception as e:
1100
- log_error(f"Failed to analyze Java file {file_path}: {e}")
1101
- import traceback
1102
- log_error(f"Java Plugin traceback: {traceback.format_exc()}")
1103
- return AnalysisResult(
1104
- file_path=file_path,
1105
- language="java",
1106
- line_count=0,
1107
- elements=[],
1108
- node_count=0,
1109
- query_results={},
1110
- source_code="",
1111
- success=False,
1112
- error_message=str(e)
1113
- )
1
+ #!/usr/bin/env python3
2
+ """
3
+ Java Language Plugin
4
+
5
+ Provides Java-specific parsing and element extraction functionality.
6
+ Migrated from AdvancedAnalyzer implementation for future independence.
7
+ """
8
+
9
+ import re
10
+ from typing import TYPE_CHECKING, Any, Optional
11
+
12
+ if TYPE_CHECKING:
13
+ import tree_sitter
14
+
15
+ from ..core.analysis_engine import AnalysisRequest
16
+ from ..models import AnalysisResult
17
+
18
+ from ..encoding_utils import extract_text_slice, safe_encode
19
+ from ..models import Class, CodeElement, Function, Import, Package, Variable
20
+ from ..plugins.base import ElementExtractor, LanguagePlugin
21
+ from ..utils import log_debug, log_error, log_warning
22
+
23
+
24
+ class JavaElementExtractor(ElementExtractor):
25
+ """Java-specific element extractor with AdvancedAnalyzer implementation"""
26
+
27
+ def __init__(self) -> None:
28
+ """Initialize the Java element extractor."""
29
+ self.current_package: str = ""
30
+ self.current_file: str = ""
31
+ self.source_code: str = ""
32
+ self.content_lines: list[str] = []
33
+ self.imports: list[str] = []
34
+
35
+ # Performance optimization caches (from AdvancedAnalyzer)
36
+ self._node_text_cache: dict[int, str] = {}
37
+ self._processed_nodes: set[int] = set()
38
+ self._element_cache: dict[tuple[int, str], Any] = {}
39
+ self._file_encoding: str | None = None
40
+ self._annotation_cache: dict[int, list[dict[str, Any]]] = {}
41
+ self._signature_cache: dict[int, str] = {}
42
+
43
+ # Extracted annotations for cross-referencing
44
+ self.annotations: list[dict[str, Any]] = []
45
+
46
+ def extract_annotations(
47
+ self, tree: "tree_sitter.Tree", source_code: str
48
+ ) -> list[dict[str, Any]]:
49
+ """Extract Java annotations using AdvancedAnalyzer implementation"""
50
+ self.source_code = source_code
51
+ self.content_lines = source_code.split("\n")
52
+ self._reset_caches()
53
+
54
+ annotations: list[dict[str, Any]] = []
55
+
56
+ # Use AdvancedAnalyzer's optimized traversal for annotations
57
+ extractors = {
58
+ "annotation": self._extract_annotation_optimized,
59
+ "marker_annotation": self._extract_annotation_optimized,
60
+ }
61
+
62
+ self._traverse_and_extract_iterative(
63
+ tree.root_node, extractors, annotations, "annotation"
64
+ )
65
+
66
+ # Store annotations for cross-referencing
67
+ self.annotations = annotations
68
+
69
+ log_debug(f"Extracted {len(annotations)} annotations")
70
+ return annotations
71
+
72
+ def extract_functions(
73
+ self, tree: "tree_sitter.Tree", source_code: str
74
+ ) -> list[Function]:
75
+ """Extract Java method definitions using AdvancedAnalyzer implementation"""
76
+ self.source_code = source_code
77
+ self.content_lines = source_code.split("\n")
78
+ self._reset_caches()
79
+
80
+ functions: list[Function] = []
81
+
82
+ # Use AdvancedAnalyzer's optimized traversal
83
+ extractors = {
84
+ "method_declaration": self._extract_method_optimized,
85
+ "constructor_declaration": self._extract_method_optimized,
86
+ }
87
+
88
+ self._traverse_and_extract_iterative(
89
+ tree.root_node, extractors, functions, "method"
90
+ )
91
+
92
+ log_debug(f"Extracted {len(functions)} methods")
93
+ return functions
94
+
95
+ def extract_classes(
96
+ self, tree: "tree_sitter.Tree", source_code: str
97
+ ) -> list[Class]:
98
+ """Extract Java class definitions using AdvancedAnalyzer implementation"""
99
+ self.source_code = source_code
100
+ self.content_lines = source_code.split("\n")
101
+ self._reset_caches()
102
+
103
+ classes: list[Class] = []
104
+
105
+ # Use AdvancedAnalyzer's optimized traversal
106
+ extractors = {
107
+ "class_declaration": self._extract_class_optimized,
108
+ "interface_declaration": self._extract_class_optimized,
109
+ "enum_declaration": self._extract_class_optimized,
110
+ }
111
+
112
+ self._traverse_and_extract_iterative(
113
+ tree.root_node, extractors, classes, "class"
114
+ )
115
+
116
+ log_debug(f"Extracted {len(classes)} classes")
117
+ return classes
118
+
119
+ def extract_variables(
120
+ self, tree: "tree_sitter.Tree", source_code: str
121
+ ) -> list[Variable]:
122
+ """Extract Java field definitions using AdvancedAnalyzer implementation"""
123
+ self.source_code = source_code
124
+ self.content_lines = source_code.split("\n")
125
+ self._reset_caches()
126
+
127
+ variables: list[Variable] = []
128
+
129
+ # Use AdvancedAnalyzer's optimized traversal
130
+ extractors = {
131
+ "field_declaration": self._extract_field_optimized,
132
+ }
133
+
134
+ log_debug("Starting field extraction with iterative traversal")
135
+ self._traverse_and_extract_iterative(
136
+ tree.root_node, extractors, variables, "field"
137
+ )
138
+
139
+ log_debug(f"Extracted {len(variables)} fields")
140
+ for i, var in enumerate(variables[:3]):
141
+ log_debug(f"Field {i}: {var.name} ({var.variable_type})")
142
+ return variables
143
+
144
+ def extract_imports(
145
+ self, tree: "tree_sitter.Tree", source_code: str
146
+ ) -> list[Import]:
147
+ """Extract Java import statements"""
148
+ self.source_code = source_code
149
+ self.content_lines = source_code.split("\n")
150
+
151
+ imports: list[Import] = []
152
+
153
+ # Extract package and imports efficiently (from AdvancedAnalyzer)
154
+ for child in tree.root_node.children:
155
+ if child.type == "package_declaration":
156
+ self._extract_package_info(child)
157
+ elif child.type == "import_declaration":
158
+ import_info = self._extract_import_info(child, source_code)
159
+ if import_info:
160
+ imports.append(import_info)
161
+ elif child.type in [
162
+ "class_declaration",
163
+ "interface_declaration",
164
+ "enum_declaration",
165
+ ]:
166
+ # After package and imports come class declarations, so stop
167
+ break
168
+
169
+ log_debug(f"Extracted {len(imports)} imports")
170
+ return imports
171
+
172
+ def extract_packages(
173
+ self, tree: "tree_sitter.Tree", source_code: str
174
+ ) -> list[Package]:
175
+ """Extract Java package declarations"""
176
+ self.source_code = source_code
177
+ self.content_lines = source_code.split("\n")
178
+
179
+ packages: list[Package] = []
180
+
181
+ # Extract package declaration
182
+ for child in tree.root_node.children:
183
+ if child.type == "package_declaration":
184
+ package_info = self._extract_package_element(child)
185
+ if package_info:
186
+ packages.append(package_info)
187
+ break # Only one package declaration per file
188
+
189
+ log_debug(f"Extracted {len(packages)} packages")
190
+ return packages
191
+
192
+ def _reset_caches(self) -> None:
193
+ """Reset performance caches"""
194
+ self._node_text_cache.clear()
195
+ self._processed_nodes.clear()
196
+ self._element_cache.clear()
197
+ self._annotation_cache.clear()
198
+ self._signature_cache.clear()
199
+ self.annotations.clear()
200
+
201
+ def _traverse_and_extract_iterative(
202
+ self,
203
+ root_node: "tree_sitter.Node",
204
+ extractors: dict[str, Any],
205
+ results: list[Any],
206
+ element_type: str,
207
+ ) -> None:
208
+ """
209
+ Iterative node traversal and extraction (from AdvancedAnalyzer)
210
+ Uses batch processing for optimal performance
211
+ """
212
+ if not root_node:
213
+ return # type: ignore[unreachable]
214
+
215
+ # Target node types for extraction
216
+ target_node_types = set(extractors.keys())
217
+
218
+ # Container node types that may contain target nodes (from AdvancedAnalyzer)
219
+ container_node_types = {
220
+ "program",
221
+ "class_body",
222
+ "interface_body",
223
+ "enum_body",
224
+ "class_declaration",
225
+ "interface_declaration",
226
+ "enum_declaration",
227
+ "method_declaration",
228
+ "constructor_declaration",
229
+ "block",
230
+ "modifiers", # アノテーションは修飾子に含まれることがある
231
+ }
232
+
233
+ # Iterative DFS stack: (node, depth)
234
+ node_stack = [(root_node, 0)]
235
+ processed_nodes = 0
236
+ max_depth = 50 # Prevent infinite loops
237
+
238
+ # Batch processing containers (from AdvancedAnalyzer)
239
+ field_batch = []
240
+
241
+ while node_stack:
242
+ current_node, depth = node_stack.pop()
243
+
244
+ # Safety check for maximum depth
245
+ if depth > max_depth:
246
+ log_warning(f"Maximum traversal depth ({max_depth}) exceeded")
247
+ continue
248
+
249
+ processed_nodes += 1
250
+ node_type = current_node.type
251
+
252
+ # Early termination: skip nodes that don't contain target elements
253
+ if (
254
+ depth > 0
255
+ and node_type not in target_node_types
256
+ and node_type not in container_node_types
257
+ ):
258
+ continue
259
+
260
+ # Collect target nodes for batch processing (from AdvancedAnalyzer)
261
+ if node_type in target_node_types:
262
+ if element_type == "field" and node_type == "field_declaration":
263
+ field_batch.append(current_node)
264
+ else:
265
+ # Process non-field elements immediately
266
+ node_id = id(current_node)
267
+
268
+ # Skip if already processed
269
+ if node_id in self._processed_nodes:
270
+ continue
271
+
272
+ # Check element cache first
273
+ cache_key = (node_id, element_type)
274
+ if cache_key in self._element_cache:
275
+ element = self._element_cache[cache_key]
276
+ if element:
277
+ if isinstance(element, list):
278
+ results.extend(element)
279
+ else:
280
+ results.append(element)
281
+ self._processed_nodes.add(node_id)
282
+ continue
283
+
284
+ # Extract and cache
285
+ extractor = extractors.get(node_type)
286
+ if extractor:
287
+ element = extractor(current_node)
288
+ self._element_cache[cache_key] = element
289
+ if element:
290
+ if isinstance(element, list):
291
+ results.extend(element)
292
+ else:
293
+ results.append(element)
294
+ self._processed_nodes.add(node_id)
295
+
296
+ # Add children to stack (reversed for correct DFS traversal)
297
+ if current_node.children:
298
+ for child in reversed(current_node.children):
299
+ node_stack.append((child, depth + 1))
300
+
301
+ # Process field batch when it reaches optimal size (from AdvancedAnalyzer)
302
+ if len(field_batch) >= 10:
303
+ self._process_field_batch(field_batch, extractors, results)
304
+ field_batch.clear()
305
+
306
+ # Process remaining field batch (from AdvancedAnalyzer)
307
+ if field_batch:
308
+ self._process_field_batch(field_batch, extractors, results)
309
+
310
+ log_debug(f"Iterative traversal processed {processed_nodes} nodes")
311
+
312
+ def _process_field_batch(
313
+ self, batch: list["tree_sitter.Node"], extractors: dict, results: list[Any]
314
+ ) -> None:
315
+ """Process field nodes with caching (from AdvancedAnalyzer)"""
316
+ for node in batch:
317
+ node_id = id(node)
318
+
319
+ # Skip if already processed
320
+ if node_id in self._processed_nodes:
321
+ continue
322
+
323
+ # Check element cache first
324
+ cache_key = (node_id, "field")
325
+ if cache_key in self._element_cache:
326
+ elements = self._element_cache[cache_key]
327
+ if elements:
328
+ if isinstance(elements, list):
329
+ results.extend(elements)
330
+ else:
331
+ results.append(elements)
332
+ self._processed_nodes.add(node_id)
333
+ continue
334
+
335
+ # Extract and cache
336
+ extractor = extractors.get(node.type)
337
+ if extractor:
338
+ elements = extractor(node)
339
+ self._element_cache[cache_key] = elements
340
+ if elements:
341
+ if isinstance(elements, list):
342
+ results.extend(elements)
343
+ else:
344
+ results.append(elements)
345
+ self._processed_nodes.add(node_id)
346
+
347
+ def _get_node_text_optimized(self, node: "tree_sitter.Node") -> str:
348
+ """Get node text with optimized caching (from AdvancedAnalyzer)"""
349
+ node_id = id(node)
350
+
351
+ # Check cache first
352
+ if node_id in self._node_text_cache:
353
+ return self._node_text_cache[node_id]
354
+
355
+ try:
356
+ # Use encoding utilities for text extraction
357
+ start_byte = node.start_byte
358
+ end_byte = node.end_byte
359
+
360
+ encoding = self._file_encoding or "utf-8"
361
+ content_bytes = safe_encode("\n".join(self.content_lines), encoding)
362
+ text = extract_text_slice(content_bytes, start_byte, end_byte, encoding)
363
+
364
+ self._node_text_cache[node_id] = text
365
+ return text
366
+ except Exception as e:
367
+ log_error(f"Error in _get_node_text_optimized: {e}")
368
+ # Fallback to simple text extraction
369
+ try:
370
+ start_point = node.start_point
371
+ end_point = node.end_point
372
+
373
+ if start_point[0] == end_point[0]:
374
+ # Single line
375
+ line = self.content_lines[start_point[0]]
376
+ return line[start_point[1] : end_point[1]]
377
+ else:
378
+ # Multiple lines
379
+ lines = []
380
+ for i in range(start_point[0], end_point[0] + 1):
381
+ if i < len(self.content_lines):
382
+ line = self.content_lines[i]
383
+ if i == start_point[0]:
384
+ lines.append(line[start_point[1] :])
385
+ elif i == end_point[0]:
386
+ lines.append(line[: end_point[1]])
387
+ else:
388
+ lines.append(line)
389
+ return "\n".join(lines)
390
+ except Exception as fallback_error:
391
+ log_error(f"Fallback text extraction also failed: {fallback_error}")
392
+ return ""
393
+
394
+ def _extract_class_optimized(self, node: "tree_sitter.Node") -> Class | None:
395
+ """Extract class information optimized (from AdvancedAnalyzer)"""
396
+ try:
397
+ start_line = node.start_point[0] + 1
398
+ end_line = node.end_point[0] + 1
399
+
400
+ # Extract class name efficiently
401
+ class_name = None
402
+ for child in node.children:
403
+ if child.type == "identifier":
404
+ class_name = self._get_node_text_optimized(child)
405
+ break
406
+
407
+ if not class_name:
408
+ return None
409
+
410
+ # Determine package name
411
+ package_name = self.current_package
412
+ full_qualified_name = (
413
+ f"{package_name}.{class_name}" if package_name else class_name
414
+ )
415
+
416
+ # Determine class type (optimized: dictionary lookup)
417
+ class_type_map = {
418
+ "class_declaration": "class",
419
+ "interface_declaration": "interface",
420
+ "enum_declaration": "enum",
421
+ }
422
+ class_type = class_type_map.get(node.type, "class")
423
+
424
+ # Extract modifiers efficiently
425
+ modifiers = self._extract_modifiers_optimized(node)
426
+ visibility = self._determine_visibility(modifiers)
427
+
428
+ # Extract superclass and interfaces (optimized: single pass)
429
+ extends_class = None
430
+ implements_interfaces = []
431
+
432
+ for child in node.children:
433
+ if child.type == "superclass":
434
+ extends_text = self._get_node_text_optimized(child)
435
+ match = re.search(r"\b[A-Z]\w*", extends_text)
436
+ if match:
437
+ extends_class = match.group(0)
438
+ elif child.type == "super_interfaces":
439
+ implements_text = self._get_node_text_optimized(child)
440
+ implements_interfaces = re.findall(r"\b[A-Z]\w*", implements_text)
441
+
442
+ # Extract annotations for this class
443
+ class_annotations = self._find_annotations_for_line_cached(start_line)
444
+
445
+ # Check if this is a nested class
446
+ is_nested = self._is_nested_class(node)
447
+ parent_class = self._find_parent_class(node) if is_nested else None
448
+
449
+ # Extract raw text
450
+ start_line_idx = max(0, start_line - 1)
451
+ end_line_idx = min(len(self.content_lines), end_line)
452
+ raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
453
+
454
+ return Class(
455
+ name=class_name,
456
+ start_line=start_line,
457
+ end_line=end_line,
458
+ raw_text=raw_text,
459
+ language="java",
460
+ class_type=class_type,
461
+ full_qualified_name=full_qualified_name,
462
+ package_name=package_name,
463
+ superclass=extends_class,
464
+ interfaces=implements_interfaces,
465
+ modifiers=modifiers,
466
+ visibility=visibility,
467
+ # Java-specific detailed information
468
+ annotations=class_annotations,
469
+ is_nested=is_nested,
470
+ parent_class=parent_class,
471
+ extends_class=extends_class, # Alias for superclass
472
+ implements_interfaces=implements_interfaces, # Alias for interfaces
473
+ )
474
+ except (AttributeError, ValueError, TypeError) as e:
475
+ log_debug(f"Failed to extract class info: {e}")
476
+ return None
477
+ except Exception as e:
478
+ log_error(f"Unexpected error in class extraction: {e}")
479
+ return None
480
+
481
+ def _extract_method_optimized(self, node: "tree_sitter.Node") -> Function | None:
482
+ """Extract method information optimized (from AdvancedAnalyzer)"""
483
+ try:
484
+ start_line = node.start_point[0] + 1
485
+ end_line = node.end_point[0] + 1
486
+
487
+ # Extract method information efficiently
488
+ method_info = self._parse_method_signature_optimized(node)
489
+ if not method_info:
490
+ return None
491
+
492
+ method_name, return_type, parameters, modifiers, throws = method_info
493
+ is_constructor = node.type == "constructor_declaration"
494
+ visibility = self._determine_visibility(modifiers)
495
+
496
+ # Extract annotations for this method
497
+ method_annotations = self._find_annotations_for_line_cached(start_line)
498
+
499
+ # Calculate complexity score
500
+ complexity_score = self._calculate_complexity_optimized(node)
501
+
502
+ # Extract JavaDoc
503
+ javadoc = self._extract_javadoc_for_line(start_line)
504
+
505
+ # Extract raw text
506
+ start_line_idx = max(0, start_line - 1)
507
+ end_line_idx = min(len(self.content_lines), end_line)
508
+ raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
509
+
510
+ return Function(
511
+ name=method_name,
512
+ start_line=start_line,
513
+ end_line=end_line,
514
+ raw_text=raw_text,
515
+ language="java",
516
+ parameters=parameters,
517
+ return_type=return_type if not is_constructor else "void",
518
+ modifiers=modifiers,
519
+ is_static="static" in modifiers,
520
+ is_private="private" in modifiers,
521
+ is_public="public" in modifiers,
522
+ is_constructor=is_constructor,
523
+ visibility=visibility,
524
+ docstring=javadoc,
525
+ # Java-specific detailed information
526
+ annotations=method_annotations,
527
+ throws=throws,
528
+ complexity_score=complexity_score,
529
+ is_abstract="abstract" in modifiers,
530
+ is_final="final" in modifiers,
531
+ )
532
+ except (AttributeError, ValueError, TypeError) as e:
533
+ log_debug(f"Failed to extract method info: {e}")
534
+ return None
535
+ except Exception as e:
536
+ log_error(f"Unexpected error in method extraction: {e}")
537
+ return None
538
+
539
+ def _extract_field_optimized(self, node: "tree_sitter.Node") -> list[Variable]:
540
+ """Extract field information optimized (from AdvancedAnalyzer)"""
541
+ fields: list[Variable] = []
542
+ try:
543
+ start_line = node.start_point[0] + 1
544
+ end_line = node.end_point[0] + 1
545
+
546
+ # Parse field declaration using AdvancedAnalyzer method
547
+ field_info = self._parse_field_declaration_optimized(node)
548
+ if not field_info:
549
+ return fields
550
+
551
+ field_type, variable_names, modifiers = field_info
552
+ visibility = self._determine_visibility(modifiers)
553
+
554
+ # Extract annotations for this field
555
+ field_annotations = self._find_annotations_for_line_cached(start_line)
556
+
557
+ # Extract JavaDoc for this field
558
+ field_javadoc = self._extract_javadoc_for_line(start_line)
559
+
560
+ # Create Variable object for each variable (matching AdvancedAnalyzer structure)
561
+ for var_name in variable_names:
562
+ # Extract raw text
563
+ start_line_idx = max(0, start_line - 1)
564
+ end_line_idx = min(len(self.content_lines), end_line)
565
+ raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
566
+
567
+ field = Variable(
568
+ name=var_name,
569
+ start_line=start_line,
570
+ end_line=end_line,
571
+ raw_text=raw_text,
572
+ language="java",
573
+ variable_type=field_type,
574
+ modifiers=modifiers,
575
+ is_static="static" in modifiers,
576
+ is_constant="final" in modifiers,
577
+ visibility=visibility,
578
+ docstring=field_javadoc,
579
+ # Java-specific detailed information
580
+ annotations=field_annotations,
581
+ is_final="final" in modifiers,
582
+ field_type=field_type, # Alias for variable_type
583
+ )
584
+ fields.append(field)
585
+ except (AttributeError, ValueError, TypeError) as e:
586
+ log_debug(f"Failed to extract field info: {e}")
587
+ except Exception as e:
588
+ log_error(f"Unexpected error in field extraction: {e}")
589
+
590
+ return fields
591
+
592
+ def _parse_method_signature_optimized(
593
+ self, node: "tree_sitter.Node"
594
+ ) -> tuple[str, str, list[str], list[str], list[str]] | None:
595
+ """Parse method signature optimized (from AdvancedAnalyzer)"""
596
+ try:
597
+ # Extract method name
598
+ method_name = None
599
+ for child in node.children:
600
+ if child.type == "identifier":
601
+ method_name = self._get_node_text_optimized(child)
602
+ break
603
+
604
+ if not method_name:
605
+ return None
606
+
607
+ # Extract return type
608
+ return_type = "void"
609
+ for child in node.children:
610
+ if child.type in [
611
+ "type_identifier",
612
+ "void_type",
613
+ "primitive_type",
614
+ "integral_type",
615
+ "boolean_type",
616
+ "floating_point_type",
617
+ "array_type",
618
+ ]:
619
+ return_type = self._get_node_text_optimized(child)
620
+ break
621
+ elif child.type == "generic_type":
622
+ return_type = self._get_node_text_optimized(child)
623
+ break
624
+
625
+ # Extract parameters
626
+ parameters = []
627
+ for child in node.children:
628
+ if child.type == "formal_parameters":
629
+ for param in child.children:
630
+ if param.type == "formal_parameter":
631
+ param_text = self._get_node_text_optimized(param)
632
+ parameters.append(param_text)
633
+
634
+ # Extract modifiers
635
+ modifiers = self._extract_modifiers_optimized(node)
636
+
637
+ # Extract throws clause
638
+ throws = []
639
+ for child in node.children:
640
+ if child.type == "throws":
641
+ throws_text = self._get_node_text_optimized(child)
642
+ exceptions = re.findall(r"\b[A-Z]\w*Exception\b", throws_text)
643
+ throws.extend(exceptions)
644
+
645
+ return method_name, return_type, parameters, modifiers, throws
646
+ except Exception:
647
+ return None
648
+
649
+ def _parse_field_declaration_optimized(
650
+ self, node: "tree_sitter.Node"
651
+ ) -> tuple[str, list[str], list[str]] | None:
652
+ """Parse field declaration optimized (from AdvancedAnalyzer)"""
653
+ try:
654
+ # Extract type (exactly as in AdvancedAnalyzer)
655
+ field_type = None
656
+ for child in node.children:
657
+ if child.type in [
658
+ "type_identifier",
659
+ "primitive_type",
660
+ "integral_type",
661
+ "generic_type",
662
+ "boolean_type",
663
+ "floating_point_type",
664
+ "array_type",
665
+ ]:
666
+ field_type = self._get_node_text_optimized(child)
667
+ break
668
+
669
+ if not field_type:
670
+ return None
671
+
672
+ # Extract variable names (exactly as in AdvancedAnalyzer)
673
+ variable_names = []
674
+ for child in node.children:
675
+ if child.type == "variable_declarator":
676
+ for grandchild in child.children:
677
+ if grandchild.type == "identifier":
678
+ var_name = self._get_node_text_optimized(grandchild)
679
+ variable_names.append(var_name)
680
+
681
+ if not variable_names:
682
+ return None
683
+
684
+ # Extract modifiers (exactly as in AdvancedAnalyzer)
685
+ modifiers = self._extract_modifiers_optimized(node)
686
+
687
+ return field_type, variable_names, modifiers
688
+ except Exception:
689
+ return None
690
+
691
+ def _extract_modifiers_optimized(self, node: "tree_sitter.Node") -> list[str]:
692
+ """Extract modifiers efficiently (from AdvancedAnalyzer)"""
693
+ modifiers = []
694
+ for child in node.children:
695
+ if child.type == "modifiers":
696
+ for mod_child in child.children:
697
+ if mod_child.type in [
698
+ "public",
699
+ "private",
700
+ "protected",
701
+ "static",
702
+ "final",
703
+ "abstract",
704
+ "synchronized",
705
+ "volatile",
706
+ "transient",
707
+ ]:
708
+ modifiers.append(mod_child.type)
709
+ elif mod_child.type not in ["marker_annotation"]:
710
+ mod_text = self._get_node_text_optimized(mod_child)
711
+ if mod_text in [
712
+ "public",
713
+ "private",
714
+ "protected",
715
+ "static",
716
+ "final",
717
+ "abstract",
718
+ "synchronized",
719
+ "volatile",
720
+ "transient",
721
+ ]:
722
+ modifiers.append(mod_text)
723
+ return modifiers
724
+
725
+ def _extract_package_info(self, node: "tree_sitter.Node") -> None:
726
+ """Extract package information (from AdvancedAnalyzer)"""
727
+ try:
728
+ package_text = self._get_node_text_optimized(node)
729
+ match = re.search(r"package\s+([\w.]+)", package_text)
730
+ if match:
731
+ self.current_package = match.group(1)
732
+ except (AttributeError, ValueError, IndexError) as e:
733
+ log_debug(f"Failed to extract package info: {e}")
734
+ except Exception as e:
735
+ log_error(f"Unexpected error in package extraction: {e}")
736
+
737
def _extract_package_element(self, node: "tree_sitter.Node") -> Package | None:
    """Build a ``Package`` element from a package declaration node.

    Returns ``None`` when the node's text contains no package
    declaration or extraction fails.
    """
    try:
        text = self._get_node_text_optimized(node)
        found = re.search(r"package\s+([\w.]+)", text)
        if found is None:
            return None
        return Package(
            name=found.group(1),
            start_line=node.start_point[0] + 1,
            end_line=node.end_point[0] + 1,
            raw_text=text,
            language="java",
        )
    except (AttributeError, ValueError, IndexError) as e:
        log_debug(f"Failed to extract package element: {e}")
    except Exception as e:
        log_error(f"Unexpected error in package element extraction: {e}")
    return None
756
+
757
+ def _determine_visibility(self, modifiers: list[str]) -> str:
758
+ """Determine visibility from modifiers"""
759
+ if "public" in modifiers:
760
+ return "public"
761
+ elif "private" in modifiers:
762
+ return "private"
763
+ elif "protected" in modifiers:
764
+ return "protected"
765
+ else:
766
+ return "package" # Default package visibility
767
+
768
+ def _find_annotations_for_line_cached(
769
+ self, target_line: int
770
+ ) -> list[dict[str, Any]]:
771
+ """Find annotations for specified line with caching (from AdvancedAnalyzer)"""
772
+ if target_line in self._annotation_cache:
773
+ return self._annotation_cache[target_line]
774
+
775
+ result_annotations = []
776
+ for annotation in self.annotations:
777
+ line_distance = target_line - annotation.get("end_line", 0)
778
+ if 1 <= line_distance <= 5:
779
+ result_annotations.append(annotation)
780
+
781
+ self._annotation_cache[target_line] = result_annotations
782
+ return result_annotations
783
+
784
+ def _calculate_complexity_optimized(self, node: "tree_sitter.Node") -> int:
785
+ """Calculate cyclomatic complexity efficiently (from AdvancedAnalyzer)"""
786
+ complexity = 1
787
+ try:
788
+ node_text = self._get_node_text_optimized(node).lower()
789
+ keywords = ["if", "while", "for", "catch", "case", "switch"]
790
+ for keyword in keywords:
791
+ complexity += node_text.count(keyword)
792
+ except (AttributeError, TypeError) as e:
793
+ log_debug(f"Failed to calculate complexity: {e}")
794
+ except Exception as e:
795
+ log_error(f"Unexpected error in complexity calculation: {e}")
796
+ return complexity
797
+
798
+ def _extract_javadoc_for_line(self, target_line: int) -> str | None:
799
+ """Extract JavaDoc comment immediately before the specified line"""
800
+ try:
801
+ if not self.content_lines or target_line <= 1:
802
+ return None
803
+
804
+ # Search backwards from target_line
805
+ javadoc_lines = []
806
+ current_line = target_line - 1
807
+
808
+ # Skip empty lines
809
+ while current_line > 0:
810
+ line = self.content_lines[current_line - 1].strip()
811
+ if line:
812
+ break
813
+ current_line -= 1
814
+
815
+ # Check for JavaDoc end
816
+ if current_line > 0:
817
+ line = self.content_lines[current_line - 1].strip()
818
+ if line.endswith("*/"):
819
+ # This might be a JavaDoc comment
820
+ javadoc_lines.append(self.content_lines[current_line - 1])
821
+ current_line -= 1
822
+
823
+ # Collect JavaDoc content
824
+ while current_line > 0:
825
+ line_content = self.content_lines[current_line - 1]
826
+ line_stripped = line_content.strip()
827
+ javadoc_lines.append(line_content)
828
+
829
+ if line_stripped.startswith("/**"):
830
+ # Found the start of JavaDoc
831
+ javadoc_lines.reverse()
832
+ javadoc_text = "\n".join(javadoc_lines)
833
+
834
+ # Clean up the JavaDoc
835
+ return self._clean_javadoc(javadoc_text)
836
+ current_line -= 1
837
+
838
+ return None
839
+
840
+ except Exception as e:
841
+ log_debug(f"Failed to extract JavaDoc: {e}")
842
+ return None
843
+
844
+ def _clean_javadoc(self, javadoc_text: str) -> str:
845
+ """Clean JavaDoc text by removing comment markers"""
846
+ if not javadoc_text:
847
+ return ""
848
+
849
+ lines = javadoc_text.split("\n")
850
+ cleaned_lines = []
851
+
852
+ for line in lines:
853
+ # Remove leading/trailing whitespace
854
+ line = line.strip()
855
+
856
+ # Remove comment markers
857
+ if line.startswith("/**"):
858
+ line = line[3:].strip()
859
+ elif line.startswith("*/"):
860
+ line = line[2:].strip()
861
+ elif line.startswith("*"):
862
+ line = line[1:].strip()
863
+
864
+ if line: # Only add non-empty lines
865
+ cleaned_lines.append(line)
866
+
867
+ return " ".join(cleaned_lines) if cleaned_lines else ""
868
+
869
+ def _is_nested_class(self, node: "tree_sitter.Node") -> bool:
870
+ """Check if this is a nested class (from AdvancedAnalyzer)"""
871
+ current = node.parent
872
+ while current:
873
+ if current.type in [
874
+ "class_declaration",
875
+ "interface_declaration",
876
+ "enum_declaration",
877
+ ]:
878
+ return True
879
+ current = current.parent
880
+ return False
881
+
882
+ def _find_parent_class(self, node: "tree_sitter.Node") -> str | None:
883
+ """Find parent class name (from AdvancedAnalyzer)"""
884
+ current = node.parent
885
+ while current:
886
+ if current.type in [
887
+ "class_declaration",
888
+ "interface_declaration",
889
+ "enum_declaration",
890
+ ]:
891
+ return self._extract_class_name(current)
892
+ current = current.parent
893
+ return None
894
+
895
+ def _extract_class_name(self, node: "tree_sitter.Node") -> str | None:
896
+ """Extract class name from node (from AdvancedAnalyzer)"""
897
+ for child in node.children:
898
+ if child.type == "identifier":
899
+ return self._get_node_text_optimized(child)
900
+ return None
901
+
902
+ def _extract_annotation_optimized(
903
+ self, node: "tree_sitter.Node"
904
+ ) -> dict[str, Any] | None:
905
+ """Extract annotation information optimized (from AdvancedAnalyzer)"""
906
+ try:
907
+ start_line = node.start_point[0] + 1
908
+ end_line = node.end_point[0] + 1
909
+ raw_text = self._get_node_text_optimized(node)
910
+
911
+ # Extract annotation name efficiently
912
+ name_match = re.search(r"@(\w+)", raw_text)
913
+ if not name_match:
914
+ return None
915
+
916
+ annotation_name = name_match.group(1)
917
+
918
+ # Extract parameters efficiently
919
+ parameters = []
920
+ param_match = re.search(r"\((.*?)\)", raw_text, re.DOTALL)
921
+ if param_match:
922
+ param_text = param_match.group(1).strip()
923
+ if param_text:
924
+ # Simple parameter parsing
925
+ if "=" in param_text:
926
+ parameters = [
927
+ p.strip() for p in re.split(r",(?![^()]*\))", param_text)
928
+ ]
929
+ else:
930
+ parameters = [param_text]
931
+
932
+ return {
933
+ "name": annotation_name,
934
+ "parameters": parameters,
935
+ "start_line": start_line,
936
+ "end_line": end_line,
937
+ "raw_text": raw_text,
938
+ }
939
+ except (AttributeError, IndexError, ValueError) as e:
940
+ log_debug(f"Failed to extract annotation from node: {e}")
941
+ return None
942
+ except Exception as e:
943
+ log_error(f"Unexpected exception in annotation extraction: {e}")
944
+ return None
945
+
946
def _extract_import_info(
    self, node: "tree_sitter.Node", source_code: str
) -> Import | None:
    """Build an ``Import`` element from an import declaration node.

    Handles plain, wildcard and static imports; returns ``None`` when
    the statement cannot be parsed.

    Fix: the previous version tested ``"static" in import_content``,
    which misclassified imports whose package path merely contains the
    word "static" (e.g. ``import com.staticutils.Foo;``) — the static
    regex then failed and the import was silently dropped. The static
    form is now detected by the ``import static`` match itself.
    """
    try:
        import_text = self._get_node_text_optimized(node)
        import_content = import_text.strip().removesuffix(";")
        is_wildcard = import_content.endswith(".*")

        static_match = re.search(r"import\s+static\s+([\w.]+)", import_content)
        if static_match:
            import_name = static_match.group(1)
            if is_wildcard:
                import_name = import_name.replace(".*", "")
                # A static wildcard imports a class's members: drop the
                # trailing segment to keep only the class path.
                parts = import_name.split(".")
                if len(parts) > 1:
                    import_name = ".".join(parts[:-1])
            return Import(
                name=import_name,
                start_line=node.start_point[0] + 1,
                end_line=node.end_point[0] + 1,
                raw_text=import_text,
                language="java",
                module_name=import_name,
                is_static=True,
                is_wildcard=is_wildcard,
                import_statement=import_content,
            )

        normal_match = re.search(r"import\s+([\w.]+)", import_content)
        if normal_match:
            import_name = normal_match.group(1)
            if is_wildcard:
                # The regex may have captured the trailing dot (or ".*").
                if import_name.endswith(".*"):
                    import_name = import_name[:-2]
                elif import_name.endswith("."):
                    import_name = import_name[:-1]
            return Import(
                name=import_name,
                start_line=node.start_point[0] + 1,
                end_line=node.end_point[0] + 1,
                raw_text=import_text,
                language="java",
                module_name=import_name,
                is_static=False,
                is_wildcard=is_wildcard,
                import_statement=import_content,
            )
    except (AttributeError, ValueError, IndexError) as e:
        log_debug(f"Failed to extract import info: {e}")
    except Exception as e:
        log_error(f"Unexpected error in import extraction: {e}")
    return None
1009
+
1010
+
1011
+ class JavaPlugin(LanguagePlugin):
1012
+ """Java language plugin for the new architecture"""
1013
+
1014
def __init__(self) -> None:
    """Set up the plugin with an empty grammar cache."""
    super().__init__()
    # Populated lazily by get_tree_sitter_language().
    self._language_cache: tree_sitter.Language | None = None
1018
+
1019
def get_language_name(self) -> str:
    """Identifier of the language handled by this plugin."""
    return "java"
1022
+
1023
def get_file_extensions(self) -> list[str]:
    """File extensions (including JSP variants) handled by this plugin."""
    return [".java", ".jsp", ".jspx"]
1026
+
1027
def create_extractor(self) -> ElementExtractor:
    """Build a fresh Java element extractor instance."""
    return JavaElementExtractor()
1030
+
1031
def get_tree_sitter_language(self) -> Optional["tree_sitter.Language"]:
    """Load (and cache) the Tree-sitter grammar for Java.

    Returns ``None`` when tree-sitter-java is not installed or fails
    to load; the successful result is memoized for later calls.
    """
    if self._language_cache is not None:
        return self._language_cache
    try:
        import tree_sitter_java as tsjava

        self._language_cache = tsjava.language()  # type: ignore
    except ImportError:
        log_error("tree-sitter-java not available")
        return None
    except Exception as e:
        log_error(f"Failed to load Java language: {e}")
        return None
    return self._language_cache
1045
+
1046
def get_supported_queries(self) -> list[str]:
    """Query names this plugin can execute."""
    return ["class", "method", "field", "import"]
1049
+
1050
def is_applicable(self, file_path: str) -> bool:
    """True when *file_path* ends with a supported extension (case-insensitive)."""
    lowered = file_path.lower()
    return any(
        lowered.endswith(ext.lower()) for ext in self.get_file_extensions()
    )
1056
+
1057
def get_plugin_info(self) -> dict:
    """Describe this plugin (name, language, extensions, version, queries)."""
    return dict(
        name="Java Plugin",
        language=self.get_language_name(),
        extensions=self.get_file_extensions(),
        version="2.0.0",
        supported_queries=self.get_supported_queries(),
    )
1066
+
1067
+ async def analyze_file(
1068
+ self, file_path: str, request: "AnalysisRequest"
1069
+ ) -> "AnalysisResult":
1070
+ """
1071
+ Analyze a Java file and return analysis results.
1072
+
1073
+ Args:
1074
+ file_path: Path to the Java file to analyze
1075
+ request: Analysis request object
1076
+
1077
+ Returns:
1078
+ AnalysisResult object containing the analysis results
1079
+ """
1080
+ try:
1081
+ from ..core.parser import Parser
1082
+ from ..models import AnalysisResult
1083
+
1084
+ log_debug(f"Java Plugin: Starting analysis of {file_path}")
1085
+
1086
+ # Read file content
1087
+ with open(file_path, encoding="utf-8") as f:
1088
+ source_code = f.read()
1089
+
1090
+ log_debug(f"Java Plugin: Read {len(source_code)} characters from file")
1091
+
1092
+ # Parse the file
1093
+ parser = Parser()
1094
+ parse_result = parser.parse_code(source_code, "java")
1095
+
1096
+ log_debug(f"Java Plugin: Parse result success: {parse_result.success}")
1097
+
1098
+ if not parse_result.success:
1099
+ log_error(f"Java Plugin: Parse failed: {parse_result.error_message}")
1100
+ return AnalysisResult(
1101
+ file_path=file_path,
1102
+ language="java",
1103
+ line_count=len(source_code.splitlines()),
1104
+ elements=[],
1105
+ node_count=0,
1106
+ query_results={},
1107
+ source_code=source_code,
1108
+ success=False,
1109
+ error_message=parse_result.error_message,
1110
+ )
1111
+
1112
+ # Extract elements
1113
+ extractor = self.create_extractor()
1114
+
1115
+ if parse_result.tree:
1116
+ log_debug("Java Plugin: Extracting functions...")
1117
+ functions = extractor.extract_functions(parse_result.tree, source_code)
1118
+ log_debug(f"Java Plugin: Found {len(functions)} functions")
1119
+
1120
+ log_debug("Java Plugin: Extracting classes...")
1121
+ classes = extractor.extract_classes(parse_result.tree, source_code)
1122
+ log_debug(f"Java Plugin: Found {len(classes)} classes")
1123
+
1124
+ log_debug("Java Plugin: Extracting variables...")
1125
+ variables = extractor.extract_variables(parse_result.tree, source_code)
1126
+ log_debug(f"Java Plugin: Found {len(variables)} variables")
1127
+
1128
+ log_debug("Java Plugin: Extracting imports...")
1129
+ imports = extractor.extract_imports(parse_result.tree, source_code)
1130
+ log_debug(f"Java Plugin: Found {len(imports)} imports")
1131
+ else:
1132
+ functions = []
1133
+ classes = []
1134
+ variables = []
1135
+ imports = []
1136
+
1137
+ # Combine all elements
1138
+ all_elements: list[CodeElement] = []
1139
+ all_elements.extend(functions)
1140
+ all_elements.extend(classes)
1141
+ all_elements.extend(variables)
1142
+ all_elements.extend(imports)
1143
+ log_debug(f"Java Plugin: Total elements: {len(all_elements)}")
1144
+
1145
+ return AnalysisResult(
1146
+ file_path=file_path,
1147
+ language="java",
1148
+ line_count=len(source_code.splitlines()),
1149
+ elements=all_elements,
1150
+ node_count=(
1151
+ parse_result.tree.root_node.child_count if parse_result.tree else 0
1152
+ ),
1153
+ query_results={},
1154
+ source_code=source_code,
1155
+ success=True,
1156
+ error_message=None,
1157
+ )
1158
+
1159
+ except Exception as e:
1160
+ log_error(f"Failed to analyze Java file {file_path}: {e}")
1161
+ import traceback
1162
+
1163
+ log_error(f"Java Plugin traceback: {traceback.format_exc()}")
1164
+ return AnalysisResult(
1165
+ file_path=file_path,
1166
+ language="java",
1167
+ line_count=0,
1168
+ elements=[],
1169
+ node_count=0,
1170
+ query_results={},
1171
+ source_code="",
1172
+ success=False,
1173
+ error_message=str(e),
1174
+ )