tree-sitter-analyzer 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (38) hide show
  1. tree_sitter_analyzer/__init__.py +1 -3
  2. tree_sitter_analyzer/__main__.py +2 -2
  3. tree_sitter_analyzer/cli/commands/default_command.py +1 -1
  4. tree_sitter_analyzer/cli/commands/query_command.py +5 -5
  5. tree_sitter_analyzer/cli/commands/table_command.py +3 -3
  6. tree_sitter_analyzer/cli/info_commands.py +14 -13
  7. tree_sitter_analyzer/cli_main.py +49 -30
  8. tree_sitter_analyzer/core/analysis_engine.py +21 -21
  9. tree_sitter_analyzer/core/cache_service.py +31 -31
  10. tree_sitter_analyzer/core/query.py +502 -502
  11. tree_sitter_analyzer/encoding_utils.py +5 -2
  12. tree_sitter_analyzer/file_handler.py +3 -3
  13. tree_sitter_analyzer/formatters/base_formatter.py +18 -18
  14. tree_sitter_analyzer/formatters/formatter_factory.py +15 -15
  15. tree_sitter_analyzer/formatters/java_formatter.py +291 -291
  16. tree_sitter_analyzer/formatters/python_formatter.py +259 -259
  17. tree_sitter_analyzer/interfaces/cli_adapter.py +32 -32
  18. tree_sitter_analyzer/interfaces/mcp_adapter.py +2 -2
  19. tree_sitter_analyzer/language_detector.py +398 -398
  20. tree_sitter_analyzer/language_loader.py +224 -224
  21. tree_sitter_analyzer/languages/java_plugin.py +1174 -1174
  22. tree_sitter_analyzer/languages/python_plugin.py +10 -2
  23. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +555 -555
  24. tree_sitter_analyzer/models.py +470 -470
  25. tree_sitter_analyzer/output_manager.py +8 -10
  26. tree_sitter_analyzer/plugins/base.py +33 -0
  27. tree_sitter_analyzer/queries/java.py +78 -78
  28. tree_sitter_analyzer/queries/javascript.py +7 -7
  29. tree_sitter_analyzer/queries/python.py +18 -18
  30. tree_sitter_analyzer/queries/typescript.py +12 -12
  31. tree_sitter_analyzer/query_loader.py +13 -13
  32. tree_sitter_analyzer/table_formatter.py +20 -18
  33. tree_sitter_analyzer/utils.py +1 -1
  34. {tree_sitter_analyzer-0.4.0.dist-info → tree_sitter_analyzer-0.6.0.dist-info}/METADATA +11 -11
  35. {tree_sitter_analyzer-0.4.0.dist-info → tree_sitter_analyzer-0.6.0.dist-info}/RECORD +37 -38
  36. tree_sitter_analyzer/java_analyzer.py +0 -187
  37. {tree_sitter_analyzer-0.4.0.dist-info → tree_sitter_analyzer-0.6.0.dist-info}/WHEEL +0 -0
  38. {tree_sitter_analyzer-0.4.0.dist-info → tree_sitter_analyzer-0.6.0.dist-info}/entry_points.txt +0 -0
@@ -1,1174 +1,1174 @@
1
- #!/usr/bin/env python3
2
- """
3
- Java Language Plugin
4
-
5
- Provides Java-specific parsing and element extraction functionality.
6
- Migrated from AdvancedAnalyzer implementation for future independence.
7
- """
8
-
9
- import re
10
- from typing import TYPE_CHECKING, Any, Optional
11
-
12
- if TYPE_CHECKING:
13
- import tree_sitter
14
-
15
- from ..core.analysis_engine import AnalysisRequest
16
- from ..models import AnalysisResult
17
-
18
- from ..encoding_utils import extract_text_slice, safe_encode
19
- from ..models import Class, CodeElement, Function, Import, Package, Variable
20
- from ..plugins.base import ElementExtractor, LanguagePlugin
21
- from ..utils import log_debug, log_error, log_warning
22
-
23
-
24
- class JavaElementExtractor(ElementExtractor):
25
- """Java-specific element extractor with AdvancedAnalyzer implementation"""
26
-
27
- def __init__(self) -> None:
28
- """Initialize the Java element extractor."""
29
- self.current_package: str = ""
30
- self.current_file: str = ""
31
- self.source_code: str = ""
32
- self.content_lines: list[str] = []
33
- self.imports: list[str] = []
34
-
35
- # Performance optimization caches (from AdvancedAnalyzer)
36
- self._node_text_cache: dict[int, str] = {}
37
- self._processed_nodes: set[int] = set()
38
- self._element_cache: dict[tuple[int, str], Any] = {}
39
- self._file_encoding: str | None = None
40
- self._annotation_cache: dict[int, list[dict[str, Any]]] = {}
41
- self._signature_cache: dict[int, str] = {}
42
-
43
- # Extracted annotations for cross-referencing
44
- self.annotations: list[dict[str, Any]] = []
45
-
46
- def extract_annotations(
47
- self, tree: "tree_sitter.Tree", source_code: str
48
- ) -> list[dict[str, Any]]:
49
- """Extract Java annotations using AdvancedAnalyzer implementation"""
50
- self.source_code = source_code
51
- self.content_lines = source_code.split("\n")
52
- self._reset_caches()
53
-
54
- annotations: list[dict[str, Any]] = []
55
-
56
- # Use AdvancedAnalyzer's optimized traversal for annotations
57
- extractors = {
58
- "annotation": self._extract_annotation_optimized,
59
- "marker_annotation": self._extract_annotation_optimized,
60
- }
61
-
62
- self._traverse_and_extract_iterative(
63
- tree.root_node, extractors, annotations, "annotation"
64
- )
65
-
66
- # Store annotations for cross-referencing
67
- self.annotations = annotations
68
-
69
- log_debug(f"Extracted {len(annotations)} annotations")
70
- return annotations
71
-
72
- def extract_functions(
73
- self, tree: "tree_sitter.Tree", source_code: str
74
- ) -> list[Function]:
75
- """Extract Java method definitions using AdvancedAnalyzer implementation"""
76
- self.source_code = source_code
77
- self.content_lines = source_code.split("\n")
78
- self._reset_caches()
79
-
80
- functions: list[Function] = []
81
-
82
- # Use AdvancedAnalyzer's optimized traversal
83
- extractors = {
84
- "method_declaration": self._extract_method_optimized,
85
- "constructor_declaration": self._extract_method_optimized,
86
- }
87
-
88
- self._traverse_and_extract_iterative(
89
- tree.root_node, extractors, functions, "method"
90
- )
91
-
92
- log_debug(f"Extracted {len(functions)} methods")
93
- return functions
94
-
95
- def extract_classes(
96
- self, tree: "tree_sitter.Tree", source_code: str
97
- ) -> list[Class]:
98
- """Extract Java class definitions using AdvancedAnalyzer implementation"""
99
- self.source_code = source_code
100
- self.content_lines = source_code.split("\n")
101
- self._reset_caches()
102
-
103
- classes: list[Class] = []
104
-
105
- # Use AdvancedAnalyzer's optimized traversal
106
- extractors = {
107
- "class_declaration": self._extract_class_optimized,
108
- "interface_declaration": self._extract_class_optimized,
109
- "enum_declaration": self._extract_class_optimized,
110
- }
111
-
112
- self._traverse_and_extract_iterative(
113
- tree.root_node, extractors, classes, "class"
114
- )
115
-
116
- log_debug(f"Extracted {len(classes)} classes")
117
- return classes
118
-
119
- def extract_variables(
120
- self, tree: "tree_sitter.Tree", source_code: str
121
- ) -> list[Variable]:
122
- """Extract Java field definitions using AdvancedAnalyzer implementation"""
123
- self.source_code = source_code
124
- self.content_lines = source_code.split("\n")
125
- self._reset_caches()
126
-
127
- variables: list[Variable] = []
128
-
129
- # Use AdvancedAnalyzer's optimized traversal
130
- extractors = {
131
- "field_declaration": self._extract_field_optimized,
132
- }
133
-
134
- log_debug("Starting field extraction with iterative traversal")
135
- self._traverse_and_extract_iterative(
136
- tree.root_node, extractors, variables, "field"
137
- )
138
-
139
- log_debug(f"Extracted {len(variables)} fields")
140
- for i, var in enumerate(variables[:3]):
141
- log_debug(f"Field {i}: {var.name} ({var.variable_type})")
142
- return variables
143
-
144
- def extract_imports(
145
- self, tree: "tree_sitter.Tree", source_code: str
146
- ) -> list[Import]:
147
- """Extract Java import statements"""
148
- self.source_code = source_code
149
- self.content_lines = source_code.split("\n")
150
-
151
- imports: list[Import] = []
152
-
153
- # Extract package and imports efficiently (from AdvancedAnalyzer)
154
- for child in tree.root_node.children:
155
- if child.type == "package_declaration":
156
- self._extract_package_info(child)
157
- elif child.type == "import_declaration":
158
- import_info = self._extract_import_info(child, source_code)
159
- if import_info:
160
- imports.append(import_info)
161
- elif child.type in [
162
- "class_declaration",
163
- "interface_declaration",
164
- "enum_declaration",
165
- ]:
166
- # After package and imports come class declarations, so stop
167
- break
168
-
169
- log_debug(f"Extracted {len(imports)} imports")
170
- return imports
171
-
172
- def extract_packages(
173
- self, tree: "tree_sitter.Tree", source_code: str
174
- ) -> list[Package]:
175
- """Extract Java package declarations"""
176
- self.source_code = source_code
177
- self.content_lines = source_code.split("\n")
178
-
179
- packages: list[Package] = []
180
-
181
- # Extract package declaration
182
- for child in tree.root_node.children:
183
- if child.type == "package_declaration":
184
- package_info = self._extract_package_element(child)
185
- if package_info:
186
- packages.append(package_info)
187
- break # Only one package declaration per file
188
-
189
- log_debug(f"Extracted {len(packages)} packages")
190
- return packages
191
-
192
- def _reset_caches(self) -> None:
193
- """Reset performance caches"""
194
- self._node_text_cache.clear()
195
- self._processed_nodes.clear()
196
- self._element_cache.clear()
197
- self._annotation_cache.clear()
198
- self._signature_cache.clear()
199
- self.annotations.clear()
200
-
201
- def _traverse_and_extract_iterative(
202
- self,
203
- root_node: "tree_sitter.Node",
204
- extractors: dict[str, Any],
205
- results: list[Any],
206
- element_type: str,
207
- ) -> None:
208
- """
209
- Iterative node traversal and extraction (from AdvancedAnalyzer)
210
- Uses batch processing for optimal performance
211
- """
212
- if not root_node:
213
- return # type: ignore[unreachable]
214
-
215
- # Target node types for extraction
216
- target_node_types = set(extractors.keys())
217
-
218
- # Container node types that may contain target nodes (from AdvancedAnalyzer)
219
- container_node_types = {
220
- "program",
221
- "class_body",
222
- "interface_body",
223
- "enum_body",
224
- "class_declaration",
225
- "interface_declaration",
226
- "enum_declaration",
227
- "method_declaration",
228
- "constructor_declaration",
229
- "block",
230
- "modifiers", # アノテーションは修飾子に含まれることがある
231
- }
232
-
233
- # Iterative DFS stack: (node, depth)
234
- node_stack = [(root_node, 0)]
235
- processed_nodes = 0
236
- max_depth = 50 # Prevent infinite loops
237
-
238
- # Batch processing containers (from AdvancedAnalyzer)
239
- field_batch = []
240
-
241
- while node_stack:
242
- current_node, depth = node_stack.pop()
243
-
244
- # Safety check for maximum depth
245
- if depth > max_depth:
246
- log_warning(f"Maximum traversal depth ({max_depth}) exceeded")
247
- continue
248
-
249
- processed_nodes += 1
250
- node_type = current_node.type
251
-
252
- # Early termination: skip nodes that don't contain target elements
253
- if (
254
- depth > 0
255
- and node_type not in target_node_types
256
- and node_type not in container_node_types
257
- ):
258
- continue
259
-
260
- # Collect target nodes for batch processing (from AdvancedAnalyzer)
261
- if node_type in target_node_types:
262
- if element_type == "field" and node_type == "field_declaration":
263
- field_batch.append(current_node)
264
- else:
265
- # Process non-field elements immediately
266
- node_id = id(current_node)
267
-
268
- # Skip if already processed
269
- if node_id in self._processed_nodes:
270
- continue
271
-
272
- # Check element cache first
273
- cache_key = (node_id, element_type)
274
- if cache_key in self._element_cache:
275
- element = self._element_cache[cache_key]
276
- if element:
277
- if isinstance(element, list):
278
- results.extend(element)
279
- else:
280
- results.append(element)
281
- self._processed_nodes.add(node_id)
282
- continue
283
-
284
- # Extract and cache
285
- extractor = extractors.get(node_type)
286
- if extractor:
287
- element = extractor(current_node)
288
- self._element_cache[cache_key] = element
289
- if element:
290
- if isinstance(element, list):
291
- results.extend(element)
292
- else:
293
- results.append(element)
294
- self._processed_nodes.add(node_id)
295
-
296
- # Add children to stack (reversed for correct DFS traversal)
297
- if current_node.children:
298
- for child in reversed(current_node.children):
299
- node_stack.append((child, depth + 1))
300
-
301
- # Process field batch when it reaches optimal size (from AdvancedAnalyzer)
302
- if len(field_batch) >= 10:
303
- self._process_field_batch(field_batch, extractors, results)
304
- field_batch.clear()
305
-
306
- # Process remaining field batch (from AdvancedAnalyzer)
307
- if field_batch:
308
- self._process_field_batch(field_batch, extractors, results)
309
-
310
- log_debug(f"Iterative traversal processed {processed_nodes} nodes")
311
-
312
- def _process_field_batch(
313
- self, batch: list["tree_sitter.Node"], extractors: dict, results: list[Any]
314
- ) -> None:
315
- """Process field nodes with caching (from AdvancedAnalyzer)"""
316
- for node in batch:
317
- node_id = id(node)
318
-
319
- # Skip if already processed
320
- if node_id in self._processed_nodes:
321
- continue
322
-
323
- # Check element cache first
324
- cache_key = (node_id, "field")
325
- if cache_key in self._element_cache:
326
- elements = self._element_cache[cache_key]
327
- if elements:
328
- if isinstance(elements, list):
329
- results.extend(elements)
330
- else:
331
- results.append(elements)
332
- self._processed_nodes.add(node_id)
333
- continue
334
-
335
- # Extract and cache
336
- extractor = extractors.get(node.type)
337
- if extractor:
338
- elements = extractor(node)
339
- self._element_cache[cache_key] = elements
340
- if elements:
341
- if isinstance(elements, list):
342
- results.extend(elements)
343
- else:
344
- results.append(elements)
345
- self._processed_nodes.add(node_id)
346
-
347
- def _get_node_text_optimized(self, node: "tree_sitter.Node") -> str:
348
- """Get node text with optimized caching (from AdvancedAnalyzer)"""
349
- node_id = id(node)
350
-
351
- # Check cache first
352
- if node_id in self._node_text_cache:
353
- return self._node_text_cache[node_id]
354
-
355
- try:
356
- # Use encoding utilities for text extraction
357
- start_byte = node.start_byte
358
- end_byte = node.end_byte
359
-
360
- encoding = self._file_encoding or "utf-8"
361
- content_bytes = safe_encode("\n".join(self.content_lines), encoding)
362
- text = extract_text_slice(content_bytes, start_byte, end_byte, encoding)
363
-
364
- self._node_text_cache[node_id] = text
365
- return text
366
- except Exception as e:
367
- log_error(f"Error in _get_node_text_optimized: {e}")
368
- # Fallback to simple text extraction
369
- try:
370
- start_point = node.start_point
371
- end_point = node.end_point
372
-
373
- if start_point[0] == end_point[0]:
374
- # Single line
375
- line = self.content_lines[start_point[0]]
376
- return line[start_point[1] : end_point[1]]
377
- else:
378
- # Multiple lines
379
- lines = []
380
- for i in range(start_point[0], end_point[0] + 1):
381
- if i < len(self.content_lines):
382
- line = self.content_lines[i]
383
- if i == start_point[0]:
384
- lines.append(line[start_point[1] :])
385
- elif i == end_point[0]:
386
- lines.append(line[: end_point[1]])
387
- else:
388
- lines.append(line)
389
- return "\n".join(lines)
390
- except Exception as fallback_error:
391
- log_error(f"Fallback text extraction also failed: {fallback_error}")
392
- return ""
393
-
394
- def _extract_class_optimized(self, node: "tree_sitter.Node") -> Class | None:
395
- """Extract class information optimized (from AdvancedAnalyzer)"""
396
- try:
397
- start_line = node.start_point[0] + 1
398
- end_line = node.end_point[0] + 1
399
-
400
- # Extract class name efficiently
401
- class_name = None
402
- for child in node.children:
403
- if child.type == "identifier":
404
- class_name = self._get_node_text_optimized(child)
405
- break
406
-
407
- if not class_name:
408
- return None
409
-
410
- # Determine package name
411
- package_name = self.current_package
412
- full_qualified_name = (
413
- f"{package_name}.{class_name}" if package_name else class_name
414
- )
415
-
416
- # Determine class type (optimized: dictionary lookup)
417
- class_type_map = {
418
- "class_declaration": "class",
419
- "interface_declaration": "interface",
420
- "enum_declaration": "enum",
421
- }
422
- class_type = class_type_map.get(node.type, "class")
423
-
424
- # Extract modifiers efficiently
425
- modifiers = self._extract_modifiers_optimized(node)
426
- visibility = self._determine_visibility(modifiers)
427
-
428
- # Extract superclass and interfaces (optimized: single pass)
429
- extends_class = None
430
- implements_interfaces = []
431
-
432
- for child in node.children:
433
- if child.type == "superclass":
434
- extends_text = self._get_node_text_optimized(child)
435
- match = re.search(r"\b[A-Z]\w*", extends_text)
436
- if match:
437
- extends_class = match.group(0)
438
- elif child.type == "super_interfaces":
439
- implements_text = self._get_node_text_optimized(child)
440
- implements_interfaces = re.findall(r"\b[A-Z]\w*", implements_text)
441
-
442
- # Extract annotations for this class
443
- class_annotations = self._find_annotations_for_line_cached(start_line)
444
-
445
- # Check if this is a nested class
446
- is_nested = self._is_nested_class(node)
447
- parent_class = self._find_parent_class(node) if is_nested else None
448
-
449
- # Extract raw text
450
- start_line_idx = max(0, start_line - 1)
451
- end_line_idx = min(len(self.content_lines), end_line)
452
- raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
453
-
454
- return Class(
455
- name=class_name,
456
- start_line=start_line,
457
- end_line=end_line,
458
- raw_text=raw_text,
459
- language="java",
460
- class_type=class_type,
461
- full_qualified_name=full_qualified_name,
462
- package_name=package_name,
463
- superclass=extends_class,
464
- interfaces=implements_interfaces,
465
- modifiers=modifiers,
466
- visibility=visibility,
467
- # Java-specific detailed information
468
- annotations=class_annotations,
469
- is_nested=is_nested,
470
- parent_class=parent_class,
471
- extends_class=extends_class, # Alias for superclass
472
- implements_interfaces=implements_interfaces, # Alias for interfaces
473
- )
474
- except (AttributeError, ValueError, TypeError) as e:
475
- log_debug(f"Failed to extract class info: {e}")
476
- return None
477
- except Exception as e:
478
- log_error(f"Unexpected error in class extraction: {e}")
479
- return None
480
-
481
- def _extract_method_optimized(self, node: "tree_sitter.Node") -> Function | None:
482
- """Extract method information optimized (from AdvancedAnalyzer)"""
483
- try:
484
- start_line = node.start_point[0] + 1
485
- end_line = node.end_point[0] + 1
486
-
487
- # Extract method information efficiently
488
- method_info = self._parse_method_signature_optimized(node)
489
- if not method_info:
490
- return None
491
-
492
- method_name, return_type, parameters, modifiers, throws = method_info
493
- is_constructor = node.type == "constructor_declaration"
494
- visibility = self._determine_visibility(modifiers)
495
-
496
- # Extract annotations for this method
497
- method_annotations = self._find_annotations_for_line_cached(start_line)
498
-
499
- # Calculate complexity score
500
- complexity_score = self._calculate_complexity_optimized(node)
501
-
502
- # Extract JavaDoc
503
- javadoc = self._extract_javadoc_for_line(start_line)
504
-
505
- # Extract raw text
506
- start_line_idx = max(0, start_line - 1)
507
- end_line_idx = min(len(self.content_lines), end_line)
508
- raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
509
-
510
- return Function(
511
- name=method_name,
512
- start_line=start_line,
513
- end_line=end_line,
514
- raw_text=raw_text,
515
- language="java",
516
- parameters=parameters,
517
- return_type=return_type if not is_constructor else "void",
518
- modifiers=modifiers,
519
- is_static="static" in modifiers,
520
- is_private="private" in modifiers,
521
- is_public="public" in modifiers,
522
- is_constructor=is_constructor,
523
- visibility=visibility,
524
- docstring=javadoc,
525
- # Java-specific detailed information
526
- annotations=method_annotations,
527
- throws=throws,
528
- complexity_score=complexity_score,
529
- is_abstract="abstract" in modifiers,
530
- is_final="final" in modifiers,
531
- )
532
- except (AttributeError, ValueError, TypeError) as e:
533
- log_debug(f"Failed to extract method info: {e}")
534
- return None
535
- except Exception as e:
536
- log_error(f"Unexpected error in method extraction: {e}")
537
- return None
538
-
539
- def _extract_field_optimized(self, node: "tree_sitter.Node") -> list[Variable]:
540
- """Extract field information optimized (from AdvancedAnalyzer)"""
541
- fields: list[Variable] = []
542
- try:
543
- start_line = node.start_point[0] + 1
544
- end_line = node.end_point[0] + 1
545
-
546
- # Parse field declaration using AdvancedAnalyzer method
547
- field_info = self._parse_field_declaration_optimized(node)
548
- if not field_info:
549
- return fields
550
-
551
- field_type, variable_names, modifiers = field_info
552
- visibility = self._determine_visibility(modifiers)
553
-
554
- # Extract annotations for this field
555
- field_annotations = self._find_annotations_for_line_cached(start_line)
556
-
557
- # Extract JavaDoc for this field
558
- field_javadoc = self._extract_javadoc_for_line(start_line)
559
-
560
- # Create Variable object for each variable (matching AdvancedAnalyzer structure)
561
- for var_name in variable_names:
562
- # Extract raw text
563
- start_line_idx = max(0, start_line - 1)
564
- end_line_idx = min(len(self.content_lines), end_line)
565
- raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
566
-
567
- field = Variable(
568
- name=var_name,
569
- start_line=start_line,
570
- end_line=end_line,
571
- raw_text=raw_text,
572
- language="java",
573
- variable_type=field_type,
574
- modifiers=modifiers,
575
- is_static="static" in modifiers,
576
- is_constant="final" in modifiers,
577
- visibility=visibility,
578
- docstring=field_javadoc,
579
- # Java-specific detailed information
580
- annotations=field_annotations,
581
- is_final="final" in modifiers,
582
- field_type=field_type, # Alias for variable_type
583
- )
584
- fields.append(field)
585
- except (AttributeError, ValueError, TypeError) as e:
586
- log_debug(f"Failed to extract field info: {e}")
587
- except Exception as e:
588
- log_error(f"Unexpected error in field extraction: {e}")
589
-
590
- return fields
591
-
592
- def _parse_method_signature_optimized(
593
- self, node: "tree_sitter.Node"
594
- ) -> tuple[str, str, list[str], list[str], list[str]] | None:
595
- """Parse method signature optimized (from AdvancedAnalyzer)"""
596
- try:
597
- # Extract method name
598
- method_name = None
599
- for child in node.children:
600
- if child.type == "identifier":
601
- method_name = self._get_node_text_optimized(child)
602
- break
603
-
604
- if not method_name:
605
- return None
606
-
607
- # Extract return type
608
- return_type = "void"
609
- for child in node.children:
610
- if child.type in [
611
- "type_identifier",
612
- "void_type",
613
- "primitive_type",
614
- "integral_type",
615
- "boolean_type",
616
- "floating_point_type",
617
- "array_type",
618
- ]:
619
- return_type = self._get_node_text_optimized(child)
620
- break
621
- elif child.type == "generic_type":
622
- return_type = self._get_node_text_optimized(child)
623
- break
624
-
625
- # Extract parameters
626
- parameters = []
627
- for child in node.children:
628
- if child.type == "formal_parameters":
629
- for param in child.children:
630
- if param.type == "formal_parameter":
631
- param_text = self._get_node_text_optimized(param)
632
- parameters.append(param_text)
633
-
634
- # Extract modifiers
635
- modifiers = self._extract_modifiers_optimized(node)
636
-
637
- # Extract throws clause
638
- throws = []
639
- for child in node.children:
640
- if child.type == "throws":
641
- throws_text = self._get_node_text_optimized(child)
642
- exceptions = re.findall(r"\b[A-Z]\w*Exception\b", throws_text)
643
- throws.extend(exceptions)
644
-
645
- return method_name, return_type, parameters, modifiers, throws
646
- except Exception:
647
- return None
648
-
649
- def _parse_field_declaration_optimized(
650
- self, node: "tree_sitter.Node"
651
- ) -> tuple[str, list[str], list[str]] | None:
652
- """Parse field declaration optimized (from AdvancedAnalyzer)"""
653
- try:
654
- # Extract type (exactly as in AdvancedAnalyzer)
655
- field_type = None
656
- for child in node.children:
657
- if child.type in [
658
- "type_identifier",
659
- "primitive_type",
660
- "integral_type",
661
- "generic_type",
662
- "boolean_type",
663
- "floating_point_type",
664
- "array_type",
665
- ]:
666
- field_type = self._get_node_text_optimized(child)
667
- break
668
-
669
- if not field_type:
670
- return None
671
-
672
- # Extract variable names (exactly as in AdvancedAnalyzer)
673
- variable_names = []
674
- for child in node.children:
675
- if child.type == "variable_declarator":
676
- for grandchild in child.children:
677
- if grandchild.type == "identifier":
678
- var_name = self._get_node_text_optimized(grandchild)
679
- variable_names.append(var_name)
680
-
681
- if not variable_names:
682
- return None
683
-
684
- # Extract modifiers (exactly as in AdvancedAnalyzer)
685
- modifiers = self._extract_modifiers_optimized(node)
686
-
687
- return field_type, variable_names, modifiers
688
- except Exception:
689
- return None
690
-
691
- def _extract_modifiers_optimized(self, node: "tree_sitter.Node") -> list[str]:
692
- """Extract modifiers efficiently (from AdvancedAnalyzer)"""
693
- modifiers = []
694
- for child in node.children:
695
- if child.type == "modifiers":
696
- for mod_child in child.children:
697
- if mod_child.type in [
698
- "public",
699
- "private",
700
- "protected",
701
- "static",
702
- "final",
703
- "abstract",
704
- "synchronized",
705
- "volatile",
706
- "transient",
707
- ]:
708
- modifiers.append(mod_child.type)
709
- elif mod_child.type not in ["marker_annotation"]:
710
- mod_text = self._get_node_text_optimized(mod_child)
711
- if mod_text in [
712
- "public",
713
- "private",
714
- "protected",
715
- "static",
716
- "final",
717
- "abstract",
718
- "synchronized",
719
- "volatile",
720
- "transient",
721
- ]:
722
- modifiers.append(mod_text)
723
- return modifiers
724
-
725
- def _extract_package_info(self, node: "tree_sitter.Node") -> None:
726
- """Extract package information (from AdvancedAnalyzer)"""
727
- try:
728
- package_text = self._get_node_text_optimized(node)
729
- match = re.search(r"package\s+([\w.]+)", package_text)
730
- if match:
731
- self.current_package = match.group(1)
732
- except (AttributeError, ValueError, IndexError) as e:
733
- log_debug(f"Failed to extract package info: {e}")
734
- except Exception as e:
735
- log_error(f"Unexpected error in package extraction: {e}")
736
-
737
- def _extract_package_element(self, node: "tree_sitter.Node") -> Package | None:
738
- """Extract package element for inclusion in results"""
739
- try:
740
- package_text = self._get_node_text_optimized(node)
741
- match = re.search(r"package\s+([\w.]+)", package_text)
742
- if match:
743
- package_name = match.group(1)
744
- return Package(
745
- name=package_name,
746
- start_line=node.start_point[0] + 1,
747
- end_line=node.end_point[0] + 1,
748
- raw_text=package_text,
749
- language="java",
750
- )
751
- except (AttributeError, ValueError, IndexError) as e:
752
- log_debug(f"Failed to extract package element: {e}")
753
- except Exception as e:
754
- log_error(f"Unexpected error in package element extraction: {e}")
755
- return None
756
-
757
- def _determine_visibility(self, modifiers: list[str]) -> str:
758
- """Determine visibility from modifiers"""
759
- if "public" in modifiers:
760
- return "public"
761
- elif "private" in modifiers:
762
- return "private"
763
- elif "protected" in modifiers:
764
- return "protected"
765
- else:
766
- return "package" # Default package visibility
767
-
768
- def _find_annotations_for_line_cached(
769
- self, target_line: int
770
- ) -> list[dict[str, Any]]:
771
- """Find annotations for specified line with caching (from AdvancedAnalyzer)"""
772
- if target_line in self._annotation_cache:
773
- return self._annotation_cache[target_line]
774
-
775
- result_annotations = []
776
- for annotation in self.annotations:
777
- line_distance = target_line - annotation.get("end_line", 0)
778
- if 1 <= line_distance <= 5:
779
- result_annotations.append(annotation)
780
-
781
- self._annotation_cache[target_line] = result_annotations
782
- return result_annotations
783
-
784
- def _calculate_complexity_optimized(self, node: "tree_sitter.Node") -> int:
785
- """Calculate cyclomatic complexity efficiently (from AdvancedAnalyzer)"""
786
- complexity = 1
787
- try:
788
- node_text = self._get_node_text_optimized(node).lower()
789
- keywords = ["if", "while", "for", "catch", "case", "switch"]
790
- for keyword in keywords:
791
- complexity += node_text.count(keyword)
792
- except (AttributeError, TypeError) as e:
793
- log_debug(f"Failed to calculate complexity: {e}")
794
- except Exception as e:
795
- log_error(f"Unexpected error in complexity calculation: {e}")
796
- return complexity
797
-
798
- def _extract_javadoc_for_line(self, target_line: int) -> str | None:
799
- """Extract JavaDoc comment immediately before the specified line"""
800
- try:
801
- if not self.content_lines or target_line <= 1:
802
- return None
803
-
804
- # Search backwards from target_line
805
- javadoc_lines = []
806
- current_line = target_line - 1
807
-
808
- # Skip empty lines
809
- while current_line > 0:
810
- line = self.content_lines[current_line - 1].strip()
811
- if line:
812
- break
813
- current_line -= 1
814
-
815
- # Check for JavaDoc end
816
- if current_line > 0:
817
- line = self.content_lines[current_line - 1].strip()
818
- if line.endswith("*/"):
819
- # This might be a JavaDoc comment
820
- javadoc_lines.append(self.content_lines[current_line - 1])
821
- current_line -= 1
822
-
823
- # Collect JavaDoc content
824
- while current_line > 0:
825
- line_content = self.content_lines[current_line - 1]
826
- line_stripped = line_content.strip()
827
- javadoc_lines.append(line_content)
828
-
829
- if line_stripped.startswith("/**"):
830
- # Found the start of JavaDoc
831
- javadoc_lines.reverse()
832
- javadoc_text = "\n".join(javadoc_lines)
833
-
834
- # Clean up the JavaDoc
835
- return self._clean_javadoc(javadoc_text)
836
- current_line -= 1
837
-
838
- return None
839
-
840
- except Exception as e:
841
- log_debug(f"Failed to extract JavaDoc: {e}")
842
- return None
843
-
844
- def _clean_javadoc(self, javadoc_text: str) -> str:
845
- """Clean JavaDoc text by removing comment markers"""
846
- if not javadoc_text:
847
- return ""
848
-
849
- lines = javadoc_text.split("\n")
850
- cleaned_lines = []
851
-
852
- for line in lines:
853
- # Remove leading/trailing whitespace
854
- line = line.strip()
855
-
856
- # Remove comment markers
857
- if line.startswith("/**"):
858
- line = line[3:].strip()
859
- elif line.startswith("*/"):
860
- line = line[2:].strip()
861
- elif line.startswith("*"):
862
- line = line[1:].strip()
863
-
864
- if line: # Only add non-empty lines
865
- cleaned_lines.append(line)
866
-
867
- return " ".join(cleaned_lines) if cleaned_lines else ""
868
-
869
- def _is_nested_class(self, node: "tree_sitter.Node") -> bool:
870
- """Check if this is a nested class (from AdvancedAnalyzer)"""
871
- current = node.parent
872
- while current:
873
- if current.type in [
874
- "class_declaration",
875
- "interface_declaration",
876
- "enum_declaration",
877
- ]:
878
- return True
879
- current = current.parent
880
- return False
881
-
882
- def _find_parent_class(self, node: "tree_sitter.Node") -> str | None:
883
- """Find parent class name (from AdvancedAnalyzer)"""
884
- current = node.parent
885
- while current:
886
- if current.type in [
887
- "class_declaration",
888
- "interface_declaration",
889
- "enum_declaration",
890
- ]:
891
- return self._extract_class_name(current)
892
- current = current.parent
893
- return None
894
-
895
- def _extract_class_name(self, node: "tree_sitter.Node") -> str | None:
896
- """Extract class name from node (from AdvancedAnalyzer)"""
897
- for child in node.children:
898
- if child.type == "identifier":
899
- return self._get_node_text_optimized(child)
900
- return None
901
-
902
- def _extract_annotation_optimized(
903
- self, node: "tree_sitter.Node"
904
- ) -> dict[str, Any] | None:
905
- """Extract annotation information optimized (from AdvancedAnalyzer)"""
906
- try:
907
- start_line = node.start_point[0] + 1
908
- end_line = node.end_point[0] + 1
909
- raw_text = self._get_node_text_optimized(node)
910
-
911
- # Extract annotation name efficiently
912
- name_match = re.search(r"@(\w+)", raw_text)
913
- if not name_match:
914
- return None
915
-
916
- annotation_name = name_match.group(1)
917
-
918
- # Extract parameters efficiently
919
- parameters = []
920
- param_match = re.search(r"\((.*?)\)", raw_text, re.DOTALL)
921
- if param_match:
922
- param_text = param_match.group(1).strip()
923
- if param_text:
924
- # Simple parameter parsing
925
- if "=" in param_text:
926
- parameters = [
927
- p.strip() for p in re.split(r",(?![^()]*\))", param_text)
928
- ]
929
- else:
930
- parameters = [param_text]
931
-
932
- return {
933
- "name": annotation_name,
934
- "parameters": parameters,
935
- "start_line": start_line,
936
- "end_line": end_line,
937
- "raw_text": raw_text,
938
- }
939
- except (AttributeError, IndexError, ValueError) as e:
940
- log_debug(f"Failed to extract annotation from node: {e}")
941
- return None
942
- except Exception as e:
943
- log_error(f"Unexpected exception in annotation extraction: {e}")
944
- return None
945
-
946
- def _extract_import_info(
947
- self, node: "tree_sitter.Node", source_code: str
948
- ) -> Import | None:
949
- """Extract import information (from AdvancedAnalyzer)"""
950
- try:
951
- import_text = self._get_node_text_optimized(node)
952
- # Simple approach: get everything until semicolon then process
953
- import_content = import_text.strip()
954
- if import_content.endswith(";"):
955
- import_content = import_content[:-1]
956
-
957
- if "static" in import_content:
958
- # Static import
959
- static_match = re.search(r"import\s+static\s+([\w.]+)", import_content)
960
- if static_match:
961
- import_name = static_match.group(1)
962
- # Handle wildcard case
963
- if import_content.endswith(".*"):
964
- import_name = import_name.replace(".*", "")
965
- # For static wildcard, remove last element
966
- parts = import_name.split(".")
967
- if len(parts) > 1:
968
- import_name = ".".join(parts[:-1])
969
-
970
- return Import(
971
- name=import_name,
972
- start_line=node.start_point[0] + 1,
973
- end_line=node.end_point[0] + 1,
974
- raw_text=import_text,
975
- language="java",
976
- module_name=import_name,
977
- is_static=True,
978
- is_wildcard=import_content.endswith(".*"),
979
- import_statement=import_content,
980
- )
981
- else:
982
- # Normal import
983
- normal_match = re.search(r"import\s+([\w.]+)", import_content)
984
- if normal_match:
985
- import_name = normal_match.group(1)
986
- # Handle wildcard case
987
- if import_content.endswith(".*"):
988
- if import_name.endswith(".*"):
989
- import_name = import_name[:-2] # Remove trailing .*
990
- elif import_name.endswith("."):
991
- import_name = import_name[:-1] # Remove trailing .
992
-
993
- return Import(
994
- name=import_name,
995
- start_line=node.start_point[0] + 1,
996
- end_line=node.end_point[0] + 1,
997
- raw_text=import_text,
998
- language="java",
999
- module_name=import_name,
1000
- is_static=False,
1001
- is_wildcard=import_content.endswith(".*"),
1002
- import_statement=import_content,
1003
- )
1004
- except (AttributeError, ValueError, IndexError) as e:
1005
- log_debug(f"Failed to extract import info: {e}")
1006
- except Exception as e:
1007
- log_error(f"Unexpected error in import extraction: {e}")
1008
- return None
1009
-
1010
-
1011
- class JavaPlugin(LanguagePlugin):
1012
- """Java language plugin for the new architecture"""
1013
-
1014
- def __init__(self) -> None:
1015
- """Initialize the Java plugin"""
1016
- super().__init__()
1017
- self._language_cache: tree_sitter.Language | None = None
1018
-
1019
- def get_language_name(self) -> str:
1020
- """Return the name of the programming language this plugin supports"""
1021
- return "java"
1022
-
1023
- def get_file_extensions(self) -> list[str]:
1024
- """Return list of file extensions this plugin supports"""
1025
- return [".java", ".jsp", ".jspx"]
1026
-
1027
- def create_extractor(self) -> ElementExtractor:
1028
- """Create and return an element extractor for this language"""
1029
- return JavaElementExtractor()
1030
-
1031
- def get_tree_sitter_language(self) -> Optional["tree_sitter.Language"]:
1032
- """Get the Tree-sitter language object for Java"""
1033
- if self._language_cache is None:
1034
- try:
1035
- import tree_sitter_java as tsjava
1036
-
1037
- self._language_cache = tsjava.language() # type: ignore
1038
- except ImportError:
1039
- log_error("tree-sitter-java not available")
1040
- return None
1041
- except Exception as e:
1042
- log_error(f"Failed to load Java language: {e}")
1043
- return None
1044
- return self._language_cache
1045
-
1046
- def get_supported_queries(self) -> list[str]:
1047
- """Get list of supported query names for this language"""
1048
- return ["class", "method", "field", "import"]
1049
-
1050
- def is_applicable(self, file_path: str) -> bool:
1051
- """Check if this plugin is applicable for the given file"""
1052
- return any(
1053
- file_path.lower().endswith(ext.lower())
1054
- for ext in self.get_file_extensions()
1055
- )
1056
-
1057
- def get_plugin_info(self) -> dict:
1058
- """Get information about this plugin"""
1059
- return {
1060
- "name": "Java Plugin",
1061
- "language": self.get_language_name(),
1062
- "extensions": self.get_file_extensions(),
1063
- "version": "2.0.0",
1064
- "supported_queries": self.get_supported_queries(),
1065
- }
1066
-
1067
- async def analyze_file(
1068
- self, file_path: str, request: "AnalysisRequest"
1069
- ) -> "AnalysisResult":
1070
- """
1071
- Analyze a Java file and return analysis results.
1072
-
1073
- Args:
1074
- file_path: Path to the Java file to analyze
1075
- request: Analysis request object
1076
-
1077
- Returns:
1078
- AnalysisResult object containing the analysis results
1079
- """
1080
- try:
1081
- from ..core.parser import Parser
1082
- from ..models import AnalysisResult
1083
-
1084
- log_debug(f"Java Plugin: Starting analysis of {file_path}")
1085
-
1086
- # Read file content
1087
- with open(file_path, encoding="utf-8") as f:
1088
- source_code = f.read()
1089
-
1090
- log_debug(f"Java Plugin: Read {len(source_code)} characters from file")
1091
-
1092
- # Parse the file
1093
- parser = Parser()
1094
- parse_result = parser.parse_code(source_code, "java")
1095
-
1096
- log_debug(f"Java Plugin: Parse result success: {parse_result.success}")
1097
-
1098
- if not parse_result.success:
1099
- log_error(f"Java Plugin: Parse failed: {parse_result.error_message}")
1100
- return AnalysisResult(
1101
- file_path=file_path,
1102
- language="java",
1103
- line_count=len(source_code.splitlines()),
1104
- elements=[],
1105
- node_count=0,
1106
- query_results={},
1107
- source_code=source_code,
1108
- success=False,
1109
- error_message=parse_result.error_message,
1110
- )
1111
-
1112
- # Extract elements
1113
- extractor = self.create_extractor()
1114
-
1115
- if parse_result.tree:
1116
- log_debug("Java Plugin: Extracting functions...")
1117
- functions = extractor.extract_functions(parse_result.tree, source_code)
1118
- log_debug(f"Java Plugin: Found {len(functions)} functions")
1119
-
1120
- log_debug("Java Plugin: Extracting classes...")
1121
- classes = extractor.extract_classes(parse_result.tree, source_code)
1122
- log_debug(f"Java Plugin: Found {len(classes)} classes")
1123
-
1124
- log_debug("Java Plugin: Extracting variables...")
1125
- variables = extractor.extract_variables(parse_result.tree, source_code)
1126
- log_debug(f"Java Plugin: Found {len(variables)} variables")
1127
-
1128
- log_debug("Java Plugin: Extracting imports...")
1129
- imports = extractor.extract_imports(parse_result.tree, source_code)
1130
- log_debug(f"Java Plugin: Found {len(imports)} imports")
1131
- else:
1132
- functions = []
1133
- classes = []
1134
- variables = []
1135
- imports = []
1136
-
1137
- # Combine all elements
1138
- all_elements: list[CodeElement] = []
1139
- all_elements.extend(functions)
1140
- all_elements.extend(classes)
1141
- all_elements.extend(variables)
1142
- all_elements.extend(imports)
1143
- log_debug(f"Java Plugin: Total elements: {len(all_elements)}")
1144
-
1145
- return AnalysisResult(
1146
- file_path=file_path,
1147
- language="java",
1148
- line_count=len(source_code.splitlines()),
1149
- elements=all_elements,
1150
- node_count=(
1151
- parse_result.tree.root_node.child_count if parse_result.tree else 0
1152
- ),
1153
- query_results={},
1154
- source_code=source_code,
1155
- success=True,
1156
- error_message=None,
1157
- )
1158
-
1159
- except Exception as e:
1160
- log_error(f"Failed to analyze Java file {file_path}: {e}")
1161
- import traceback
1162
-
1163
- log_error(f"Java Plugin traceback: {traceback.format_exc()}")
1164
- return AnalysisResult(
1165
- file_path=file_path,
1166
- language="java",
1167
- line_count=0,
1168
- elements=[],
1169
- node_count=0,
1170
- query_results={},
1171
- source_code="",
1172
- success=False,
1173
- error_message=str(e),
1174
- )
1
+ #!/usr/bin/env python3
2
+ """
3
+ Java Language Plugin
4
+
5
+ Provides Java-specific parsing and element extraction functionality.
6
+ Migrated from AdvancedAnalyzer implementation for future independence.
7
+ """
8
+
9
+ import re
10
+ from typing import TYPE_CHECKING, Any, Optional
11
+
12
+ if TYPE_CHECKING:
13
+ import tree_sitter
14
+
15
+ from ..core.analysis_engine import AnalysisRequest
16
+ from ..models import AnalysisResult
17
+
18
+ from ..encoding_utils import extract_text_slice, safe_encode
19
+ from ..models import Class, CodeElement, Function, Import, Package, Variable
20
+ from ..plugins.base import ElementExtractor, LanguagePlugin
21
+ from ..utils import log_debug, log_error, log_warning
22
+
23
+
24
+ class JavaElementExtractor(ElementExtractor):
25
+ """Java-specific element extractor with AdvancedAnalyzer implementation"""
26
+
27
+ def __init__(self) -> None:
28
+ """Initialize the Java element extractor."""
29
+ self.current_package: str = ""
30
+ self.current_file: str = ""
31
+ self.source_code: str = ""
32
+ self.content_lines: list[str] = []
33
+ self.imports: list[str] = []
34
+
35
+ # Performance optimization caches (from AdvancedAnalyzer)
36
+ self._node_text_cache: dict[int, str] = {}
37
+ self._processed_nodes: set[int] = set()
38
+ self._element_cache: dict[tuple[int, str], Any] = {}
39
+ self._file_encoding: str | None = None
40
+ self._annotation_cache: dict[int, list[dict[str, Any]]] = {}
41
+ self._signature_cache: dict[int, str] = {}
42
+
43
+ # Extracted annotations for cross-referencing
44
+ self.annotations: list[dict[str, Any]] = []
45
+
46
+ def extract_annotations(
47
+ self, tree: "tree_sitter.Tree", source_code: str
48
+ ) -> list[dict[str, Any]]:
49
+ """Extract Java annotations using AdvancedAnalyzer implementation"""
50
+ self.source_code = source_code
51
+ self.content_lines = source_code.split("\n")
52
+ self._reset_caches()
53
+
54
+ annotations: list[dict[str, Any]] = []
55
+
56
+ # Use AdvancedAnalyzer's optimized traversal for annotations
57
+ extractors = {
58
+ "annotation": self._extract_annotation_optimized,
59
+ "marker_annotation": self._extract_annotation_optimized,
60
+ }
61
+
62
+ self._traverse_and_extract_iterative(
63
+ tree.root_node, extractors, annotations, "annotation"
64
+ )
65
+
66
+ # Store annotations for cross-referencing
67
+ self.annotations = annotations
68
+
69
+ log_debug(f"Extracted {len(annotations)} annotations")
70
+ return annotations
71
+
72
+ def extract_functions(
73
+ self, tree: "tree_sitter.Tree", source_code: str
74
+ ) -> list[Function]:
75
+ """Extract Java method definitions using AdvancedAnalyzer implementation"""
76
+ self.source_code = source_code
77
+ self.content_lines = source_code.split("\n")
78
+ self._reset_caches()
79
+
80
+ functions: list[Function] = []
81
+
82
+ # Use AdvancedAnalyzer's optimized traversal
83
+ extractors = {
84
+ "method_declaration": self._extract_method_optimized,
85
+ "constructor_declaration": self._extract_method_optimized,
86
+ }
87
+
88
+ self._traverse_and_extract_iterative(
89
+ tree.root_node, extractors, functions, "method"
90
+ )
91
+
92
+ log_debug(f"Extracted {len(functions)} methods")
93
+ return functions
94
+
95
+ def extract_classes(
96
+ self, tree: "tree_sitter.Tree", source_code: str
97
+ ) -> list[Class]:
98
+ """Extract Java class definitions using AdvancedAnalyzer implementation"""
99
+ self.source_code = source_code
100
+ self.content_lines = source_code.split("\n")
101
+ self._reset_caches()
102
+
103
+ classes: list[Class] = []
104
+
105
+ # Use AdvancedAnalyzer's optimized traversal
106
+ extractors = {
107
+ "class_declaration": self._extract_class_optimized,
108
+ "interface_declaration": self._extract_class_optimized,
109
+ "enum_declaration": self._extract_class_optimized,
110
+ }
111
+
112
+ self._traverse_and_extract_iterative(
113
+ tree.root_node, extractors, classes, "class"
114
+ )
115
+
116
+ log_debug(f"Extracted {len(classes)} classes")
117
+ return classes
118
+
119
+ def extract_variables(
120
+ self, tree: "tree_sitter.Tree", source_code: str
121
+ ) -> list[Variable]:
122
+ """Extract Java field definitions using AdvancedAnalyzer implementation"""
123
+ self.source_code = source_code
124
+ self.content_lines = source_code.split("\n")
125
+ self._reset_caches()
126
+
127
+ variables: list[Variable] = []
128
+
129
+ # Use AdvancedAnalyzer's optimized traversal
130
+ extractors = {
131
+ "field_declaration": self._extract_field_optimized,
132
+ }
133
+
134
+ log_debug("Starting field extraction with iterative traversal")
135
+ self._traverse_and_extract_iterative(
136
+ tree.root_node, extractors, variables, "field"
137
+ )
138
+
139
+ log_debug(f"Extracted {len(variables)} fields")
140
+ for i, var in enumerate(variables[:3]):
141
+ log_debug(f"Field {i}: {var.name} ({var.variable_type})")
142
+ return variables
143
+
144
+ def extract_imports(
145
+ self, tree: "tree_sitter.Tree", source_code: str
146
+ ) -> list[Import]:
147
+ """Extract Java import statements"""
148
+ self.source_code = source_code
149
+ self.content_lines = source_code.split("\n")
150
+
151
+ imports: list[Import] = []
152
+
153
+ # Extract package and imports efficiently (from AdvancedAnalyzer)
154
+ for child in tree.root_node.children:
155
+ if child.type == "package_declaration":
156
+ self._extract_package_info(child)
157
+ elif child.type == "import_declaration":
158
+ import_info = self._extract_import_info(child, source_code)
159
+ if import_info:
160
+ imports.append(import_info)
161
+ elif child.type in [
162
+ "class_declaration",
163
+ "interface_declaration",
164
+ "enum_declaration",
165
+ ]:
166
+ # After package and imports come class declarations, so stop
167
+ break
168
+
169
+ log_debug(f"Extracted {len(imports)} imports")
170
+ return imports
171
+
172
+ def extract_packages(
173
+ self, tree: "tree_sitter.Tree", source_code: str
174
+ ) -> list[Package]:
175
+ """Extract Java package declarations"""
176
+ self.source_code = source_code
177
+ self.content_lines = source_code.split("\n")
178
+
179
+ packages: list[Package] = []
180
+
181
+ # Extract package declaration
182
+ for child in tree.root_node.children:
183
+ if child.type == "package_declaration":
184
+ package_info = self._extract_package_element(child)
185
+ if package_info:
186
+ packages.append(package_info)
187
+ break # Only one package declaration per file
188
+
189
+ log_debug(f"Extracted {len(packages)} packages")
190
+ return packages
191
+
192
+ def _reset_caches(self) -> None:
193
+ """Reset performance caches"""
194
+ self._node_text_cache.clear()
195
+ self._processed_nodes.clear()
196
+ self._element_cache.clear()
197
+ self._annotation_cache.clear()
198
+ self._signature_cache.clear()
199
+ self.annotations.clear()
200
+
201
+ def _traverse_and_extract_iterative(
202
+ self,
203
+ root_node: "tree_sitter.Node",
204
+ extractors: dict[str, Any],
205
+ results: list[Any],
206
+ element_type: str,
207
+ ) -> None:
208
+ """
209
+ Iterative node traversal and extraction (from AdvancedAnalyzer)
210
+ Uses batch processing for optimal performance
211
+ """
212
+ if not root_node:
213
+ return # type: ignore[unreachable]
214
+
215
+ # Target node types for extraction
216
+ target_node_types = set(extractors.keys())
217
+
218
+ # Container node types that may contain target nodes (from AdvancedAnalyzer)
219
+ container_node_types = {
220
+ "program",
221
+ "class_body",
222
+ "interface_body",
223
+ "enum_body",
224
+ "class_declaration",
225
+ "interface_declaration",
226
+ "enum_declaration",
227
+ "method_declaration",
228
+ "constructor_declaration",
229
+ "block",
230
+ "modifiers", # アノテーションは修飾子に含まれることがある
231
+ }
232
+
233
+ # Iterative DFS stack: (node, depth)
234
+ node_stack = [(root_node, 0)]
235
+ processed_nodes = 0
236
+ max_depth = 50 # Prevent infinite loops
237
+
238
+ # Batch processing containers (from AdvancedAnalyzer)
239
+ field_batch = []
240
+
241
+ while node_stack:
242
+ current_node, depth = node_stack.pop()
243
+
244
+ # Safety check for maximum depth
245
+ if depth > max_depth:
246
+ log_warning(f"Maximum traversal depth ({max_depth}) exceeded")
247
+ continue
248
+
249
+ processed_nodes += 1
250
+ node_type = current_node.type
251
+
252
+ # Early termination: skip nodes that don't contain target elements
253
+ if (
254
+ depth > 0
255
+ and node_type not in target_node_types
256
+ and node_type not in container_node_types
257
+ ):
258
+ continue
259
+
260
+ # Collect target nodes for batch processing (from AdvancedAnalyzer)
261
+ if node_type in target_node_types:
262
+ if element_type == "field" and node_type == "field_declaration":
263
+ field_batch.append(current_node)
264
+ else:
265
+ # Process non-field elements immediately
266
+ node_id = id(current_node)
267
+
268
+ # Skip if already processed
269
+ if node_id in self._processed_nodes:
270
+ continue
271
+
272
+ # Check element cache first
273
+ cache_key = (node_id, element_type)
274
+ if cache_key in self._element_cache:
275
+ element = self._element_cache[cache_key]
276
+ if element:
277
+ if isinstance(element, list):
278
+ results.extend(element)
279
+ else:
280
+ results.append(element)
281
+ self._processed_nodes.add(node_id)
282
+ continue
283
+
284
+ # Extract and cache
285
+ extractor = extractors.get(node_type)
286
+ if extractor:
287
+ element = extractor(current_node)
288
+ self._element_cache[cache_key] = element
289
+ if element:
290
+ if isinstance(element, list):
291
+ results.extend(element)
292
+ else:
293
+ results.append(element)
294
+ self._processed_nodes.add(node_id)
295
+
296
+ # Add children to stack (reversed for correct DFS traversal)
297
+ if current_node.children:
298
+ for child in reversed(current_node.children):
299
+ node_stack.append((child, depth + 1))
300
+
301
+ # Process field batch when it reaches optimal size (from AdvancedAnalyzer)
302
+ if len(field_batch) >= 10:
303
+ self._process_field_batch(field_batch, extractors, results)
304
+ field_batch.clear()
305
+
306
+ # Process remaining field batch (from AdvancedAnalyzer)
307
+ if field_batch:
308
+ self._process_field_batch(field_batch, extractors, results)
309
+
310
+ log_debug(f"Iterative traversal processed {processed_nodes} nodes")
311
+
312
+ def _process_field_batch(
313
+ self, batch: list["tree_sitter.Node"], extractors: dict, results: list[Any]
314
+ ) -> None:
315
+ """Process field nodes with caching (from AdvancedAnalyzer)"""
316
+ for node in batch:
317
+ node_id = id(node)
318
+
319
+ # Skip if already processed
320
+ if node_id in self._processed_nodes:
321
+ continue
322
+
323
+ # Check element cache first
324
+ cache_key = (node_id, "field")
325
+ if cache_key in self._element_cache:
326
+ elements = self._element_cache[cache_key]
327
+ if elements:
328
+ if isinstance(elements, list):
329
+ results.extend(elements)
330
+ else:
331
+ results.append(elements)
332
+ self._processed_nodes.add(node_id)
333
+ continue
334
+
335
+ # Extract and cache
336
+ extractor = extractors.get(node.type)
337
+ if extractor:
338
+ elements = extractor(node)
339
+ self._element_cache[cache_key] = elements
340
+ if elements:
341
+ if isinstance(elements, list):
342
+ results.extend(elements)
343
+ else:
344
+ results.append(elements)
345
+ self._processed_nodes.add(node_id)
346
+
347
+ def _get_node_text_optimized(self, node: "tree_sitter.Node") -> str:
348
+ """Get node text with optimized caching (from AdvancedAnalyzer)"""
349
+ node_id = id(node)
350
+
351
+ # Check cache first
352
+ if node_id in self._node_text_cache:
353
+ return self._node_text_cache[node_id]
354
+
355
+ try:
356
+ # Use encoding utilities for text extraction
357
+ start_byte = node.start_byte
358
+ end_byte = node.end_byte
359
+
360
+ encoding = self._file_encoding or "utf-8"
361
+ content_bytes = safe_encode("\n".join(self.content_lines), encoding)
362
+ text = extract_text_slice(content_bytes, start_byte, end_byte, encoding)
363
+
364
+ self._node_text_cache[node_id] = text
365
+ return text
366
+ except Exception as e:
367
+ log_error(f"Error in _get_node_text_optimized: {e}")
368
+ # Fallback to simple text extraction
369
+ try:
370
+ start_point = node.start_point
371
+ end_point = node.end_point
372
+
373
+ if start_point[0] == end_point[0]:
374
+ # Single line
375
+ line = self.content_lines[start_point[0]]
376
+ return line[start_point[1] : end_point[1]]
377
+ else:
378
+ # Multiple lines
379
+ lines = []
380
+ for i in range(start_point[0], end_point[0] + 1):
381
+ if i < len(self.content_lines):
382
+ line = self.content_lines[i]
383
+ if i == start_point[0]:
384
+ lines.append(line[start_point[1] :])
385
+ elif i == end_point[0]:
386
+ lines.append(line[: end_point[1]])
387
+ else:
388
+ lines.append(line)
389
+ return "\n".join(lines)
390
+ except Exception as fallback_error:
391
+ log_error(f"Fallback text extraction also failed: {fallback_error}")
392
+ return ""
393
+
394
+ def _extract_class_optimized(self, node: "tree_sitter.Node") -> Class | None:
395
+ """Extract class information optimized (from AdvancedAnalyzer)"""
396
+ try:
397
+ start_line = node.start_point[0] + 1
398
+ end_line = node.end_point[0] + 1
399
+
400
+ # Extract class name efficiently
401
+ class_name = None
402
+ for child in node.children:
403
+ if child.type == "identifier":
404
+ class_name = self._get_node_text_optimized(child)
405
+ break
406
+
407
+ if not class_name:
408
+ return None
409
+
410
+ # Determine package name
411
+ package_name = self.current_package
412
+ full_qualified_name = (
413
+ f"{package_name}.{class_name}" if package_name else class_name
414
+ )
415
+
416
+ # Determine class type (optimized: dictionary lookup)
417
+ class_type_map = {
418
+ "class_declaration": "class",
419
+ "interface_declaration": "interface",
420
+ "enum_declaration": "enum",
421
+ }
422
+ class_type = class_type_map.get(node.type, "class")
423
+
424
+ # Extract modifiers efficiently
425
+ modifiers = self._extract_modifiers_optimized(node)
426
+ visibility = self._determine_visibility(modifiers)
427
+
428
+ # Extract superclass and interfaces (optimized: single pass)
429
+ extends_class = None
430
+ implements_interfaces = []
431
+
432
+ for child in node.children:
433
+ if child.type == "superclass":
434
+ extends_text = self._get_node_text_optimized(child)
435
+ match = re.search(r"\b[A-Z]\w*", extends_text)
436
+ if match:
437
+ extends_class = match.group(0)
438
+ elif child.type == "super_interfaces":
439
+ implements_text = self._get_node_text_optimized(child)
440
+ implements_interfaces = re.findall(r"\b[A-Z]\w*", implements_text)
441
+
442
+ # Extract annotations for this class
443
+ class_annotations = self._find_annotations_for_line_cached(start_line)
444
+
445
+ # Check if this is a nested class
446
+ is_nested = self._is_nested_class(node)
447
+ parent_class = self._find_parent_class(node) if is_nested else None
448
+
449
+ # Extract raw text
450
+ start_line_idx = max(0, start_line - 1)
451
+ end_line_idx = min(len(self.content_lines), end_line)
452
+ raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
453
+
454
+ return Class(
455
+ name=class_name,
456
+ start_line=start_line,
457
+ end_line=end_line,
458
+ raw_text=raw_text,
459
+ language="java",
460
+ class_type=class_type,
461
+ full_qualified_name=full_qualified_name,
462
+ package_name=package_name,
463
+ superclass=extends_class,
464
+ interfaces=implements_interfaces,
465
+ modifiers=modifiers,
466
+ visibility=visibility,
467
+ # Java-specific detailed information
468
+ annotations=class_annotations,
469
+ is_nested=is_nested,
470
+ parent_class=parent_class,
471
+ extends_class=extends_class, # Alias for superclass
472
+ implements_interfaces=implements_interfaces, # Alias for interfaces
473
+ )
474
+ except (AttributeError, ValueError, TypeError) as e:
475
+ log_debug(f"Failed to extract class info: {e}")
476
+ return None
477
+ except Exception as e:
478
+ log_error(f"Unexpected error in class extraction: {e}")
479
+ return None
480
+
481
+ def _extract_method_optimized(self, node: "tree_sitter.Node") -> Function | None:
482
+ """Extract method information optimized (from AdvancedAnalyzer)"""
483
+ try:
484
+ start_line = node.start_point[0] + 1
485
+ end_line = node.end_point[0] + 1
486
+
487
+ # Extract method information efficiently
488
+ method_info = self._parse_method_signature_optimized(node)
489
+ if not method_info:
490
+ return None
491
+
492
+ method_name, return_type, parameters, modifiers, throws = method_info
493
+ is_constructor = node.type == "constructor_declaration"
494
+ visibility = self._determine_visibility(modifiers)
495
+
496
+ # Extract annotations for this method
497
+ method_annotations = self._find_annotations_for_line_cached(start_line)
498
+
499
+ # Calculate complexity score
500
+ complexity_score = self._calculate_complexity_optimized(node)
501
+
502
+ # Extract JavaDoc
503
+ javadoc = self._extract_javadoc_for_line(start_line)
504
+
505
+ # Extract raw text
506
+ start_line_idx = max(0, start_line - 1)
507
+ end_line_idx = min(len(self.content_lines), end_line)
508
+ raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
509
+
510
+ return Function(
511
+ name=method_name,
512
+ start_line=start_line,
513
+ end_line=end_line,
514
+ raw_text=raw_text,
515
+ language="java",
516
+ parameters=parameters,
517
+ return_type=return_type if not is_constructor else "void",
518
+ modifiers=modifiers,
519
+ is_static="static" in modifiers,
520
+ is_private="private" in modifiers,
521
+ is_public="public" in modifiers,
522
+ is_constructor=is_constructor,
523
+ visibility=visibility,
524
+ docstring=javadoc,
525
+ # Java-specific detailed information
526
+ annotations=method_annotations,
527
+ throws=throws,
528
+ complexity_score=complexity_score,
529
+ is_abstract="abstract" in modifiers,
530
+ is_final="final" in modifiers,
531
+ )
532
+ except (AttributeError, ValueError, TypeError) as e:
533
+ log_debug(f"Failed to extract method info: {e}")
534
+ return None
535
+ except Exception as e:
536
+ log_error(f"Unexpected error in method extraction: {e}")
537
+ return None
538
+
539
+ def _extract_field_optimized(self, node: "tree_sitter.Node") -> list[Variable]:
540
+ """Extract field information optimized (from AdvancedAnalyzer)"""
541
+ fields: list[Variable] = []
542
+ try:
543
+ start_line = node.start_point[0] + 1
544
+ end_line = node.end_point[0] + 1
545
+
546
+ # Parse field declaration using AdvancedAnalyzer method
547
+ field_info = self._parse_field_declaration_optimized(node)
548
+ if not field_info:
549
+ return fields
550
+
551
+ field_type, variable_names, modifiers = field_info
552
+ visibility = self._determine_visibility(modifiers)
553
+
554
+ # Extract annotations for this field
555
+ field_annotations = self._find_annotations_for_line_cached(start_line)
556
+
557
+ # Extract JavaDoc for this field
558
+ field_javadoc = self._extract_javadoc_for_line(start_line)
559
+
560
+ # Create Variable object for each variable (matching AdvancedAnalyzer structure)
561
+ for var_name in variable_names:
562
+ # Extract raw text
563
+ start_line_idx = max(0, start_line - 1)
564
+ end_line_idx = min(len(self.content_lines), end_line)
565
+ raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
566
+
567
+ field = Variable(
568
+ name=var_name,
569
+ start_line=start_line,
570
+ end_line=end_line,
571
+ raw_text=raw_text,
572
+ language="java",
573
+ variable_type=field_type,
574
+ modifiers=modifiers,
575
+ is_static="static" in modifiers,
576
+ is_constant="final" in modifiers,
577
+ visibility=visibility,
578
+ docstring=field_javadoc,
579
+ # Java-specific detailed information
580
+ annotations=field_annotations,
581
+ is_final="final" in modifiers,
582
+ field_type=field_type, # Alias for variable_type
583
+ )
584
+ fields.append(field)
585
+ except (AttributeError, ValueError, TypeError) as e:
586
+ log_debug(f"Failed to extract field info: {e}")
587
+ except Exception as e:
588
+ log_error(f"Unexpected error in field extraction: {e}")
589
+
590
+ return fields
591
+
592
+ def _parse_method_signature_optimized(
593
+ self, node: "tree_sitter.Node"
594
+ ) -> tuple[str, str, list[str], list[str], list[str]] | None:
595
+ """Parse method signature optimized (from AdvancedAnalyzer)"""
596
+ try:
597
+ # Extract method name
598
+ method_name = None
599
+ for child in node.children:
600
+ if child.type == "identifier":
601
+ method_name = self._get_node_text_optimized(child)
602
+ break
603
+
604
+ if not method_name:
605
+ return None
606
+
607
+ # Extract return type
608
+ return_type = "void"
609
+ for child in node.children:
610
+ if child.type in [
611
+ "type_identifier",
612
+ "void_type",
613
+ "primitive_type",
614
+ "integral_type",
615
+ "boolean_type",
616
+ "floating_point_type",
617
+ "array_type",
618
+ ]:
619
+ return_type = self._get_node_text_optimized(child)
620
+ break
621
+ elif child.type == "generic_type":
622
+ return_type = self._get_node_text_optimized(child)
623
+ break
624
+
625
+ # Extract parameters
626
+ parameters = []
627
+ for child in node.children:
628
+ if child.type == "formal_parameters":
629
+ for param in child.children:
630
+ if param.type == "formal_parameter":
631
+ param_text = self._get_node_text_optimized(param)
632
+ parameters.append(param_text)
633
+
634
+ # Extract modifiers
635
+ modifiers = self._extract_modifiers_optimized(node)
636
+
637
+ # Extract throws clause
638
+ throws = []
639
+ for child in node.children:
640
+ if child.type == "throws":
641
+ throws_text = self._get_node_text_optimized(child)
642
+ exceptions = re.findall(r"\b[A-Z]\w*Exception\b", throws_text)
643
+ throws.extend(exceptions)
644
+
645
+ return method_name, return_type, parameters, modifiers, throws
646
+ except Exception:
647
+ return None
648
+
649
+ def _parse_field_declaration_optimized(
650
+ self, node: "tree_sitter.Node"
651
+ ) -> tuple[str, list[str], list[str]] | None:
652
+ """Parse field declaration optimized (from AdvancedAnalyzer)"""
653
+ try:
654
+ # Extract type (exactly as in AdvancedAnalyzer)
655
+ field_type = None
656
+ for child in node.children:
657
+ if child.type in [
658
+ "type_identifier",
659
+ "primitive_type",
660
+ "integral_type",
661
+ "generic_type",
662
+ "boolean_type",
663
+ "floating_point_type",
664
+ "array_type",
665
+ ]:
666
+ field_type = self._get_node_text_optimized(child)
667
+ break
668
+
669
+ if not field_type:
670
+ return None
671
+
672
+ # Extract variable names (exactly as in AdvancedAnalyzer)
673
+ variable_names = []
674
+ for child in node.children:
675
+ if child.type == "variable_declarator":
676
+ for grandchild in child.children:
677
+ if grandchild.type == "identifier":
678
+ var_name = self._get_node_text_optimized(grandchild)
679
+ variable_names.append(var_name)
680
+
681
+ if not variable_names:
682
+ return None
683
+
684
+ # Extract modifiers (exactly as in AdvancedAnalyzer)
685
+ modifiers = self._extract_modifiers_optimized(node)
686
+
687
+ return field_type, variable_names, modifiers
688
+ except Exception:
689
+ return None
690
+
691
+ def _extract_modifiers_optimized(self, node: "tree_sitter.Node") -> list[str]:
692
+ """Extract modifiers efficiently (from AdvancedAnalyzer)"""
693
+ modifiers = []
694
+ for child in node.children:
695
+ if child.type == "modifiers":
696
+ for mod_child in child.children:
697
+ if mod_child.type in [
698
+ "public",
699
+ "private",
700
+ "protected",
701
+ "static",
702
+ "final",
703
+ "abstract",
704
+ "synchronized",
705
+ "volatile",
706
+ "transient",
707
+ ]:
708
+ modifiers.append(mod_child.type)
709
+ elif mod_child.type not in ["marker_annotation"]:
710
+ mod_text = self._get_node_text_optimized(mod_child)
711
+ if mod_text in [
712
+ "public",
713
+ "private",
714
+ "protected",
715
+ "static",
716
+ "final",
717
+ "abstract",
718
+ "synchronized",
719
+ "volatile",
720
+ "transient",
721
+ ]:
722
+ modifiers.append(mod_text)
723
+ return modifiers
724
+
725
+ def _extract_package_info(self, node: "tree_sitter.Node") -> None:
726
+ """Extract package information (from AdvancedAnalyzer)"""
727
+ try:
728
+ package_text = self._get_node_text_optimized(node)
729
+ match = re.search(r"package\s+([\w.]+)", package_text)
730
+ if match:
731
+ self.current_package = match.group(1)
732
+ except (AttributeError, ValueError, IndexError) as e:
733
+ log_debug(f"Failed to extract package info: {e}")
734
+ except Exception as e:
735
+ log_error(f"Unexpected error in package extraction: {e}")
736
+
737
+ def _extract_package_element(self, node: "tree_sitter.Node") -> Package | None:
738
+ """Extract package element for inclusion in results"""
739
+ try:
740
+ package_text = self._get_node_text_optimized(node)
741
+ match = re.search(r"package\s+([\w.]+)", package_text)
742
+ if match:
743
+ package_name = match.group(1)
744
+ return Package(
745
+ name=package_name,
746
+ start_line=node.start_point[0] + 1,
747
+ end_line=node.end_point[0] + 1,
748
+ raw_text=package_text,
749
+ language="java",
750
+ )
751
+ except (AttributeError, ValueError, IndexError) as e:
752
+ log_debug(f"Failed to extract package element: {e}")
753
+ except Exception as e:
754
+ log_error(f"Unexpected error in package element extraction: {e}")
755
+ return None
756
+
757
+ def _determine_visibility(self, modifiers: list[str]) -> str:
758
+ """Determine visibility from modifiers"""
759
+ if "public" in modifiers:
760
+ return "public"
761
+ elif "private" in modifiers:
762
+ return "private"
763
+ elif "protected" in modifiers:
764
+ return "protected"
765
+ else:
766
+ return "package" # Default package visibility
767
+
768
+ def _find_annotations_for_line_cached(
769
+ self, target_line: int
770
+ ) -> list[dict[str, Any]]:
771
+ """Find annotations for specified line with caching (from AdvancedAnalyzer)"""
772
+ if target_line in self._annotation_cache:
773
+ return self._annotation_cache[target_line]
774
+
775
+ result_annotations = []
776
+ for annotation in self.annotations:
777
+ line_distance = target_line - annotation.get("end_line", 0)
778
+ if 1 <= line_distance <= 5:
779
+ result_annotations.append(annotation)
780
+
781
+ self._annotation_cache[target_line] = result_annotations
782
+ return result_annotations
783
+
784
+ def _calculate_complexity_optimized(self, node: "tree_sitter.Node") -> int:
785
+ """Calculate cyclomatic complexity efficiently (from AdvancedAnalyzer)"""
786
+ complexity = 1
787
+ try:
788
+ node_text = self._get_node_text_optimized(node).lower()
789
+ keywords = ["if", "while", "for", "catch", "case", "switch"]
790
+ for keyword in keywords:
791
+ complexity += node_text.count(keyword)
792
+ except (AttributeError, TypeError) as e:
793
+ log_debug(f"Failed to calculate complexity: {e}")
794
+ except Exception as e:
795
+ log_error(f"Unexpected error in complexity calculation: {e}")
796
+ return complexity
797
+
798
+ def _extract_javadoc_for_line(self, target_line: int) -> str | None:
799
+ """Extract JavaDoc comment immediately before the specified line"""
800
+ try:
801
+ if not self.content_lines or target_line <= 1:
802
+ return None
803
+
804
+ # Search backwards from target_line
805
+ javadoc_lines = []
806
+ current_line = target_line - 1
807
+
808
+ # Skip empty lines
809
+ while current_line > 0:
810
+ line = self.content_lines[current_line - 1].strip()
811
+ if line:
812
+ break
813
+ current_line -= 1
814
+
815
+ # Check for JavaDoc end
816
+ if current_line > 0:
817
+ line = self.content_lines[current_line - 1].strip()
818
+ if line.endswith("*/"):
819
+ # This might be a JavaDoc comment
820
+ javadoc_lines.append(self.content_lines[current_line - 1])
821
+ current_line -= 1
822
+
823
+ # Collect JavaDoc content
824
+ while current_line > 0:
825
+ line_content = self.content_lines[current_line - 1]
826
+ line_stripped = line_content.strip()
827
+ javadoc_lines.append(line_content)
828
+
829
+ if line_stripped.startswith("/**"):
830
+ # Found the start of JavaDoc
831
+ javadoc_lines.reverse()
832
+ javadoc_text = "\n".join(javadoc_lines)
833
+
834
+ # Clean up the JavaDoc
835
+ return self._clean_javadoc(javadoc_text)
836
+ current_line -= 1
837
+
838
+ return None
839
+
840
+ except Exception as e:
841
+ log_debug(f"Failed to extract JavaDoc: {e}")
842
+ return None
843
+
844
+ def _clean_javadoc(self, javadoc_text: str) -> str:
845
+ """Clean JavaDoc text by removing comment markers"""
846
+ if not javadoc_text:
847
+ return ""
848
+
849
+ lines = javadoc_text.split("\n")
850
+ cleaned_lines = []
851
+
852
+ for line in lines:
853
+ # Remove leading/trailing whitespace
854
+ line = line.strip()
855
+
856
+ # Remove comment markers
857
+ if line.startswith("/**"):
858
+ line = line[3:].strip()
859
+ elif line.startswith("*/"):
860
+ line = line[2:].strip()
861
+ elif line.startswith("*"):
862
+ line = line[1:].strip()
863
+
864
+ if line: # Only add non-empty lines
865
+ cleaned_lines.append(line)
866
+
867
+ return " ".join(cleaned_lines) if cleaned_lines else ""
868
+
869
+ def _is_nested_class(self, node: "tree_sitter.Node") -> bool:
870
+ """Check if this is a nested class (from AdvancedAnalyzer)"""
871
+ current = node.parent
872
+ while current:
873
+ if current.type in [
874
+ "class_declaration",
875
+ "interface_declaration",
876
+ "enum_declaration",
877
+ ]:
878
+ return True
879
+ current = current.parent
880
+ return False
881
+
882
+ def _find_parent_class(self, node: "tree_sitter.Node") -> str | None:
883
+ """Find parent class name (from AdvancedAnalyzer)"""
884
+ current = node.parent
885
+ while current:
886
+ if current.type in [
887
+ "class_declaration",
888
+ "interface_declaration",
889
+ "enum_declaration",
890
+ ]:
891
+ return self._extract_class_name(current)
892
+ current = current.parent
893
+ return None
894
+
895
+ def _extract_class_name(self, node: "tree_sitter.Node") -> str | None:
896
+ """Extract class name from node (from AdvancedAnalyzer)"""
897
+ for child in node.children:
898
+ if child.type == "identifier":
899
+ return self._get_node_text_optimized(child)
900
+ return None
901
+
902
+ def _extract_annotation_optimized(
903
+ self, node: "tree_sitter.Node"
904
+ ) -> dict[str, Any] | None:
905
+ """Extract annotation information optimized (from AdvancedAnalyzer)"""
906
+ try:
907
+ start_line = node.start_point[0] + 1
908
+ end_line = node.end_point[0] + 1
909
+ raw_text = self._get_node_text_optimized(node)
910
+
911
+ # Extract annotation name efficiently
912
+ name_match = re.search(r"@(\w+)", raw_text)
913
+ if not name_match:
914
+ return None
915
+
916
+ annotation_name = name_match.group(1)
917
+
918
+ # Extract parameters efficiently
919
+ parameters = []
920
+ param_match = re.search(r"\((.*?)\)", raw_text, re.DOTALL)
921
+ if param_match:
922
+ param_text = param_match.group(1).strip()
923
+ if param_text:
924
+ # Simple parameter parsing
925
+ if "=" in param_text:
926
+ parameters = [
927
+ p.strip() for p in re.split(r",(?![^()]*\))", param_text)
928
+ ]
929
+ else:
930
+ parameters = [param_text]
931
+
932
+ return {
933
+ "name": annotation_name,
934
+ "parameters": parameters,
935
+ "start_line": start_line,
936
+ "end_line": end_line,
937
+ "raw_text": raw_text,
938
+ }
939
+ except (AttributeError, IndexError, ValueError) as e:
940
+ log_debug(f"Failed to extract annotation from node: {e}")
941
+ return None
942
+ except Exception as e:
943
+ log_error(f"Unexpected exception in annotation extraction: {e}")
944
+ return None
945
+
946
+ def _extract_import_info(
947
+ self, node: "tree_sitter.Node", source_code: str
948
+ ) -> Import | None:
949
+ """Extract import information (from AdvancedAnalyzer)"""
950
+ try:
951
+ import_text = self._get_node_text_optimized(node)
952
+ # Simple approach: get everything until semicolon then process
953
+ import_content = import_text.strip()
954
+ if import_content.endswith(";"):
955
+ import_content = import_content[:-1]
956
+
957
+ if "static" in import_content:
958
+ # Static import
959
+ static_match = re.search(r"import\s+static\s+([\w.]+)", import_content)
960
+ if static_match:
961
+ import_name = static_match.group(1)
962
+ # Handle wildcard case
963
+ if import_content.endswith(".*"):
964
+ import_name = import_name.replace(".*", "")
965
+ # For static wildcard, remove last element
966
+ parts = import_name.split(".")
967
+ if len(parts) > 1:
968
+ import_name = ".".join(parts[:-1])
969
+
970
+ return Import(
971
+ name=import_name,
972
+ start_line=node.start_point[0] + 1,
973
+ end_line=node.end_point[0] + 1,
974
+ raw_text=import_text,
975
+ language="java",
976
+ module_name=import_name,
977
+ is_static=True,
978
+ is_wildcard=import_content.endswith(".*"),
979
+ import_statement=import_content,
980
+ )
981
+ else:
982
+ # Normal import
983
+ normal_match = re.search(r"import\s+([\w.]+)", import_content)
984
+ if normal_match:
985
+ import_name = normal_match.group(1)
986
+ # Handle wildcard case
987
+ if import_content.endswith(".*"):
988
+ if import_name.endswith(".*"):
989
+ import_name = import_name[:-2] # Remove trailing .*
990
+ elif import_name.endswith("."):
991
+ import_name = import_name[:-1] # Remove trailing .
992
+
993
+ return Import(
994
+ name=import_name,
995
+ start_line=node.start_point[0] + 1,
996
+ end_line=node.end_point[0] + 1,
997
+ raw_text=import_text,
998
+ language="java",
999
+ module_name=import_name,
1000
+ is_static=False,
1001
+ is_wildcard=import_content.endswith(".*"),
1002
+ import_statement=import_content,
1003
+ )
1004
+ except (AttributeError, ValueError, IndexError) as e:
1005
+ log_debug(f"Failed to extract import info: {e}")
1006
+ except Exception as e:
1007
+ log_error(f"Unexpected error in import extraction: {e}")
1008
+ return None
1009
+
1010
+
1011
+ class JavaPlugin(LanguagePlugin):
1012
+ """Java language plugin for the new architecture"""
1013
+
1014
+ def __init__(self) -> None:
1015
+ """Initialize the Java plugin"""
1016
+ super().__init__()
1017
+ self._language_cache: tree_sitter.Language | None = None
1018
+
1019
+ def get_language_name(self) -> str:
1020
+ """Return the name of the programming language this plugin supports"""
1021
+ return "java"
1022
+
1023
+ def get_file_extensions(self) -> list[str]:
1024
+ """Return list of file extensions this plugin supports"""
1025
+ return [".java", ".jsp", ".jspx"]
1026
+
1027
+ def create_extractor(self) -> ElementExtractor:
1028
+ """Create and return an element extractor for this language"""
1029
+ return JavaElementExtractor()
1030
+
1031
+ def get_tree_sitter_language(self) -> Optional["tree_sitter.Language"]:
1032
+ """Get the Tree-sitter language object for Java"""
1033
+ if self._language_cache is None:
1034
+ try:
1035
+ import tree_sitter_java as tsjava
1036
+
1037
+ self._language_cache = tsjava.language() # type: ignore
1038
+ except ImportError:
1039
+ log_error("tree-sitter-java not available")
1040
+ return None
1041
+ except Exception as e:
1042
+ log_error(f"Failed to load Java language: {e}")
1043
+ return None
1044
+ return self._language_cache
1045
+
1046
+ def get_supported_queries(self) -> list[str]:
1047
+ """Get list of supported query names for this language"""
1048
+ return ["class", "method", "field", "import"]
1049
+
1050
+ def is_applicable(self, file_path: str) -> bool:
1051
+ """Check if this plugin is applicable for the given file"""
1052
+ return any(
1053
+ file_path.lower().endswith(ext.lower())
1054
+ for ext in self.get_file_extensions()
1055
+ )
1056
+
1057
+ def get_plugin_info(self) -> dict:
1058
+ """Get information about this plugin"""
1059
+ return {
1060
+ "name": "Java Plugin",
1061
+ "language": self.get_language_name(),
1062
+ "extensions": self.get_file_extensions(),
1063
+ "version": "2.0.0",
1064
+ "supported_queries": self.get_supported_queries(),
1065
+ }
1066
+
1067
    async def analyze_file(
        self, file_path: str, request: "AnalysisRequest"
    ) -> "AnalysisResult":
        """
        Analyze a Java file and return analysis results.

        Reads the file as UTF-8, parses it with the core Parser, extracts
        functions, classes, variables and imports via the plugin's
        extractor, and bundles everything into an AnalysisResult.  Never
        raises: any failure (I/O, parse, extraction) is logged and reported
        through an AnalysisResult with ``success=False``.

        Args:
            file_path: Path to the Java file to analyze
            request: Analysis request object
                (NOTE(review): not consulted anywhere in this body —
                confirm whether request options should influence analysis)

        Returns:
            AnalysisResult object containing the analysis results
        """
        try:
            # Imported lazily, inside the function, to avoid import cycles
            # at module load time.
            from ..core.parser import Parser
            from ..models import AnalysisResult

            log_debug(f"Java Plugin: Starting analysis of {file_path}")

            # Read file content
            with open(file_path, encoding="utf-8") as f:
                source_code = f.read()

            log_debug(f"Java Plugin: Read {len(source_code)} characters from file")

            # Parse the file
            parser = Parser()
            parse_result = parser.parse_code(source_code, "java")

            log_debug(f"Java Plugin: Parse result success: {parse_result.success}")

            if not parse_result.success:
                # Parser rejected the source: report failure but keep the
                # original source text and line count for diagnostics.
                log_error(f"Java Plugin: Parse failed: {parse_result.error_message}")
                return AnalysisResult(
                    file_path=file_path,
                    language="java",
                    line_count=len(source_code.splitlines()),
                    elements=[],
                    node_count=0,
                    query_results={},
                    source_code=source_code,
                    success=False,
                    error_message=parse_result.error_message,
                )

            # Extract elements
            extractor = self.create_extractor()

            if parse_result.tree:
                log_debug("Java Plugin: Extracting functions...")
                functions = extractor.extract_functions(parse_result.tree, source_code)
                log_debug(f"Java Plugin: Found {len(functions)} functions")

                log_debug("Java Plugin: Extracting classes...")
                classes = extractor.extract_classes(parse_result.tree, source_code)
                log_debug(f"Java Plugin: Found {len(classes)} classes")

                log_debug("Java Plugin: Extracting variables...")
                variables = extractor.extract_variables(parse_result.tree, source_code)
                log_debug(f"Java Plugin: Found {len(variables)} variables")

                log_debug("Java Plugin: Extracting imports...")
                imports = extractor.extract_imports(parse_result.tree, source_code)
                log_debug(f"Java Plugin: Found {len(imports)} imports")
            else:
                # Parse reported success but produced no tree:
                # nothing to extract.
                functions = []
                classes = []
                variables = []
                imports = []

            # Combine all elements
            all_elements: list[CodeElement] = []
            all_elements.extend(functions)
            all_elements.extend(classes)
            all_elements.extend(variables)
            all_elements.extend(imports)
            log_debug(f"Java Plugin: Total elements: {len(all_elements)}")

            return AnalysisResult(
                file_path=file_path,
                language="java",
                line_count=len(source_code.splitlines()),
                elements=all_elements,
                # NOTE: child_count of the root node counts only its direct
                # children, not the full tree size.
                node_count=(
                    parse_result.tree.root_node.child_count if parse_result.tree else 0
                ),
                query_results={},
                source_code=source_code,
                success=True,
                error_message=None,
            )

        except Exception as e:
            # Catch-all boundary: analysis must not propagate exceptions.
            # NOTE(review): if the lazy `from ..models import AnalysisResult`
            # itself failed above, AnalysisResult is unbound here — confirm.
            log_error(f"Failed to analyze Java file {file_path}: {e}")
            import traceback

            log_error(f"Java Plugin traceback: {traceback.format_exc()}")
            return AnalysisResult(
                file_path=file_path,
                language="java",
                line_count=0,
                elements=[],
                node_count=0,
                query_results={},
                source_code="",
                success=False,
                error_message=str(e),
            )