tree-sitter-analyzer 0.9.3__py3-none-any.whl → 0.9.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic; consult the registry's advisory listing for more details.

Files changed (32)
  1. tree_sitter_analyzer/cli/commands/default_command.py +18 -18
  2. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -141
  3. tree_sitter_analyzer/cli/commands/query_command.py +92 -88
  4. tree_sitter_analyzer/cli/commands/table_command.py +235 -235
  5. tree_sitter_analyzer/cli/info_commands.py +121 -121
  6. tree_sitter_analyzer/cli_main.py +307 -307
  7. tree_sitter_analyzer/core/analysis_engine.py +584 -584
  8. tree_sitter_analyzer/core/cache_service.py +5 -4
  9. tree_sitter_analyzer/core/query.py +502 -502
  10. tree_sitter_analyzer/encoding_utils.py +6 -2
  11. tree_sitter_analyzer/exceptions.py +400 -406
  12. tree_sitter_analyzer/formatters/java_formatter.py +291 -291
  13. tree_sitter_analyzer/formatters/python_formatter.py +259 -259
  14. tree_sitter_analyzer/interfaces/mcp_server.py +426 -425
  15. tree_sitter_analyzer/language_detector.py +398 -398
  16. tree_sitter_analyzer/language_loader.py +224 -224
  17. tree_sitter_analyzer/languages/java_plugin.py +1202 -1202
  18. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +559 -555
  19. tree_sitter_analyzer/mcp/server.py +30 -9
  20. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +21 -4
  21. tree_sitter_analyzer/mcp/tools/table_format_tool.py +22 -4
  22. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -567
  23. tree_sitter_analyzer/models.py +470 -470
  24. tree_sitter_analyzer/security/__init__.py +22 -22
  25. tree_sitter_analyzer/security/boundary_manager.py +243 -243
  26. tree_sitter_analyzer/security/regex_checker.py +297 -292
  27. tree_sitter_analyzer/table_formatter.py +703 -652
  28. tree_sitter_analyzer/utils.py +50 -19
  29. {tree_sitter_analyzer-0.9.3.dist-info → tree_sitter_analyzer-0.9.4.dist-info}/METADATA +1 -1
  30. {tree_sitter_analyzer-0.9.3.dist-info → tree_sitter_analyzer-0.9.4.dist-info}/RECORD +32 -32
  31. {tree_sitter_analyzer-0.9.3.dist-info → tree_sitter_analyzer-0.9.4.dist-info}/WHEEL +0 -0
  32. {tree_sitter_analyzer-0.9.3.dist-info → tree_sitter_analyzer-0.9.4.dist-info}/entry_points.txt +0 -0
@@ -1,1202 +1,1202 @@
1
- #!/usr/bin/env python3
2
- """
3
- Java Language Plugin
4
-
5
- Provides Java-specific parsing and element extraction functionality.
6
- Migrated from AdvancedAnalyzer implementation for future independence.
7
- """
8
-
9
- import re
10
- from typing import TYPE_CHECKING, Any, Optional
11
-
12
- if TYPE_CHECKING:
13
- import tree_sitter
14
-
15
- from ..core.analysis_engine import AnalysisRequest
16
- from ..models import AnalysisResult
17
-
18
- from ..encoding_utils import extract_text_slice, safe_encode
19
- from ..models import Class, CodeElement, Function, Import, Package, Variable
20
- from ..plugins.base import ElementExtractor, LanguagePlugin
21
- from ..utils import log_debug, log_error, log_warning
22
-
23
-
24
- class JavaElementExtractor(ElementExtractor):
25
- """Java-specific element extractor with AdvancedAnalyzer implementation"""
26
-
27
- def __init__(self) -> None:
28
- """Initialize the Java element extractor."""
29
- self.current_package: str = ""
30
- self.current_file: str = ""
31
- self.source_code: str = ""
32
- self.content_lines: list[str] = []
33
- self.imports: list[str] = []
34
-
35
- # Performance optimization caches (from AdvancedAnalyzer)
36
- self._node_text_cache: dict[int, str] = {}
37
- self._processed_nodes: set[int] = set()
38
- self._element_cache: dict[tuple[int, str], Any] = {}
39
- self._file_encoding: str | None = None
40
- self._annotation_cache: dict[int, list[dict[str, Any]]] = {}
41
- self._signature_cache: dict[int, str] = {}
42
-
43
- # Extracted annotations for cross-referencing
44
- self.annotations: list[dict[str, Any]] = []
45
-
46
- def extract_annotations(
47
- self, tree: "tree_sitter.Tree", source_code: str
48
- ) -> list[dict[str, Any]]:
49
- """Extract Java annotations using AdvancedAnalyzer implementation"""
50
- self.source_code = source_code
51
- self.content_lines = source_code.split("\n")
52
- self._reset_caches()
53
-
54
- annotations: list[dict[str, Any]] = []
55
-
56
- # Use AdvancedAnalyzer's optimized traversal for annotations
57
- extractors = {
58
- "annotation": self._extract_annotation_optimized,
59
- "marker_annotation": self._extract_annotation_optimized,
60
- }
61
-
62
- self._traverse_and_extract_iterative(
63
- tree.root_node, extractors, annotations, "annotation"
64
- )
65
-
66
- # Store annotations for cross-referencing
67
- self.annotations = annotations
68
-
69
- log_debug(f"Extracted {len(annotations)} annotations")
70
- return annotations
71
-
72
- def extract_functions(
73
- self, tree: "tree_sitter.Tree", source_code: str
74
- ) -> list[Function]:
75
- """Extract Java method definitions using AdvancedAnalyzer implementation"""
76
- self.source_code = source_code
77
- self.content_lines = source_code.split("\n")
78
- self._reset_caches()
79
-
80
- functions: list[Function] = []
81
-
82
- # Use AdvancedAnalyzer's optimized traversal
83
- extractors = {
84
- "method_declaration": self._extract_method_optimized,
85
- "constructor_declaration": self._extract_method_optimized,
86
- }
87
-
88
- self._traverse_and_extract_iterative(
89
- tree.root_node, extractors, functions, "method"
90
- )
91
-
92
- log_debug(f"Extracted {len(functions)} methods")
93
- return functions
94
-
95
- def extract_classes(
96
- self, tree: "tree_sitter.Tree", source_code: str
97
- ) -> list[Class]:
98
- """Extract Java class definitions using AdvancedAnalyzer implementation"""
99
- self.source_code = source_code
100
- self.content_lines = source_code.split("\n")
101
- self._reset_caches()
102
-
103
- # Ensure package information is extracted before processing classes
104
- # This fixes the issue where current_package is empty when extract_classes
105
- # is called independently or before extract_imports
106
- if not self.current_package:
107
- self._extract_package_from_tree(tree)
108
-
109
- classes: list[Class] = []
110
-
111
- # Use AdvancedAnalyzer's optimized traversal
112
- extractors = {
113
- "class_declaration": self._extract_class_optimized,
114
- "interface_declaration": self._extract_class_optimized,
115
- "enum_declaration": self._extract_class_optimized,
116
- }
117
-
118
- self._traverse_and_extract_iterative(
119
- tree.root_node, extractors, classes, "class"
120
- )
121
-
122
- log_debug(f"Extracted {len(classes)} classes")
123
- return classes
124
-
125
- def extract_variables(
126
- self, tree: "tree_sitter.Tree", source_code: str
127
- ) -> list[Variable]:
128
- """Extract Java field definitions using AdvancedAnalyzer implementation"""
129
- self.source_code = source_code
130
- self.content_lines = source_code.split("\n")
131
- self._reset_caches()
132
-
133
- variables: list[Variable] = []
134
-
135
- # Use AdvancedAnalyzer's optimized traversal
136
- extractors = {
137
- "field_declaration": self._extract_field_optimized,
138
- }
139
-
140
- log_debug("Starting field extraction with iterative traversal")
141
- self._traverse_and_extract_iterative(
142
- tree.root_node, extractors, variables, "field"
143
- )
144
-
145
- log_debug(f"Extracted {len(variables)} fields")
146
- for i, var in enumerate(variables[:3]):
147
- log_debug(f"Field {i}: {var.name} ({var.variable_type})")
148
- return variables
149
-
150
- def extract_imports(
151
- self, tree: "tree_sitter.Tree", source_code: str
152
- ) -> list[Import]:
153
- """Extract Java import statements"""
154
- self.source_code = source_code
155
- self.content_lines = source_code.split("\n")
156
-
157
- imports: list[Import] = []
158
-
159
- # Extract package and imports efficiently (from AdvancedAnalyzer)
160
- for child in tree.root_node.children:
161
- if child.type == "package_declaration":
162
- self._extract_package_info(child)
163
- elif child.type == "import_declaration":
164
- import_info = self._extract_import_info(child, source_code)
165
- if import_info:
166
- imports.append(import_info)
167
- elif child.type in [
168
- "class_declaration",
169
- "interface_declaration",
170
- "enum_declaration",
171
- ]:
172
- # After package and imports come class declarations, so stop
173
- break
174
-
175
- log_debug(f"Extracted {len(imports)} imports")
176
- return imports
177
-
178
- def extract_packages(
179
- self, tree: "tree_sitter.Tree", source_code: str
180
- ) -> list[Package]:
181
- """Extract Java package declarations"""
182
- self.source_code = source_code
183
- self.content_lines = source_code.split("\n")
184
-
185
- packages: list[Package] = []
186
-
187
- # Extract package declaration
188
- for child in tree.root_node.children:
189
- if child.type == "package_declaration":
190
- package_info = self._extract_package_element(child)
191
- if package_info:
192
- packages.append(package_info)
193
- break # Only one package declaration per file
194
-
195
- log_debug(f"Extracted {len(packages)} packages")
196
- return packages
197
-
198
- def _reset_caches(self) -> None:
199
- """Reset performance caches"""
200
- self._node_text_cache.clear()
201
- self._processed_nodes.clear()
202
- self._element_cache.clear()
203
- self._annotation_cache.clear()
204
- self._signature_cache.clear()
205
- self.annotations.clear()
206
-
207
- def _traverse_and_extract_iterative(
208
- self,
209
- root_node: "tree_sitter.Node",
210
- extractors: dict[str, Any],
211
- results: list[Any],
212
- element_type: str,
213
- ) -> None:
214
- """
215
- Iterative node traversal and extraction (from AdvancedAnalyzer)
216
- Uses batch processing for optimal performance
217
- """
218
- if not root_node:
219
- return # type: ignore[unreachable]
220
-
221
- # Target node types for extraction
222
- target_node_types = set(extractors.keys())
223
-
224
- # Container node types that may contain target nodes (from AdvancedAnalyzer)
225
- container_node_types = {
226
- "program",
227
- "class_body",
228
- "interface_body",
229
- "enum_body",
230
- "class_declaration",
231
- "interface_declaration",
232
- "enum_declaration",
233
- "method_declaration",
234
- "constructor_declaration",
235
- "block",
236
- "modifiers", # Annotation nodes can appear inside modifiers
237
- }
238
-
239
- # Iterative DFS stack: (node, depth)
240
- node_stack = [(root_node, 0)]
241
- processed_nodes = 0
242
- max_depth = 50 # Prevent infinite loops
243
-
244
- # Batch processing containers (from AdvancedAnalyzer)
245
- field_batch = []
246
-
247
- while node_stack:
248
- current_node, depth = node_stack.pop()
249
-
250
- # Safety check for maximum depth
251
- if depth > max_depth:
252
- log_warning(f"Maximum traversal depth ({max_depth}) exceeded")
253
- continue
254
-
255
- processed_nodes += 1
256
- node_type = current_node.type
257
-
258
- # Early termination: skip nodes that don't contain target elements
259
- if (
260
- depth > 0
261
- and node_type not in target_node_types
262
- and node_type not in container_node_types
263
- ):
264
- continue
265
-
266
- # Collect target nodes for batch processing (from AdvancedAnalyzer)
267
- if node_type in target_node_types:
268
- if element_type == "field" and node_type == "field_declaration":
269
- field_batch.append(current_node)
270
- else:
271
- # Process non-field elements immediately
272
- node_id = id(current_node)
273
-
274
- # Skip if already processed
275
- if node_id in self._processed_nodes:
276
- continue
277
-
278
- # Check element cache first
279
- cache_key = (node_id, element_type)
280
- if cache_key in self._element_cache:
281
- element = self._element_cache[cache_key]
282
- if element:
283
- if isinstance(element, list):
284
- results.extend(element)
285
- else:
286
- results.append(element)
287
- self._processed_nodes.add(node_id)
288
- continue
289
-
290
- # Extract and cache
291
- extractor = extractors.get(node_type)
292
- if extractor:
293
- element = extractor(current_node)
294
- self._element_cache[cache_key] = element
295
- if element:
296
- if isinstance(element, list):
297
- results.extend(element)
298
- else:
299
- results.append(element)
300
- self._processed_nodes.add(node_id)
301
-
302
- # Add children to stack (reversed for correct DFS traversal)
303
- if current_node.children:
304
- for child in reversed(current_node.children):
305
- node_stack.append((child, depth + 1))
306
-
307
- # Process field batch when it reaches optimal size (from AdvancedAnalyzer)
308
- if len(field_batch) >= 10:
309
- self._process_field_batch(field_batch, extractors, results)
310
- field_batch.clear()
311
-
312
- # Process remaining field batch (from AdvancedAnalyzer)
313
- if field_batch:
314
- self._process_field_batch(field_batch, extractors, results)
315
-
316
- log_debug(f"Iterative traversal processed {processed_nodes} nodes")
317
-
318
- def _process_field_batch(
319
- self, batch: list["tree_sitter.Node"], extractors: dict, results: list[Any]
320
- ) -> None:
321
- """Process field nodes with caching (from AdvancedAnalyzer)"""
322
- for node in batch:
323
- node_id = id(node)
324
-
325
- # Skip if already processed
326
- if node_id in self._processed_nodes:
327
- continue
328
-
329
- # Check element cache first
330
- cache_key = (node_id, "field")
331
- if cache_key in self._element_cache:
332
- elements = self._element_cache[cache_key]
333
- if elements:
334
- if isinstance(elements, list):
335
- results.extend(elements)
336
- else:
337
- results.append(elements)
338
- self._processed_nodes.add(node_id)
339
- continue
340
-
341
- # Extract and cache
342
- extractor = extractors.get(node.type)
343
- if extractor:
344
- elements = extractor(node)
345
- self._element_cache[cache_key] = elements
346
- if elements:
347
- if isinstance(elements, list):
348
- results.extend(elements)
349
- else:
350
- results.append(elements)
351
- self._processed_nodes.add(node_id)
352
-
353
- def _get_node_text_optimized(self, node: "tree_sitter.Node") -> str:
354
- """Get node text with optimized caching (from AdvancedAnalyzer)"""
355
- node_id = id(node)
356
-
357
- # Check cache first
358
- if node_id in self._node_text_cache:
359
- return self._node_text_cache[node_id]
360
-
361
- try:
362
- # Use encoding utilities for text extraction
363
- start_byte = node.start_byte
364
- end_byte = node.end_byte
365
-
366
- encoding = self._file_encoding or "utf-8"
367
- content_bytes = safe_encode("\n".join(self.content_lines), encoding)
368
- text = extract_text_slice(content_bytes, start_byte, end_byte, encoding)
369
-
370
- self._node_text_cache[node_id] = text
371
- return text
372
- except Exception as e:
373
- log_error(f"Error in _get_node_text_optimized: {e}")
374
- # Fallback to simple text extraction
375
- try:
376
- start_point = node.start_point
377
- end_point = node.end_point
378
-
379
- if start_point[0] == end_point[0]:
380
- # Single line
381
- line = self.content_lines[start_point[0]]
382
- return line[start_point[1] : end_point[1]]
383
- else:
384
- # Multiple lines
385
- lines = []
386
- for i in range(start_point[0], end_point[0] + 1):
387
- if i < len(self.content_lines):
388
- line = self.content_lines[i]
389
- if i == start_point[0]:
390
- lines.append(line[start_point[1] :])
391
- elif i == end_point[0]:
392
- lines.append(line[: end_point[1]])
393
- else:
394
- lines.append(line)
395
- return "\n".join(lines)
396
- except Exception as fallback_error:
397
- log_error(f"Fallback text extraction also failed: {fallback_error}")
398
- return ""
399
-
400
- def _extract_class_optimized(self, node: "tree_sitter.Node") -> Class | None:
401
- """Extract class information optimized (from AdvancedAnalyzer)"""
402
- try:
403
- start_line = node.start_point[0] + 1
404
- end_line = node.end_point[0] + 1
405
-
406
- # Extract class name efficiently
407
- class_name = None
408
- for child in node.children:
409
- if child.type == "identifier":
410
- class_name = self._get_node_text_optimized(child)
411
- break
412
-
413
- if not class_name:
414
- return None
415
-
416
- # Determine package name
417
- package_name = self.current_package
418
- full_qualified_name = (
419
- f"{package_name}.{class_name}" if package_name else class_name
420
- )
421
-
422
- # Determine class type (optimized: dictionary lookup)
423
- class_type_map = {
424
- "class_declaration": "class",
425
- "interface_declaration": "interface",
426
- "enum_declaration": "enum",
427
- }
428
- class_type = class_type_map.get(node.type, "class")
429
-
430
- # Extract modifiers efficiently
431
- modifiers = self._extract_modifiers_optimized(node)
432
- visibility = self._determine_visibility(modifiers)
433
-
434
- # Extract superclass and interfaces (optimized: single pass)
435
- extends_class = None
436
- implements_interfaces = []
437
-
438
- for child in node.children:
439
- if child.type == "superclass":
440
- extends_text = self._get_node_text_optimized(child)
441
- match = re.search(r"\b[A-Z]\w*", extends_text)
442
- if match:
443
- extends_class = match.group(0)
444
- elif child.type == "super_interfaces":
445
- implements_text = self._get_node_text_optimized(child)
446
- implements_interfaces = re.findall(r"\b[A-Z]\w*", implements_text)
447
-
448
- # Extract annotations for this class
449
- class_annotations = self._find_annotations_for_line_cached(start_line)
450
-
451
- # Check if this is a nested class
452
- is_nested = self._is_nested_class(node)
453
- parent_class = self._find_parent_class(node) if is_nested else None
454
-
455
- # Extract raw text
456
- start_line_idx = max(0, start_line - 1)
457
- end_line_idx = min(len(self.content_lines), end_line)
458
- raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
459
-
460
- return Class(
461
- name=class_name,
462
- start_line=start_line,
463
- end_line=end_line,
464
- raw_text=raw_text,
465
- language="java",
466
- class_type=class_type,
467
- full_qualified_name=full_qualified_name,
468
- package_name=package_name,
469
- superclass=extends_class,
470
- interfaces=implements_interfaces,
471
- modifiers=modifiers,
472
- visibility=visibility,
473
- # Java-specific detailed information
474
- annotations=class_annotations,
475
- is_nested=is_nested,
476
- parent_class=parent_class,
477
- extends_class=extends_class, # Alias for superclass
478
- implements_interfaces=implements_interfaces, # Alias for interfaces
479
- )
480
- except (AttributeError, ValueError, TypeError) as e:
481
- log_debug(f"Failed to extract class info: {e}")
482
- return None
483
- except Exception as e:
484
- log_error(f"Unexpected error in class extraction: {e}")
485
- return None
486
-
487
- def _extract_method_optimized(self, node: "tree_sitter.Node") -> Function | None:
488
- """Extract method information optimized (from AdvancedAnalyzer)"""
489
- try:
490
- start_line = node.start_point[0] + 1
491
- end_line = node.end_point[0] + 1
492
-
493
- # Extract method information efficiently
494
- method_info = self._parse_method_signature_optimized(node)
495
- if not method_info:
496
- return None
497
-
498
- method_name, return_type, parameters, modifiers, throws = method_info
499
- is_constructor = node.type == "constructor_declaration"
500
- visibility = self._determine_visibility(modifiers)
501
-
502
- # Extract annotations for this method
503
- method_annotations = self._find_annotations_for_line_cached(start_line)
504
-
505
- # Calculate complexity score
506
- complexity_score = self._calculate_complexity_optimized(node)
507
-
508
- # Extract JavaDoc
509
- javadoc = self._extract_javadoc_for_line(start_line)
510
-
511
- # Extract raw text
512
- start_line_idx = max(0, start_line - 1)
513
- end_line_idx = min(len(self.content_lines), end_line)
514
- raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
515
-
516
- return Function(
517
- name=method_name,
518
- start_line=start_line,
519
- end_line=end_line,
520
- raw_text=raw_text,
521
- language="java",
522
- parameters=parameters,
523
- return_type=return_type if not is_constructor else "void",
524
- modifiers=modifiers,
525
- is_static="static" in modifiers,
526
- is_private="private" in modifiers,
527
- is_public="public" in modifiers,
528
- is_constructor=is_constructor,
529
- visibility=visibility,
530
- docstring=javadoc,
531
- # Java-specific detailed information
532
- annotations=method_annotations,
533
- throws=throws,
534
- complexity_score=complexity_score,
535
- is_abstract="abstract" in modifiers,
536
- is_final="final" in modifiers,
537
- )
538
- except (AttributeError, ValueError, TypeError) as e:
539
- log_debug(f"Failed to extract method info: {e}")
540
- return None
541
- except Exception as e:
542
- log_error(f"Unexpected error in method extraction: {e}")
543
- return None
544
-
545
- def _extract_field_optimized(self, node: "tree_sitter.Node") -> list[Variable]:
546
- """Extract field information optimized (from AdvancedAnalyzer)"""
547
- fields: list[Variable] = []
548
- try:
549
- start_line = node.start_point[0] + 1
550
- end_line = node.end_point[0] + 1
551
-
552
- # Parse field declaration using AdvancedAnalyzer method
553
- field_info = self._parse_field_declaration_optimized(node)
554
- if not field_info:
555
- return fields
556
-
557
- field_type, variable_names, modifiers = field_info
558
- visibility = self._determine_visibility(modifiers)
559
-
560
- # Extract annotations for this field
561
- field_annotations = self._find_annotations_for_line_cached(start_line)
562
-
563
- # Extract JavaDoc for this field
564
- field_javadoc = self._extract_javadoc_for_line(start_line)
565
-
566
- # Create Variable object for each variable (matching AdvancedAnalyzer structure)
567
- for var_name in variable_names:
568
- # Extract raw text
569
- start_line_idx = max(0, start_line - 1)
570
- end_line_idx = min(len(self.content_lines), end_line)
571
- raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
572
-
573
- field = Variable(
574
- name=var_name,
575
- start_line=start_line,
576
- end_line=end_line,
577
- raw_text=raw_text,
578
- language="java",
579
- variable_type=field_type,
580
- modifiers=modifiers,
581
- is_static="static" in modifiers,
582
- is_constant="final" in modifiers,
583
- visibility=visibility,
584
- docstring=field_javadoc,
585
- # Java-specific detailed information
586
- annotations=field_annotations,
587
- is_final="final" in modifiers,
588
- field_type=field_type, # Alias for variable_type
589
- )
590
- fields.append(field)
591
- except (AttributeError, ValueError, TypeError) as e:
592
- log_debug(f"Failed to extract field info: {e}")
593
- except Exception as e:
594
- log_error(f"Unexpected error in field extraction: {e}")
595
-
596
- return fields
597
-
598
- def _parse_method_signature_optimized(
599
- self, node: "tree_sitter.Node"
600
- ) -> tuple[str, str, list[str], list[str], list[str]] | None:
601
- """Parse method signature optimized (from AdvancedAnalyzer)"""
602
- try:
603
- # Extract method name
604
- method_name = None
605
- for child in node.children:
606
- if child.type == "identifier":
607
- method_name = self._get_node_text_optimized(child)
608
- break
609
-
610
- if not method_name:
611
- return None
612
-
613
- # Extract return type
614
- return_type = "void"
615
- for child in node.children:
616
- if child.type in [
617
- "type_identifier",
618
- "void_type",
619
- "primitive_type",
620
- "integral_type",
621
- "boolean_type",
622
- "floating_point_type",
623
- "array_type",
624
- ]:
625
- return_type = self._get_node_text_optimized(child)
626
- break
627
- elif child.type == "generic_type":
628
- return_type = self._get_node_text_optimized(child)
629
- break
630
-
631
- # Extract parameters
632
- parameters = []
633
- for child in node.children:
634
- if child.type == "formal_parameters":
635
- for param in child.children:
636
- if param.type == "formal_parameter":
637
- param_text = self._get_node_text_optimized(param)
638
- parameters.append(param_text)
639
-
640
- # Extract modifiers
641
- modifiers = self._extract_modifiers_optimized(node)
642
-
643
- # Extract throws clause
644
- throws = []
645
- for child in node.children:
646
- if child.type == "throws":
647
- throws_text = self._get_node_text_optimized(child)
648
- exceptions = re.findall(r"\b[A-Z]\w*Exception\b", throws_text)
649
- throws.extend(exceptions)
650
-
651
- return method_name, return_type, parameters, modifiers, throws
652
- except Exception:
653
- return None
654
-
655
- def _parse_field_declaration_optimized(
656
- self, node: "tree_sitter.Node"
657
- ) -> tuple[str, list[str], list[str]] | None:
658
- """Parse field declaration optimized (from AdvancedAnalyzer)"""
659
- try:
660
- # Extract type (exactly as in AdvancedAnalyzer)
661
- field_type = None
662
- for child in node.children:
663
- if child.type in [
664
- "type_identifier",
665
- "primitive_type",
666
- "integral_type",
667
- "generic_type",
668
- "boolean_type",
669
- "floating_point_type",
670
- "array_type",
671
- ]:
672
- field_type = self._get_node_text_optimized(child)
673
- break
674
-
675
- if not field_type:
676
- return None
677
-
678
- # Extract variable names (exactly as in AdvancedAnalyzer)
679
- variable_names = []
680
- for child in node.children:
681
- if child.type == "variable_declarator":
682
- for grandchild in child.children:
683
- if grandchild.type == "identifier":
684
- var_name = self._get_node_text_optimized(grandchild)
685
- variable_names.append(var_name)
686
-
687
- if not variable_names:
688
- return None
689
-
690
- # Extract modifiers (exactly as in AdvancedAnalyzer)
691
- modifiers = self._extract_modifiers_optimized(node)
692
-
693
- return field_type, variable_names, modifiers
694
- except Exception:
695
- return None
696
-
697
- def _extract_modifiers_optimized(self, node: "tree_sitter.Node") -> list[str]:
698
- """Extract modifiers efficiently (from AdvancedAnalyzer)"""
699
- modifiers = []
700
- for child in node.children:
701
- if child.type == "modifiers":
702
- for mod_child in child.children:
703
- if mod_child.type in [
704
- "public",
705
- "private",
706
- "protected",
707
- "static",
708
- "final",
709
- "abstract",
710
- "synchronized",
711
- "volatile",
712
- "transient",
713
- ]:
714
- modifiers.append(mod_child.type)
715
- elif mod_child.type not in ["marker_annotation"]:
716
- mod_text = self._get_node_text_optimized(mod_child)
717
- if mod_text in [
718
- "public",
719
- "private",
720
- "protected",
721
- "static",
722
- "final",
723
- "abstract",
724
- "synchronized",
725
- "volatile",
726
- "transient",
727
- ]:
728
- modifiers.append(mod_text)
729
- return modifiers
730
-
731
- def _extract_package_info(self, node: "tree_sitter.Node") -> None:
732
- """Extract package information (from AdvancedAnalyzer)"""
733
- try:
734
- package_text = self._get_node_text_optimized(node)
735
- match = re.search(r"package\s+([\w.]+)", package_text)
736
- if match:
737
- self.current_package = match.group(1)
738
- except (AttributeError, ValueError, IndexError) as e:
739
- log_debug(f"Failed to extract package info: {e}")
740
- except Exception as e:
741
- log_error(f"Unexpected error in package extraction: {e}")
742
-
743
- def _extract_package_element(self, node: "tree_sitter.Node") -> Package | None:
744
- """Extract package element for inclusion in results"""
745
- try:
746
- package_text = self._get_node_text_optimized(node)
747
- match = re.search(r"package\s+([\w.]+)", package_text)
748
- if match:
749
- package_name = match.group(1)
750
- return Package(
751
- name=package_name,
752
- start_line=node.start_point[0] + 1,
753
- end_line=node.end_point[0] + 1,
754
- raw_text=package_text,
755
- language="java",
756
- )
757
- except (AttributeError, ValueError, IndexError) as e:
758
- log_debug(f"Failed to extract package element: {e}")
759
- except Exception as e:
760
- log_error(f"Unexpected error in package element extraction: {e}")
761
- return None
762
-
763
- def _extract_package_from_tree(self, tree: "tree_sitter.Tree") -> None:
764
- """
765
- Extract package information from the tree and set current_package.
766
-
767
- This method ensures that package information is available for class extraction
768
- regardless of the order in which extraction methods are called.
769
- """
770
- try:
771
- # Look for package declaration in the root node's children
772
- for child in tree.root_node.children:
773
- if child.type == "package_declaration":
774
- self._extract_package_info(child)
775
- break # Only one package declaration per file
776
- except Exception as e:
777
- log_debug(f"Failed to extract package from tree: {e}")
778
-
779
- def _determine_visibility(self, modifiers: list[str]) -> str:
780
- """Determine visibility from modifiers"""
781
- if "public" in modifiers:
782
- return "public"
783
- elif "private" in modifiers:
784
- return "private"
785
- elif "protected" in modifiers:
786
- return "protected"
787
- else:
788
- return "package" # Default package visibility
789
-
790
- def _find_annotations_for_line_cached(
791
- self, target_line: int
792
- ) -> list[dict[str, Any]]:
793
- """Find annotations for specified line with caching (from AdvancedAnalyzer)"""
794
- if target_line in self._annotation_cache:
795
- return self._annotation_cache[target_line]
796
-
797
- result_annotations = []
798
- for annotation in self.annotations:
799
- line_distance = target_line - annotation.get("end_line", 0)
800
- if 1 <= line_distance <= 5:
801
- result_annotations.append(annotation)
802
-
803
- self._annotation_cache[target_line] = result_annotations
804
- return result_annotations
805
-
806
- def _calculate_complexity_optimized(self, node: "tree_sitter.Node") -> int:
807
- """Calculate cyclomatic complexity efficiently (from AdvancedAnalyzer)"""
808
- complexity = 1
809
- try:
810
- node_text = self._get_node_text_optimized(node).lower()
811
- keywords = ["if", "while", "for", "catch", "case", "switch"]
812
- for keyword in keywords:
813
- complexity += node_text.count(keyword)
814
- except (AttributeError, TypeError) as e:
815
- log_debug(f"Failed to calculate complexity: {e}")
816
- except Exception as e:
817
- log_error(f"Unexpected error in complexity calculation: {e}")
818
- return complexity
819
-
820
- def _extract_javadoc_for_line(self, target_line: int) -> str | None:
821
- """Extract JavaDoc comment immediately before the specified line"""
822
- try:
823
- if not self.content_lines or target_line <= 1:
824
- return None
825
-
826
- # Search backwards from target_line
827
- javadoc_lines = []
828
- current_line = target_line - 1
829
-
830
- # Skip empty lines
831
- while current_line > 0:
832
- line = self.content_lines[current_line - 1].strip()
833
- if line:
834
- break
835
- current_line -= 1
836
-
837
- # Check for JavaDoc end
838
- if current_line > 0:
839
- line = self.content_lines[current_line - 1].strip()
840
- if line.endswith("*/"):
841
- # This might be a JavaDoc comment
842
- javadoc_lines.append(self.content_lines[current_line - 1])
843
- current_line -= 1
844
-
845
- # Collect JavaDoc content
846
- while current_line > 0:
847
- line_content = self.content_lines[current_line - 1]
848
- line_stripped = line_content.strip()
849
- javadoc_lines.append(line_content)
850
-
851
- if line_stripped.startswith("/**"):
852
- # Found the start of JavaDoc
853
- javadoc_lines.reverse()
854
- javadoc_text = "\n".join(javadoc_lines)
855
-
856
- # Clean up the JavaDoc
857
- return self._clean_javadoc(javadoc_text)
858
- current_line -= 1
859
-
860
- return None
861
-
862
- except Exception as e:
863
- log_debug(f"Failed to extract JavaDoc: {e}")
864
- return None
865
-
866
- def _clean_javadoc(self, javadoc_text: str) -> str:
867
- """Clean JavaDoc text by removing comment markers"""
868
- if not javadoc_text:
869
- return ""
870
-
871
- lines = javadoc_text.split("\n")
872
- cleaned_lines = []
873
-
874
- for line in lines:
875
- # Remove leading/trailing whitespace
876
- line = line.strip()
877
-
878
- # Remove comment markers
879
- if line.startswith("/**"):
880
- line = line[3:].strip()
881
- elif line.startswith("*/"):
882
- line = line[2:].strip()
883
- elif line.startswith("*"):
884
- line = line[1:].strip()
885
-
886
- if line: # Only add non-empty lines
887
- cleaned_lines.append(line)
888
-
889
- return " ".join(cleaned_lines) if cleaned_lines else ""
890
-
891
- def _is_nested_class(self, node: "tree_sitter.Node") -> bool:
892
- """Check if this is a nested class (from AdvancedAnalyzer)"""
893
- current = node.parent
894
- while current:
895
- if current.type in [
896
- "class_declaration",
897
- "interface_declaration",
898
- "enum_declaration",
899
- ]:
900
- return True
901
- current = current.parent
902
- return False
903
-
904
- def _find_parent_class(self, node: "tree_sitter.Node") -> str | None:
905
- """Find parent class name (from AdvancedAnalyzer)"""
906
- current = node.parent
907
- while current:
908
- if current.type in [
909
- "class_declaration",
910
- "interface_declaration",
911
- "enum_declaration",
912
- ]:
913
- return self._extract_class_name(current)
914
- current = current.parent
915
- return None
916
-
917
- def _extract_class_name(self, node: "tree_sitter.Node") -> str | None:
918
- """Extract class name from node (from AdvancedAnalyzer)"""
919
- for child in node.children:
920
- if child.type == "identifier":
921
- return self._get_node_text_optimized(child)
922
- return None
923
-
924
- def _extract_annotation_optimized(
925
- self, node: "tree_sitter.Node"
926
- ) -> dict[str, Any] | None:
927
- """Extract annotation information optimized (from AdvancedAnalyzer)"""
928
- try:
929
- start_line = node.start_point[0] + 1
930
- end_line = node.end_point[0] + 1
931
- raw_text = self._get_node_text_optimized(node)
932
-
933
- # Extract annotation name efficiently
934
- name_match = re.search(r"@(\w+)", raw_text)
935
- if not name_match:
936
- return None
937
-
938
- annotation_name = name_match.group(1)
939
-
940
- # Extract parameters efficiently
941
- parameters = []
942
- param_match = re.search(r"\((.*?)\)", raw_text, re.DOTALL)
943
- if param_match:
944
- param_text = param_match.group(1).strip()
945
- if param_text:
946
- # Simple parameter parsing
947
- if "=" in param_text:
948
- parameters = [
949
- p.strip() for p in re.split(r",(?![^()]*\))", param_text)
950
- ]
951
- else:
952
- parameters = [param_text]
953
-
954
- return {
955
- "name": annotation_name,
956
- "parameters": parameters,
957
- "start_line": start_line,
958
- "end_line": end_line,
959
- "raw_text": raw_text,
960
- }
961
- except (AttributeError, IndexError, ValueError) as e:
962
- log_debug(f"Failed to extract annotation from node: {e}")
963
- return None
964
- except Exception as e:
965
- log_error(f"Unexpected exception in annotation extraction: {e}")
966
- return None
967
-
968
- def _extract_import_info(
969
- self, node: "tree_sitter.Node", source_code: str
970
- ) -> Import | None:
971
- """Extract import information (from AdvancedAnalyzer)"""
972
- try:
973
- import_text = self._get_node_text_optimized(node)
974
- # Simple approach: get everything until semicolon then process
975
- import_content = import_text.strip()
976
- if import_content.endswith(";"):
977
- import_content = import_content[:-1]
978
-
979
- if "static" in import_content:
980
- # Static import
981
- static_match = re.search(r"import\s+static\s+([\w.]+)", import_content)
982
- if static_match:
983
- import_name = static_match.group(1)
984
- # Handle wildcard case
985
- if import_content.endswith(".*"):
986
- import_name = import_name.replace(".*", "")
987
- # For static wildcard, remove last element
988
- parts = import_name.split(".")
989
- if len(parts) > 1:
990
- import_name = ".".join(parts[:-1])
991
-
992
- return Import(
993
- name=import_name,
994
- start_line=node.start_point[0] + 1,
995
- end_line=node.end_point[0] + 1,
996
- raw_text=import_text,
997
- language="java",
998
- module_name=import_name,
999
- is_static=True,
1000
- is_wildcard=import_content.endswith(".*"),
1001
- import_statement=import_content,
1002
- )
1003
- else:
1004
- # Normal import
1005
- normal_match = re.search(r"import\s+([\w.]+)", import_content)
1006
- if normal_match:
1007
- import_name = normal_match.group(1)
1008
- # Handle wildcard case
1009
- if import_content.endswith(".*"):
1010
- if import_name.endswith(".*"):
1011
- import_name = import_name[:-2] # Remove trailing .*
1012
- elif import_name.endswith("."):
1013
- import_name = import_name[:-1] # Remove trailing .
1014
-
1015
- return Import(
1016
- name=import_name,
1017
- start_line=node.start_point[0] + 1,
1018
- end_line=node.end_point[0] + 1,
1019
- raw_text=import_text,
1020
- language="java",
1021
- module_name=import_name,
1022
- is_static=False,
1023
- is_wildcard=import_content.endswith(".*"),
1024
- import_statement=import_content,
1025
- )
1026
- except (AttributeError, ValueError, IndexError) as e:
1027
- log_debug(f"Failed to extract import info: {e}")
1028
- except Exception as e:
1029
- log_error(f"Unexpected error in import extraction: {e}")
1030
- return None
1031
-
1032
-
1033
- class JavaPlugin(LanguagePlugin):
1034
- """Java language plugin for the new architecture"""
1035
-
1036
- def __init__(self) -> None:
1037
- """Initialize the Java plugin"""
1038
- super().__init__()
1039
- self._language_cache: tree_sitter.Language | None = None
1040
-
1041
- def get_language_name(self) -> str:
1042
- """Return the name of the programming language this plugin supports"""
1043
- return "java"
1044
-
1045
- def get_file_extensions(self) -> list[str]:
1046
- """Return list of file extensions this plugin supports"""
1047
- return [".java", ".jsp", ".jspx"]
1048
-
1049
- def create_extractor(self) -> ElementExtractor:
1050
- """Create and return an element extractor for this language"""
1051
- return JavaElementExtractor()
1052
-
1053
- def get_tree_sitter_language(self) -> Optional["tree_sitter.Language"]:
1054
- """Get the Tree-sitter language object for Java"""
1055
- if self._language_cache is None:
1056
- try:
1057
- import tree_sitter_java as tsjava
1058
-
1059
- self._language_cache = tsjava.language() # type: ignore
1060
- except ImportError:
1061
- log_error("tree-sitter-java not available")
1062
- return None
1063
- except Exception as e:
1064
- log_error(f"Failed to load Java language: {e}")
1065
- return None
1066
- return self._language_cache
1067
-
1068
- def get_supported_queries(self) -> list[str]:
1069
- """Get list of supported query names for this language"""
1070
- return ["class", "method", "field", "import"]
1071
-
1072
- def is_applicable(self, file_path: str) -> bool:
1073
- """Check if this plugin is applicable for the given file"""
1074
- return any(
1075
- file_path.lower().endswith(ext.lower())
1076
- for ext in self.get_file_extensions()
1077
- )
1078
-
1079
- def get_plugin_info(self) -> dict:
1080
- """Get information about this plugin"""
1081
- return {
1082
- "name": "Java Plugin",
1083
- "language": self.get_language_name(),
1084
- "extensions": self.get_file_extensions(),
1085
- "version": "2.0.0",
1086
- "supported_queries": self.get_supported_queries(),
1087
- }
1088
-
1089
- async def analyze_file(
1090
- self, file_path: str, request: "AnalysisRequest"
1091
- ) -> "AnalysisResult":
1092
- """
1093
- Analyze a Java file and return analysis results.
1094
-
1095
- Args:
1096
- file_path: Path to the Java file to analyze
1097
- request: Analysis request object
1098
-
1099
- Returns:
1100
- AnalysisResult object containing the analysis results
1101
- """
1102
- try:
1103
- from ..core.parser import Parser
1104
- from ..models import AnalysisResult
1105
-
1106
- log_debug(f"Java Plugin: Starting analysis of {file_path}")
1107
-
1108
- # Read file content
1109
- with open(file_path, encoding="utf-8") as f:
1110
- source_code = f.read()
1111
-
1112
- log_debug(f"Java Plugin: Read {len(source_code)} characters from file")
1113
-
1114
- # Parse the file
1115
- parser = Parser()
1116
- parse_result = parser.parse_code(source_code, "java")
1117
-
1118
- log_debug(f"Java Plugin: Parse result success: {parse_result.success}")
1119
-
1120
- if not parse_result.success:
1121
- log_error(f"Java Plugin: Parse failed: {parse_result.error_message}")
1122
- return AnalysisResult(
1123
- file_path=file_path,
1124
- language="java",
1125
- line_count=len(source_code.splitlines()),
1126
- elements=[],
1127
- node_count=0,
1128
- query_results={},
1129
- source_code=source_code,
1130
- success=False,
1131
- error_message=parse_result.error_message,
1132
- )
1133
-
1134
- # Extract elements
1135
- extractor = self.create_extractor()
1136
-
1137
- if parse_result.tree:
1138
- log_debug("Java Plugin: Extracting packages...")
1139
- packages = extractor.extract_packages(parse_result.tree, source_code)
1140
- log_debug(f"Java Plugin: Found {len(packages)} packages")
1141
-
1142
- log_debug("Java Plugin: Extracting functions...")
1143
- functions = extractor.extract_functions(parse_result.tree, source_code)
1144
- log_debug(f"Java Plugin: Found {len(functions)} functions")
1145
-
1146
- log_debug("Java Plugin: Extracting classes...")
1147
- classes = extractor.extract_classes(parse_result.tree, source_code)
1148
- log_debug(f"Java Plugin: Found {len(classes)} classes")
1149
-
1150
- log_debug("Java Plugin: Extracting variables...")
1151
- variables = extractor.extract_variables(parse_result.tree, source_code)
1152
- log_debug(f"Java Plugin: Found {len(variables)} variables")
1153
-
1154
- log_debug("Java Plugin: Extracting imports...")
1155
- imports = extractor.extract_imports(parse_result.tree, source_code)
1156
- log_debug(f"Java Plugin: Found {len(imports)} imports")
1157
- else:
1158
- packages = []
1159
- functions = []
1160
- classes = []
1161
- variables = []
1162
- imports = []
1163
-
1164
- # Combine all elements
1165
- all_elements: list[CodeElement] = []
1166
- all_elements.extend(packages)
1167
- all_elements.extend(functions)
1168
- all_elements.extend(classes)
1169
- all_elements.extend(variables)
1170
- all_elements.extend(imports)
1171
- log_debug(f"Java Plugin: Total elements: {len(all_elements)}")
1172
-
1173
- return AnalysisResult(
1174
- file_path=file_path,
1175
- language="java",
1176
- line_count=len(source_code.splitlines()),
1177
- elements=all_elements,
1178
- node_count=(
1179
- parse_result.tree.root_node.child_count if parse_result.tree else 0
1180
- ),
1181
- query_results={},
1182
- source_code=source_code,
1183
- success=True,
1184
- error_message=None,
1185
- )
1186
-
1187
- except Exception as e:
1188
- log_error(f"Failed to analyze Java file {file_path}: {e}")
1189
- import traceback
1190
-
1191
- log_error(f"Java Plugin traceback: {traceback.format_exc()}")
1192
- return AnalysisResult(
1193
- file_path=file_path,
1194
- language="java",
1195
- line_count=0,
1196
- elements=[],
1197
- node_count=0,
1198
- query_results={},
1199
- source_code="",
1200
- success=False,
1201
- error_message=str(e),
1202
- )
1
+ #!/usr/bin/env python3
2
+ """
3
+ Java Language Plugin
4
+
5
+ Provides Java-specific parsing and element extraction functionality.
6
+ Migrated from AdvancedAnalyzer implementation for future independence.
7
+ """
8
+
9
+ import re
10
+ from typing import TYPE_CHECKING, Any, Optional
11
+
12
+ if TYPE_CHECKING:
13
+ import tree_sitter
14
+
15
+ from ..core.analysis_engine import AnalysisRequest
16
+ from ..models import AnalysisResult
17
+
18
+ from ..encoding_utils import extract_text_slice, safe_encode
19
+ from ..models import Class, CodeElement, Function, Import, Package, Variable
20
+ from ..plugins.base import ElementExtractor, LanguagePlugin
21
+ from ..utils import log_debug, log_error, log_warning
22
+
23
+
24
+ class JavaElementExtractor(ElementExtractor):
25
+ """Java-specific element extractor with AdvancedAnalyzer implementation"""
26
+
27
+ def __init__(self) -> None:
28
+ """Initialize the Java element extractor."""
29
+ self.current_package: str = ""
30
+ self.current_file: str = ""
31
+ self.source_code: str = ""
32
+ self.content_lines: list[str] = []
33
+ self.imports: list[str] = []
34
+
35
+ # Performance optimization caches (from AdvancedAnalyzer)
36
+ self._node_text_cache: dict[int, str] = {}
37
+ self._processed_nodes: set[int] = set()
38
+ self._element_cache: dict[tuple[int, str], Any] = {}
39
+ self._file_encoding: str | None = None
40
+ self._annotation_cache: dict[int, list[dict[str, Any]]] = {}
41
+ self._signature_cache: dict[int, str] = {}
42
+
43
+ # Extracted annotations for cross-referencing
44
+ self.annotations: list[dict[str, Any]] = []
45
+
46
+ def extract_annotations(
47
+ self, tree: "tree_sitter.Tree", source_code: str
48
+ ) -> list[dict[str, Any]]:
49
+ """Extract Java annotations using AdvancedAnalyzer implementation"""
50
+ self.source_code = source_code
51
+ self.content_lines = source_code.split("\n")
52
+ self._reset_caches()
53
+
54
+ annotations: list[dict[str, Any]] = []
55
+
56
+ # Use AdvancedAnalyzer's optimized traversal for annotations
57
+ extractors = {
58
+ "annotation": self._extract_annotation_optimized,
59
+ "marker_annotation": self._extract_annotation_optimized,
60
+ }
61
+
62
+ self._traverse_and_extract_iterative(
63
+ tree.root_node, extractors, annotations, "annotation"
64
+ )
65
+
66
+ # Store annotations for cross-referencing
67
+ self.annotations = annotations
68
+
69
+ log_debug(f"Extracted {len(annotations)} annotations")
70
+ return annotations
71
+
72
+ def extract_functions(
73
+ self, tree: "tree_sitter.Tree", source_code: str
74
+ ) -> list[Function]:
75
+ """Extract Java method definitions using AdvancedAnalyzer implementation"""
76
+ self.source_code = source_code
77
+ self.content_lines = source_code.split("\n")
78
+ self._reset_caches()
79
+
80
+ functions: list[Function] = []
81
+
82
+ # Use AdvancedAnalyzer's optimized traversal
83
+ extractors = {
84
+ "method_declaration": self._extract_method_optimized,
85
+ "constructor_declaration": self._extract_method_optimized,
86
+ }
87
+
88
+ self._traverse_and_extract_iterative(
89
+ tree.root_node, extractors, functions, "method"
90
+ )
91
+
92
+ log_debug(f"Extracted {len(functions)} methods")
93
+ return functions
94
+
95
+ def extract_classes(
96
+ self, tree: "tree_sitter.Tree", source_code: str
97
+ ) -> list[Class]:
98
+ """Extract Java class definitions using AdvancedAnalyzer implementation"""
99
+ self.source_code = source_code
100
+ self.content_lines = source_code.split("\n")
101
+ self._reset_caches()
102
+
103
+ # Ensure package information is extracted before processing classes
104
+ # This fixes the issue where current_package is empty when extract_classes
105
+ # is called independently or before extract_imports
106
+ if not self.current_package:
107
+ self._extract_package_from_tree(tree)
108
+
109
+ classes: list[Class] = []
110
+
111
+ # Use AdvancedAnalyzer's optimized traversal
112
+ extractors = {
113
+ "class_declaration": self._extract_class_optimized,
114
+ "interface_declaration": self._extract_class_optimized,
115
+ "enum_declaration": self._extract_class_optimized,
116
+ }
117
+
118
+ self._traverse_and_extract_iterative(
119
+ tree.root_node, extractors, classes, "class"
120
+ )
121
+
122
+ log_debug(f"Extracted {len(classes)} classes")
123
+ return classes
124
+
125
+ def extract_variables(
126
+ self, tree: "tree_sitter.Tree", source_code: str
127
+ ) -> list[Variable]:
128
+ """Extract Java field definitions using AdvancedAnalyzer implementation"""
129
+ self.source_code = source_code
130
+ self.content_lines = source_code.split("\n")
131
+ self._reset_caches()
132
+
133
+ variables: list[Variable] = []
134
+
135
+ # Use AdvancedAnalyzer's optimized traversal
136
+ extractors = {
137
+ "field_declaration": self._extract_field_optimized,
138
+ }
139
+
140
+ log_debug("Starting field extraction with iterative traversal")
141
+ self._traverse_and_extract_iterative(
142
+ tree.root_node, extractors, variables, "field"
143
+ )
144
+
145
+ log_debug(f"Extracted {len(variables)} fields")
146
+ for i, var in enumerate(variables[:3]):
147
+ log_debug(f"Field {i}: {var.name} ({var.variable_type})")
148
+ return variables
149
+
150
+ def extract_imports(
151
+ self, tree: "tree_sitter.Tree", source_code: str
152
+ ) -> list[Import]:
153
+ """Extract Java import statements"""
154
+ self.source_code = source_code
155
+ self.content_lines = source_code.split("\n")
156
+
157
+ imports: list[Import] = []
158
+
159
+ # Extract package and imports efficiently (from AdvancedAnalyzer)
160
+ for child in tree.root_node.children:
161
+ if child.type == "package_declaration":
162
+ self._extract_package_info(child)
163
+ elif child.type == "import_declaration":
164
+ import_info = self._extract_import_info(child, source_code)
165
+ if import_info:
166
+ imports.append(import_info)
167
+ elif child.type in [
168
+ "class_declaration",
169
+ "interface_declaration",
170
+ "enum_declaration",
171
+ ]:
172
+ # After package and imports come class declarations, so stop
173
+ break
174
+
175
+ log_debug(f"Extracted {len(imports)} imports")
176
+ return imports
177
+
178
+ def extract_packages(
179
+ self, tree: "tree_sitter.Tree", source_code: str
180
+ ) -> list[Package]:
181
+ """Extract Java package declarations"""
182
+ self.source_code = source_code
183
+ self.content_lines = source_code.split("\n")
184
+
185
+ packages: list[Package] = []
186
+
187
+ # Extract package declaration
188
+ for child in tree.root_node.children:
189
+ if child.type == "package_declaration":
190
+ package_info = self._extract_package_element(child)
191
+ if package_info:
192
+ packages.append(package_info)
193
+ break # Only one package declaration per file
194
+
195
+ log_debug(f"Extracted {len(packages)} packages")
196
+ return packages
197
+
198
+ def _reset_caches(self) -> None:
199
+ """Reset performance caches"""
200
+ self._node_text_cache.clear()
201
+ self._processed_nodes.clear()
202
+ self._element_cache.clear()
203
+ self._annotation_cache.clear()
204
+ self._signature_cache.clear()
205
+ self.annotations.clear()
206
+
207
+ def _traverse_and_extract_iterative(
208
+ self,
209
+ root_node: "tree_sitter.Node",
210
+ extractors: dict[str, Any],
211
+ results: list[Any],
212
+ element_type: str,
213
+ ) -> None:
214
+ """
215
+ Iterative node traversal and extraction (from AdvancedAnalyzer)
216
+ Uses batch processing for optimal performance
217
+ """
218
+ if not root_node:
219
+ return # type: ignore[unreachable]
220
+
221
+ # Target node types for extraction
222
+ target_node_types = set(extractors.keys())
223
+
224
+ # Container node types that may contain target nodes (from AdvancedAnalyzer)
225
+ container_node_types = {
226
+ "program",
227
+ "class_body",
228
+ "interface_body",
229
+ "enum_body",
230
+ "class_declaration",
231
+ "interface_declaration",
232
+ "enum_declaration",
233
+ "method_declaration",
234
+ "constructor_declaration",
235
+ "block",
236
+ "modifiers", # Annotation nodes can appear inside modifiers
237
+ }
238
+
239
+ # Iterative DFS stack: (node, depth)
240
+ node_stack = [(root_node, 0)]
241
+ processed_nodes = 0
242
+ max_depth = 50 # Prevent infinite loops
243
+
244
+ # Batch processing containers (from AdvancedAnalyzer)
245
+ field_batch = []
246
+
247
+ while node_stack:
248
+ current_node, depth = node_stack.pop()
249
+
250
+ # Safety check for maximum depth
251
+ if depth > max_depth:
252
+ log_warning(f"Maximum traversal depth ({max_depth}) exceeded")
253
+ continue
254
+
255
+ processed_nodes += 1
256
+ node_type = current_node.type
257
+
258
+ # Early termination: skip nodes that don't contain target elements
259
+ if (
260
+ depth > 0
261
+ and node_type not in target_node_types
262
+ and node_type not in container_node_types
263
+ ):
264
+ continue
265
+
266
+ # Collect target nodes for batch processing (from AdvancedAnalyzer)
267
+ if node_type in target_node_types:
268
+ if element_type == "field" and node_type == "field_declaration":
269
+ field_batch.append(current_node)
270
+ else:
271
+ # Process non-field elements immediately
272
+ node_id = id(current_node)
273
+
274
+ # Skip if already processed
275
+ if node_id in self._processed_nodes:
276
+ continue
277
+
278
+ # Check element cache first
279
+ cache_key = (node_id, element_type)
280
+ if cache_key in self._element_cache:
281
+ element = self._element_cache[cache_key]
282
+ if element:
283
+ if isinstance(element, list):
284
+ results.extend(element)
285
+ else:
286
+ results.append(element)
287
+ self._processed_nodes.add(node_id)
288
+ continue
289
+
290
+ # Extract and cache
291
+ extractor = extractors.get(node_type)
292
+ if extractor:
293
+ element = extractor(current_node)
294
+ self._element_cache[cache_key] = element
295
+ if element:
296
+ if isinstance(element, list):
297
+ results.extend(element)
298
+ else:
299
+ results.append(element)
300
+ self._processed_nodes.add(node_id)
301
+
302
+ # Add children to stack (reversed for correct DFS traversal)
303
+ if current_node.children:
304
+ for child in reversed(current_node.children):
305
+ node_stack.append((child, depth + 1))
306
+
307
+ # Process field batch when it reaches optimal size (from AdvancedAnalyzer)
308
+ if len(field_batch) >= 10:
309
+ self._process_field_batch(field_batch, extractors, results)
310
+ field_batch.clear()
311
+
312
+ # Process remaining field batch (from AdvancedAnalyzer)
313
+ if field_batch:
314
+ self._process_field_batch(field_batch, extractors, results)
315
+
316
+ log_debug(f"Iterative traversal processed {processed_nodes} nodes")
317
+
318
+ def _process_field_batch(
319
+ self, batch: list["tree_sitter.Node"], extractors: dict, results: list[Any]
320
+ ) -> None:
321
+ """Process field nodes with caching (from AdvancedAnalyzer)"""
322
+ for node in batch:
323
+ node_id = id(node)
324
+
325
+ # Skip if already processed
326
+ if node_id in self._processed_nodes:
327
+ continue
328
+
329
+ # Check element cache first
330
+ cache_key = (node_id, "field")
331
+ if cache_key in self._element_cache:
332
+ elements = self._element_cache[cache_key]
333
+ if elements:
334
+ if isinstance(elements, list):
335
+ results.extend(elements)
336
+ else:
337
+ results.append(elements)
338
+ self._processed_nodes.add(node_id)
339
+ continue
340
+
341
+ # Extract and cache
342
+ extractor = extractors.get(node.type)
343
+ if extractor:
344
+ elements = extractor(node)
345
+ self._element_cache[cache_key] = elements
346
+ if elements:
347
+ if isinstance(elements, list):
348
+ results.extend(elements)
349
+ else:
350
+ results.append(elements)
351
+ self._processed_nodes.add(node_id)
352
+
353
+ def _get_node_text_optimized(self, node: "tree_sitter.Node") -> str:
354
+ """Get node text with optimized caching (from AdvancedAnalyzer)"""
355
+ node_id = id(node)
356
+
357
+ # Check cache first
358
+ if node_id in self._node_text_cache:
359
+ return self._node_text_cache[node_id]
360
+
361
+ try:
362
+ # Use encoding utilities for text extraction
363
+ start_byte = node.start_byte
364
+ end_byte = node.end_byte
365
+
366
+ encoding = self._file_encoding or "utf-8"
367
+ content_bytes = safe_encode("\n".join(self.content_lines), encoding)
368
+ text = extract_text_slice(content_bytes, start_byte, end_byte, encoding)
369
+
370
+ self._node_text_cache[node_id] = text
371
+ return text
372
+ except Exception as e:
373
+ log_error(f"Error in _get_node_text_optimized: {e}")
374
+ # Fallback to simple text extraction
375
+ try:
376
+ start_point = node.start_point
377
+ end_point = node.end_point
378
+
379
+ if start_point[0] == end_point[0]:
380
+ # Single line
381
+ line = self.content_lines[start_point[0]]
382
+ return line[start_point[1] : end_point[1]]
383
+ else:
384
+ # Multiple lines
385
+ lines = []
386
+ for i in range(start_point[0], end_point[0] + 1):
387
+ if i < len(self.content_lines):
388
+ line = self.content_lines[i]
389
+ if i == start_point[0]:
390
+ lines.append(line[start_point[1] :])
391
+ elif i == end_point[0]:
392
+ lines.append(line[: end_point[1]])
393
+ else:
394
+ lines.append(line)
395
+ return "\n".join(lines)
396
+ except Exception as fallback_error:
397
+ log_error(f"Fallback text extraction also failed: {fallback_error}")
398
+ return ""
399
+
400
+ def _extract_class_optimized(self, node: "tree_sitter.Node") -> Class | None:
401
+ """Extract class information optimized (from AdvancedAnalyzer)"""
402
+ try:
403
+ start_line = node.start_point[0] + 1
404
+ end_line = node.end_point[0] + 1
405
+
406
+ # Extract class name efficiently
407
+ class_name = None
408
+ for child in node.children:
409
+ if child.type == "identifier":
410
+ class_name = self._get_node_text_optimized(child)
411
+ break
412
+
413
+ if not class_name:
414
+ return None
415
+
416
+ # Determine package name
417
+ package_name = self.current_package
418
+ full_qualified_name = (
419
+ f"{package_name}.{class_name}" if package_name else class_name
420
+ )
421
+
422
+ # Determine class type (optimized: dictionary lookup)
423
+ class_type_map = {
424
+ "class_declaration": "class",
425
+ "interface_declaration": "interface",
426
+ "enum_declaration": "enum",
427
+ }
428
+ class_type = class_type_map.get(node.type, "class")
429
+
430
+ # Extract modifiers efficiently
431
+ modifiers = self._extract_modifiers_optimized(node)
432
+ visibility = self._determine_visibility(modifiers)
433
+
434
+ # Extract superclass and interfaces (optimized: single pass)
435
+ extends_class = None
436
+ implements_interfaces = []
437
+
438
+ for child in node.children:
439
+ if child.type == "superclass":
440
+ extends_text = self._get_node_text_optimized(child)
441
+ match = re.search(r"\b[A-Z]\w*", extends_text)
442
+ if match:
443
+ extends_class = match.group(0)
444
+ elif child.type == "super_interfaces":
445
+ implements_text = self._get_node_text_optimized(child)
446
+ implements_interfaces = re.findall(r"\b[A-Z]\w*", implements_text)
447
+
448
+ # Extract annotations for this class
449
+ class_annotations = self._find_annotations_for_line_cached(start_line)
450
+
451
+ # Check if this is a nested class
452
+ is_nested = self._is_nested_class(node)
453
+ parent_class = self._find_parent_class(node) if is_nested else None
454
+
455
+ # Extract raw text
456
+ start_line_idx = max(0, start_line - 1)
457
+ end_line_idx = min(len(self.content_lines), end_line)
458
+ raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
459
+
460
+ return Class(
461
+ name=class_name,
462
+ start_line=start_line,
463
+ end_line=end_line,
464
+ raw_text=raw_text,
465
+ language="java",
466
+ class_type=class_type,
467
+ full_qualified_name=full_qualified_name,
468
+ package_name=package_name,
469
+ superclass=extends_class,
470
+ interfaces=implements_interfaces,
471
+ modifiers=modifiers,
472
+ visibility=visibility,
473
+ # Java-specific detailed information
474
+ annotations=class_annotations,
475
+ is_nested=is_nested,
476
+ parent_class=parent_class,
477
+ extends_class=extends_class, # Alias for superclass
478
+ implements_interfaces=implements_interfaces, # Alias for interfaces
479
+ )
480
+ except (AttributeError, ValueError, TypeError) as e:
481
+ log_debug(f"Failed to extract class info: {e}")
482
+ return None
483
+ except Exception as e:
484
+ log_error(f"Unexpected error in class extraction: {e}")
485
+ return None
486
+
487
+ def _extract_method_optimized(self, node: "tree_sitter.Node") -> Function | None:
488
+ """Extract method information optimized (from AdvancedAnalyzer)"""
489
+ try:
490
+ start_line = node.start_point[0] + 1
491
+ end_line = node.end_point[0] + 1
492
+
493
+ # Extract method information efficiently
494
+ method_info = self._parse_method_signature_optimized(node)
495
+ if not method_info:
496
+ return None
497
+
498
+ method_name, return_type, parameters, modifiers, throws = method_info
499
+ is_constructor = node.type == "constructor_declaration"
500
+ visibility = self._determine_visibility(modifiers)
501
+
502
+ # Extract annotations for this method
503
+ method_annotations = self._find_annotations_for_line_cached(start_line)
504
+
505
+ # Calculate complexity score
506
+ complexity_score = self._calculate_complexity_optimized(node)
507
+
508
+ # Extract JavaDoc
509
+ javadoc = self._extract_javadoc_for_line(start_line)
510
+
511
+ # Extract raw text
512
+ start_line_idx = max(0, start_line - 1)
513
+ end_line_idx = min(len(self.content_lines), end_line)
514
+ raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
515
+
516
+ return Function(
517
+ name=method_name,
518
+ start_line=start_line,
519
+ end_line=end_line,
520
+ raw_text=raw_text,
521
+ language="java",
522
+ parameters=parameters,
523
+ return_type=return_type if not is_constructor else "void",
524
+ modifiers=modifiers,
525
+ is_static="static" in modifiers,
526
+ is_private="private" in modifiers,
527
+ is_public="public" in modifiers,
528
+ is_constructor=is_constructor,
529
+ visibility=visibility,
530
+ docstring=javadoc,
531
+ # Java-specific detailed information
532
+ annotations=method_annotations,
533
+ throws=throws,
534
+ complexity_score=complexity_score,
535
+ is_abstract="abstract" in modifiers,
536
+ is_final="final" in modifiers,
537
+ )
538
+ except (AttributeError, ValueError, TypeError) as e:
539
+ log_debug(f"Failed to extract method info: {e}")
540
+ return None
541
+ except Exception as e:
542
+ log_error(f"Unexpected error in method extraction: {e}")
543
+ return None
544
+
545
+ def _extract_field_optimized(self, node: "tree_sitter.Node") -> list[Variable]:
546
+ """Extract field information optimized (from AdvancedAnalyzer)"""
547
+ fields: list[Variable] = []
548
+ try:
549
+ start_line = node.start_point[0] + 1
550
+ end_line = node.end_point[0] + 1
551
+
552
+ # Parse field declaration using AdvancedAnalyzer method
553
+ field_info = self._parse_field_declaration_optimized(node)
554
+ if not field_info:
555
+ return fields
556
+
557
+ field_type, variable_names, modifiers = field_info
558
+ visibility = self._determine_visibility(modifiers)
559
+
560
+ # Extract annotations for this field
561
+ field_annotations = self._find_annotations_for_line_cached(start_line)
562
+
563
+ # Extract JavaDoc for this field
564
+ field_javadoc = self._extract_javadoc_for_line(start_line)
565
+
566
+ # Create Variable object for each variable (matching AdvancedAnalyzer structure)
567
+ for var_name in variable_names:
568
+ # Extract raw text
569
+ start_line_idx = max(0, start_line - 1)
570
+ end_line_idx = min(len(self.content_lines), end_line)
571
+ raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
572
+
573
+ field = Variable(
574
+ name=var_name,
575
+ start_line=start_line,
576
+ end_line=end_line,
577
+ raw_text=raw_text,
578
+ language="java",
579
+ variable_type=field_type,
580
+ modifiers=modifiers,
581
+ is_static="static" in modifiers,
582
+ is_constant="final" in modifiers,
583
+ visibility=visibility,
584
+ docstring=field_javadoc,
585
+ # Java-specific detailed information
586
+ annotations=field_annotations,
587
+ is_final="final" in modifiers,
588
+ field_type=field_type, # Alias for variable_type
589
+ )
590
+ fields.append(field)
591
+ except (AttributeError, ValueError, TypeError) as e:
592
+ log_debug(f"Failed to extract field info: {e}")
593
+ except Exception as e:
594
+ log_error(f"Unexpected error in field extraction: {e}")
595
+
596
+ return fields
597
+
598
+ def _parse_method_signature_optimized(
599
+ self, node: "tree_sitter.Node"
600
+ ) -> tuple[str, str, list[str], list[str], list[str]] | None:
601
+ """Parse method signature optimized (from AdvancedAnalyzer)"""
602
+ try:
603
+ # Extract method name
604
+ method_name = None
605
+ for child in node.children:
606
+ if child.type == "identifier":
607
+ method_name = self._get_node_text_optimized(child)
608
+ break
609
+
610
+ if not method_name:
611
+ return None
612
+
613
+ # Extract return type
614
+ return_type = "void"
615
+ for child in node.children:
616
+ if child.type in [
617
+ "type_identifier",
618
+ "void_type",
619
+ "primitive_type",
620
+ "integral_type",
621
+ "boolean_type",
622
+ "floating_point_type",
623
+ "array_type",
624
+ ]:
625
+ return_type = self._get_node_text_optimized(child)
626
+ break
627
+ elif child.type == "generic_type":
628
+ return_type = self._get_node_text_optimized(child)
629
+ break
630
+
631
+ # Extract parameters
632
+ parameters = []
633
+ for child in node.children:
634
+ if child.type == "formal_parameters":
635
+ for param in child.children:
636
+ if param.type == "formal_parameter":
637
+ param_text = self._get_node_text_optimized(param)
638
+ parameters.append(param_text)
639
+
640
+ # Extract modifiers
641
+ modifiers = self._extract_modifiers_optimized(node)
642
+
643
+ # Extract throws clause
644
+ throws = []
645
+ for child in node.children:
646
+ if child.type == "throws":
647
+ throws_text = self._get_node_text_optimized(child)
648
+ exceptions = re.findall(r"\b[A-Z]\w*Exception\b", throws_text)
649
+ throws.extend(exceptions)
650
+
651
+ return method_name, return_type, parameters, modifiers, throws
652
+ except Exception:
653
+ return None
654
+
655
+ def _parse_field_declaration_optimized(
656
+ self, node: "tree_sitter.Node"
657
+ ) -> tuple[str, list[str], list[str]] | None:
658
+ """Parse field declaration optimized (from AdvancedAnalyzer)"""
659
+ try:
660
+ # Extract type (exactly as in AdvancedAnalyzer)
661
+ field_type = None
662
+ for child in node.children:
663
+ if child.type in [
664
+ "type_identifier",
665
+ "primitive_type",
666
+ "integral_type",
667
+ "generic_type",
668
+ "boolean_type",
669
+ "floating_point_type",
670
+ "array_type",
671
+ ]:
672
+ field_type = self._get_node_text_optimized(child)
673
+ break
674
+
675
+ if not field_type:
676
+ return None
677
+
678
+ # Extract variable names (exactly as in AdvancedAnalyzer)
679
+ variable_names = []
680
+ for child in node.children:
681
+ if child.type == "variable_declarator":
682
+ for grandchild in child.children:
683
+ if grandchild.type == "identifier":
684
+ var_name = self._get_node_text_optimized(grandchild)
685
+ variable_names.append(var_name)
686
+
687
+ if not variable_names:
688
+ return None
689
+
690
+ # Extract modifiers (exactly as in AdvancedAnalyzer)
691
+ modifiers = self._extract_modifiers_optimized(node)
692
+
693
+ return field_type, variable_names, modifiers
694
+ except Exception:
695
+ return None
696
+
697
+ def _extract_modifiers_optimized(self, node: "tree_sitter.Node") -> list[str]:
698
+ """Extract modifiers efficiently (from AdvancedAnalyzer)"""
699
+ modifiers = []
700
+ for child in node.children:
701
+ if child.type == "modifiers":
702
+ for mod_child in child.children:
703
+ if mod_child.type in [
704
+ "public",
705
+ "private",
706
+ "protected",
707
+ "static",
708
+ "final",
709
+ "abstract",
710
+ "synchronized",
711
+ "volatile",
712
+ "transient",
713
+ ]:
714
+ modifiers.append(mod_child.type)
715
+ elif mod_child.type not in ["marker_annotation"]:
716
+ mod_text = self._get_node_text_optimized(mod_child)
717
+ if mod_text in [
718
+ "public",
719
+ "private",
720
+ "protected",
721
+ "static",
722
+ "final",
723
+ "abstract",
724
+ "synchronized",
725
+ "volatile",
726
+ "transient",
727
+ ]:
728
+ modifiers.append(mod_text)
729
+ return modifiers
730
+
731
+ def _extract_package_info(self, node: "tree_sitter.Node") -> None:
732
+ """Extract package information (from AdvancedAnalyzer)"""
733
+ try:
734
+ package_text = self._get_node_text_optimized(node)
735
+ match = re.search(r"package\s+([\w.]+)", package_text)
736
+ if match:
737
+ self.current_package = match.group(1)
738
+ except (AttributeError, ValueError, IndexError) as e:
739
+ log_debug(f"Failed to extract package info: {e}")
740
+ except Exception as e:
741
+ log_error(f"Unexpected error in package extraction: {e}")
742
+
743
+ def _extract_package_element(self, node: "tree_sitter.Node") -> Package | None:
744
+ """Extract package element for inclusion in results"""
745
+ try:
746
+ package_text = self._get_node_text_optimized(node)
747
+ match = re.search(r"package\s+([\w.]+)", package_text)
748
+ if match:
749
+ package_name = match.group(1)
750
+ return Package(
751
+ name=package_name,
752
+ start_line=node.start_point[0] + 1,
753
+ end_line=node.end_point[0] + 1,
754
+ raw_text=package_text,
755
+ language="java",
756
+ )
757
+ except (AttributeError, ValueError, IndexError) as e:
758
+ log_debug(f"Failed to extract package element: {e}")
759
+ except Exception as e:
760
+ log_error(f"Unexpected error in package element extraction: {e}")
761
+ return None
762
+
763
+ def _extract_package_from_tree(self, tree: "tree_sitter.Tree") -> None:
764
+ """
765
+ Extract package information from the tree and set current_package.
766
+
767
+ This method ensures that package information is available for class extraction
768
+ regardless of the order in which extraction methods are called.
769
+ """
770
+ try:
771
+ # Look for package declaration in the root node's children
772
+ for child in tree.root_node.children:
773
+ if child.type == "package_declaration":
774
+ self._extract_package_info(child)
775
+ break # Only one package declaration per file
776
+ except Exception as e:
777
+ log_debug(f"Failed to extract package from tree: {e}")
778
+
779
+ def _determine_visibility(self, modifiers: list[str]) -> str:
780
+ """Determine visibility from modifiers"""
781
+ if "public" in modifiers:
782
+ return "public"
783
+ elif "private" in modifiers:
784
+ return "private"
785
+ elif "protected" in modifiers:
786
+ return "protected"
787
+ else:
788
+ return "package" # Default package visibility
789
+
790
+ def _find_annotations_for_line_cached(
791
+ self, target_line: int
792
+ ) -> list[dict[str, Any]]:
793
+ """Find annotations for specified line with caching (from AdvancedAnalyzer)"""
794
+ if target_line in self._annotation_cache:
795
+ return self._annotation_cache[target_line]
796
+
797
+ result_annotations = []
798
+ for annotation in self.annotations:
799
+ line_distance = target_line - annotation.get("end_line", 0)
800
+ if 1 <= line_distance <= 5:
801
+ result_annotations.append(annotation)
802
+
803
+ self._annotation_cache[target_line] = result_annotations
804
+ return result_annotations
805
+
806
+ def _calculate_complexity_optimized(self, node: "tree_sitter.Node") -> int:
807
+ """Calculate cyclomatic complexity efficiently (from AdvancedAnalyzer)"""
808
+ complexity = 1
809
+ try:
810
+ node_text = self._get_node_text_optimized(node).lower()
811
+ keywords = ["if", "while", "for", "catch", "case", "switch"]
812
+ for keyword in keywords:
813
+ complexity += node_text.count(keyword)
814
+ except (AttributeError, TypeError) as e:
815
+ log_debug(f"Failed to calculate complexity: {e}")
816
+ except Exception as e:
817
+ log_error(f"Unexpected error in complexity calculation: {e}")
818
+ return complexity
819
+
820
def _extract_javadoc_for_line(self, target_line: int) -> str | None:
    """Extract JavaDoc comment immediately before the specified line.

    Scans ``self.content_lines`` backwards from ``target_line`` (1-based),
    skipping blank lines, and returns the cleaned single-line JavaDoc text
    when a ``/** ... */`` block directly precedes the target; otherwise None.
    """
    try:
        if not self.content_lines or target_line <= 1:
            return None

        # Search backwards from target_line
        javadoc_lines = []
        current_line = target_line - 1

        # Skip empty lines between the comment and the documented element
        while current_line > 0:
            line = self.content_lines[current_line - 1].strip()
            if line:
                break
            current_line -= 1

        # Check for JavaDoc end: nearest non-blank line must close with "*/"
        if current_line > 0:
            line = self.content_lines[current_line - 1].strip()
            if line.endswith("*/"):
                # This might be a JavaDoc comment
                javadoc_lines.append(self.content_lines[current_line - 1])
                current_line -= 1

        # Collect JavaDoc content upwards until the opening "/**" is seen.
        # NOTE(review): a one-line "/** ... */" comment is consumed by the
        # "*/" check above and decremented past, so its "/**" is never seen
        # by this loop and the comment is not returned — confirm intended.
        # Also, when the "*/" check did not fire, lines are still gathered,
        # so an earlier unrelated "/**" could be picked up.
        while current_line > 0:
            line_content = self.content_lines[current_line - 1]
            line_stripped = line_content.strip()
            javadoc_lines.append(line_content)

            if line_stripped.startswith("/**"):
                # Found the start of JavaDoc; restore top-down order
                javadoc_lines.reverse()
                javadoc_text = "\n".join(javadoc_lines)

                # Clean up the JavaDoc (strip markers, join to one line)
                return self._clean_javadoc(javadoc_text)
            current_line -= 1

        return None

    except Exception as e:
        log_debug(f"Failed to extract JavaDoc: {e}")
        return None
+
866
+ def _clean_javadoc(self, javadoc_text: str) -> str:
867
+ """Clean JavaDoc text by removing comment markers"""
868
+ if not javadoc_text:
869
+ return ""
870
+
871
+ lines = javadoc_text.split("\n")
872
+ cleaned_lines = []
873
+
874
+ for line in lines:
875
+ # Remove leading/trailing whitespace
876
+ line = line.strip()
877
+
878
+ # Remove comment markers
879
+ if line.startswith("/**"):
880
+ line = line[3:].strip()
881
+ elif line.startswith("*/"):
882
+ line = line[2:].strip()
883
+ elif line.startswith("*"):
884
+ line = line[1:].strip()
885
+
886
+ if line: # Only add non-empty lines
887
+ cleaned_lines.append(line)
888
+
889
+ return " ".join(cleaned_lines) if cleaned_lines else ""
890
+
891
+ def _is_nested_class(self, node: "tree_sitter.Node") -> bool:
892
+ """Check if this is a nested class (from AdvancedAnalyzer)"""
893
+ current = node.parent
894
+ while current:
895
+ if current.type in [
896
+ "class_declaration",
897
+ "interface_declaration",
898
+ "enum_declaration",
899
+ ]:
900
+ return True
901
+ current = current.parent
902
+ return False
903
+
904
+ def _find_parent_class(self, node: "tree_sitter.Node") -> str | None:
905
+ """Find parent class name (from AdvancedAnalyzer)"""
906
+ current = node.parent
907
+ while current:
908
+ if current.type in [
909
+ "class_declaration",
910
+ "interface_declaration",
911
+ "enum_declaration",
912
+ ]:
913
+ return self._extract_class_name(current)
914
+ current = current.parent
915
+ return None
916
+
917
+ def _extract_class_name(self, node: "tree_sitter.Node") -> str | None:
918
+ """Extract class name from node (from AdvancedAnalyzer)"""
919
+ for child in node.children:
920
+ if child.type == "identifier":
921
+ return self._get_node_text_optimized(child)
922
+ return None
923
+
924
+ def _extract_annotation_optimized(
925
+ self, node: "tree_sitter.Node"
926
+ ) -> dict[str, Any] | None:
927
+ """Extract annotation information optimized (from AdvancedAnalyzer)"""
928
+ try:
929
+ start_line = node.start_point[0] + 1
930
+ end_line = node.end_point[0] + 1
931
+ raw_text = self._get_node_text_optimized(node)
932
+
933
+ # Extract annotation name efficiently
934
+ name_match = re.search(r"@(\w+)", raw_text)
935
+ if not name_match:
936
+ return None
937
+
938
+ annotation_name = name_match.group(1)
939
+
940
+ # Extract parameters efficiently
941
+ parameters = []
942
+ param_match = re.search(r"\((.*?)\)", raw_text, re.DOTALL)
943
+ if param_match:
944
+ param_text = param_match.group(1).strip()
945
+ if param_text:
946
+ # Simple parameter parsing
947
+ if "=" in param_text:
948
+ parameters = [
949
+ p.strip() for p in re.split(r",(?![^()]*\))", param_text)
950
+ ]
951
+ else:
952
+ parameters = [param_text]
953
+
954
+ return {
955
+ "name": annotation_name,
956
+ "parameters": parameters,
957
+ "start_line": start_line,
958
+ "end_line": end_line,
959
+ "raw_text": raw_text,
960
+ }
961
+ except (AttributeError, IndexError, ValueError) as e:
962
+ log_debug(f"Failed to extract annotation from node: {e}")
963
+ return None
964
+ except Exception as e:
965
+ log_error(f"Unexpected exception in annotation extraction: {e}")
966
+ return None
967
+
968
+ def _extract_import_info(
969
+ self, node: "tree_sitter.Node", source_code: str
970
+ ) -> Import | None:
971
+ """Extract import information (from AdvancedAnalyzer)"""
972
+ try:
973
+ import_text = self._get_node_text_optimized(node)
974
+ # Simple approach: get everything until semicolon then process
975
+ import_content = import_text.strip()
976
+ if import_content.endswith(";"):
977
+ import_content = import_content[:-1]
978
+
979
+ if "static" in import_content:
980
+ # Static import
981
+ static_match = re.search(r"import\s+static\s+([\w.]+)", import_content)
982
+ if static_match:
983
+ import_name = static_match.group(1)
984
+ # Handle wildcard case
985
+ if import_content.endswith(".*"):
986
+ import_name = import_name.replace(".*", "")
987
+ # For static wildcard, remove last element
988
+ parts = import_name.split(".")
989
+ if len(parts) > 1:
990
+ import_name = ".".join(parts[:-1])
991
+
992
+ return Import(
993
+ name=import_name,
994
+ start_line=node.start_point[0] + 1,
995
+ end_line=node.end_point[0] + 1,
996
+ raw_text=import_text,
997
+ language="java",
998
+ module_name=import_name,
999
+ is_static=True,
1000
+ is_wildcard=import_content.endswith(".*"),
1001
+ import_statement=import_content,
1002
+ )
1003
+ else:
1004
+ # Normal import
1005
+ normal_match = re.search(r"import\s+([\w.]+)", import_content)
1006
+ if normal_match:
1007
+ import_name = normal_match.group(1)
1008
+ # Handle wildcard case
1009
+ if import_content.endswith(".*"):
1010
+ if import_name.endswith(".*"):
1011
+ import_name = import_name[:-2] # Remove trailing .*
1012
+ elif import_name.endswith("."):
1013
+ import_name = import_name[:-1] # Remove trailing .
1014
+
1015
+ return Import(
1016
+ name=import_name,
1017
+ start_line=node.start_point[0] + 1,
1018
+ end_line=node.end_point[0] + 1,
1019
+ raw_text=import_text,
1020
+ language="java",
1021
+ module_name=import_name,
1022
+ is_static=False,
1023
+ is_wildcard=import_content.endswith(".*"),
1024
+ import_statement=import_content,
1025
+ )
1026
+ except (AttributeError, ValueError, IndexError) as e:
1027
+ log_debug(f"Failed to extract import info: {e}")
1028
+ except Exception as e:
1029
+ log_error(f"Unexpected error in import extraction: {e}")
1030
+ return None
1031
+
1032
+
1033
class JavaPlugin(LanguagePlugin):
    """Java language plugin for the new architecture.

    Bridges the plugin framework to tree-sitter-java: reports the
    language name and supported file extensions, lazily loads and caches
    the grammar, and drives a ``JavaElementExtractor`` over parsed files.
    """

    def __init__(self) -> None:
        """Initialize the Java plugin with an empty grammar cache."""
        super().__init__()
        # Populated on first call to get_tree_sitter_language().
        self._language_cache: tree_sitter.Language | None = None

    def get_language_name(self) -> str:
        """Return the name of the programming language this plugin supports"""
        return "java"

    def get_file_extensions(self) -> list[str]:
        """Return list of file extensions this plugin supports"""
        # JSP/JSPX files embed Java, so this plugin claims them too.
        return [".java", ".jsp", ".jspx"]

    def create_extractor(self) -> ElementExtractor:
        """Create and return an element extractor for this language"""
        return JavaElementExtractor()

    def get_tree_sitter_language(self) -> Optional["tree_sitter.Language"]:
        """Get the Tree-sitter language object for Java.

        The grammar is imported lazily and cached on the instance.
        Returns None (and logs) when tree-sitter-java is not installed
        or fails to load; a failed load is retried on the next call
        because the cache stays None.
        """
        if self._language_cache is None:
            try:
                import tree_sitter_java as tsjava

                self._language_cache = tsjava.language()  # type: ignore
            except ImportError:
                log_error("tree-sitter-java not available")
                return None
            except Exception as e:
                log_error(f"Failed to load Java language: {e}")
                return None
        return self._language_cache

    def get_supported_queries(self) -> list[str]:
        """Get list of supported query names for this language"""
        return ["class", "method", "field", "import"]

    def is_applicable(self, file_path: str) -> bool:
        """Check if this plugin is applicable for the given file"""
        # Case-insensitive suffix match against the supported extensions.
        return any(
            file_path.lower().endswith(ext.lower())
            for ext in self.get_file_extensions()
        )

    def get_plugin_info(self) -> dict:
        """Get information about this plugin"""
        return {
            "name": "Java Plugin",
            "language": self.get_language_name(),
            "extensions": self.get_file_extensions(),
            "version": "2.0.0",
            "supported_queries": self.get_supported_queries(),
        }

    async def analyze_file(
        self, file_path: str, request: "AnalysisRequest"
    ) -> "AnalysisResult":
        """
        Analyze a Java file and return analysis results.

        Reads the file, parses it with the Java grammar, extracts
        packages/functions/classes/variables/imports, and wraps
        everything in an AnalysisResult. This method never raises:
        I/O or parse failures are reported via ``success=False`` and
        ``error_message`` on the returned result.

        Args:
            file_path: Path to the Java file to analyze
            request: Analysis request object

        Returns:
            AnalysisResult object containing the analysis results
        """
        try:
            from ..core.parser import Parser
            from ..models import AnalysisResult

            log_debug(f"Java Plugin: Starting analysis of {file_path}")

            # Read file content
            # NOTE(review): assumes UTF-8; files in other encodings raise
            # here and surface as a failed analysis result below.
            with open(file_path, encoding="utf-8") as f:
                source_code = f.read()

            log_debug(f"Java Plugin: Read {len(source_code)} characters from file")

            # Parse the file
            parser = Parser()
            parse_result = parser.parse_code(source_code, "java")

            log_debug(f"Java Plugin: Parse result success: {parse_result.success}")

            if not parse_result.success:
                log_error(f"Java Plugin: Parse failed: {parse_result.error_message}")
                return AnalysisResult(
                    file_path=file_path,
                    language="java",
                    line_count=len(source_code.splitlines()),
                    elements=[],
                    node_count=0,
                    query_results={},
                    source_code=source_code,
                    success=False,
                    error_message=parse_result.error_message,
                )

            # Extract elements
            extractor = self.create_extractor()

            if parse_result.tree:
                log_debug("Java Plugin: Extracting packages...")
                packages = extractor.extract_packages(parse_result.tree, source_code)
                log_debug(f"Java Plugin: Found {len(packages)} packages")

                log_debug("Java Plugin: Extracting functions...")
                functions = extractor.extract_functions(parse_result.tree, source_code)
                log_debug(f"Java Plugin: Found {len(functions)} functions")

                log_debug("Java Plugin: Extracting classes...")
                classes = extractor.extract_classes(parse_result.tree, source_code)
                log_debug(f"Java Plugin: Found {len(classes)} classes")

                log_debug("Java Plugin: Extracting variables...")
                variables = extractor.extract_variables(parse_result.tree, source_code)
                log_debug(f"Java Plugin: Found {len(variables)} variables")

                log_debug("Java Plugin: Extracting imports...")
                imports = extractor.extract_imports(parse_result.tree, source_code)
                log_debug(f"Java Plugin: Found {len(imports)} imports")
            else:
                # No tree despite a successful parse result: report empty.
                packages = []
                functions = []
                classes = []
                variables = []
                imports = []

            # Combine all elements into a single flat list.
            all_elements: list[CodeElement] = []
            all_elements.extend(packages)
            all_elements.extend(functions)
            all_elements.extend(classes)
            all_elements.extend(variables)
            all_elements.extend(imports)
            log_debug(f"Java Plugin: Total elements: {len(all_elements)}")

            return AnalysisResult(
                file_path=file_path,
                language="java",
                line_count=len(source_code.splitlines()),
                elements=all_elements,
                # NOTE(review): child_count counts direct children of the
                # root only, not all nodes — confirm intended metric.
                node_count=(
                    parse_result.tree.root_node.child_count if parse_result.tree else 0
                ),
                query_results={},
                source_code=source_code,
                success=True,
                error_message=None,
            )

        except Exception as e:
            log_error(f"Failed to analyze Java file {file_path}: {e}")
            import traceback

            log_error(f"Java Plugin traceback: {traceback.format_exc()}")
            return AnalysisResult(
                file_path=file_path,
                language="java",
                line_count=0,
                elements=[],
                node_count=0,
                query_results={},
                source_code="",
                success=False,
                error_message=str(e),
            )