tree-sitter-analyzer 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (78) hide show
  1. tree_sitter_analyzer/__init__.py +121 -0
  2. tree_sitter_analyzer/__main__.py +12 -0
  3. tree_sitter_analyzer/api.py +539 -0
  4. tree_sitter_analyzer/cli/__init__.py +39 -0
  5. tree_sitter_analyzer/cli/__main__.py +13 -0
  6. tree_sitter_analyzer/cli/commands/__init__.py +27 -0
  7. tree_sitter_analyzer/cli/commands/advanced_command.py +88 -0
  8. tree_sitter_analyzer/cli/commands/base_command.py +155 -0
  9. tree_sitter_analyzer/cli/commands/default_command.py +19 -0
  10. tree_sitter_analyzer/cli/commands/partial_read_command.py +133 -0
  11. tree_sitter_analyzer/cli/commands/query_command.py +82 -0
  12. tree_sitter_analyzer/cli/commands/structure_command.py +121 -0
  13. tree_sitter_analyzer/cli/commands/summary_command.py +93 -0
  14. tree_sitter_analyzer/cli/commands/table_command.py +233 -0
  15. tree_sitter_analyzer/cli/info_commands.py +121 -0
  16. tree_sitter_analyzer/cli_main.py +276 -0
  17. tree_sitter_analyzer/core/__init__.py +20 -0
  18. tree_sitter_analyzer/core/analysis_engine.py +574 -0
  19. tree_sitter_analyzer/core/cache_service.py +330 -0
  20. tree_sitter_analyzer/core/engine.py +560 -0
  21. tree_sitter_analyzer/core/parser.py +288 -0
  22. tree_sitter_analyzer/core/query.py +502 -0
  23. tree_sitter_analyzer/encoding_utils.py +460 -0
  24. tree_sitter_analyzer/exceptions.py +340 -0
  25. tree_sitter_analyzer/file_handler.py +222 -0
  26. tree_sitter_analyzer/formatters/__init__.py +1 -0
  27. tree_sitter_analyzer/formatters/base_formatter.py +168 -0
  28. tree_sitter_analyzer/formatters/formatter_factory.py +74 -0
  29. tree_sitter_analyzer/formatters/java_formatter.py +270 -0
  30. tree_sitter_analyzer/formatters/python_formatter.py +235 -0
  31. tree_sitter_analyzer/interfaces/__init__.py +10 -0
  32. tree_sitter_analyzer/interfaces/cli.py +557 -0
  33. tree_sitter_analyzer/interfaces/cli_adapter.py +319 -0
  34. tree_sitter_analyzer/interfaces/mcp_adapter.py +170 -0
  35. tree_sitter_analyzer/interfaces/mcp_server.py +416 -0
  36. tree_sitter_analyzer/java_analyzer.py +219 -0
  37. tree_sitter_analyzer/language_detector.py +400 -0
  38. tree_sitter_analyzer/language_loader.py +228 -0
  39. tree_sitter_analyzer/languages/__init__.py +11 -0
  40. tree_sitter_analyzer/languages/java_plugin.py +1113 -0
  41. tree_sitter_analyzer/languages/python_plugin.py +712 -0
  42. tree_sitter_analyzer/mcp/__init__.py +32 -0
  43. tree_sitter_analyzer/mcp/resources/__init__.py +47 -0
  44. tree_sitter_analyzer/mcp/resources/code_file_resource.py +213 -0
  45. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +550 -0
  46. tree_sitter_analyzer/mcp/server.py +319 -0
  47. tree_sitter_analyzer/mcp/tools/__init__.py +36 -0
  48. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +558 -0
  49. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +245 -0
  50. tree_sitter_analyzer/mcp/tools/base_tool.py +55 -0
  51. tree_sitter_analyzer/mcp/tools/get_positions_tool.py +448 -0
  52. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +302 -0
  53. tree_sitter_analyzer/mcp/tools/table_format_tool.py +359 -0
  54. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +476 -0
  55. tree_sitter_analyzer/mcp/utils/__init__.py +106 -0
  56. tree_sitter_analyzer/mcp/utils/error_handler.py +549 -0
  57. tree_sitter_analyzer/models.py +481 -0
  58. tree_sitter_analyzer/output_manager.py +264 -0
  59. tree_sitter_analyzer/plugins/__init__.py +334 -0
  60. tree_sitter_analyzer/plugins/base.py +446 -0
  61. tree_sitter_analyzer/plugins/java_plugin.py +625 -0
  62. tree_sitter_analyzer/plugins/javascript_plugin.py +439 -0
  63. tree_sitter_analyzer/plugins/manager.py +355 -0
  64. tree_sitter_analyzer/plugins/plugin_loader.py +83 -0
  65. tree_sitter_analyzer/plugins/python_plugin.py +598 -0
  66. tree_sitter_analyzer/plugins/registry.py +366 -0
  67. tree_sitter_analyzer/queries/__init__.py +27 -0
  68. tree_sitter_analyzer/queries/java.py +394 -0
  69. tree_sitter_analyzer/queries/javascript.py +149 -0
  70. tree_sitter_analyzer/queries/python.py +286 -0
  71. tree_sitter_analyzer/queries/typescript.py +230 -0
  72. tree_sitter_analyzer/query_loader.py +260 -0
  73. tree_sitter_analyzer/table_formatter.py +448 -0
  74. tree_sitter_analyzer/utils.py +201 -0
  75. tree_sitter_analyzer-0.1.0.dist-info/METADATA +581 -0
  76. tree_sitter_analyzer-0.1.0.dist-info/RECORD +78 -0
  77. tree_sitter_analyzer-0.1.0.dist-info/WHEEL +4 -0
  78. tree_sitter_analyzer-0.1.0.dist-info/entry_points.txt +8 -0
@@ -0,0 +1,1113 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Java Language Plugin
5
+
6
+ Provides Java-specific parsing and element extraction functionality.
7
+ Migrated from AdvancedAnalyzer implementation for future independence.
8
+ """
9
+
10
+ import re
11
+ from functools import lru_cache
12
+ from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Any
13
+
14
+ if TYPE_CHECKING:
15
+ import tree_sitter
16
+ from ..models import AnalysisRequest, AnalysisResult
17
+
18
+ from ..plugins.base import LanguagePlugin, ElementExtractor
19
+ from ..models import (
20
+ Class,
21
+ Function,
22
+ Import,
23
+ Variable,
24
+ Package,
25
+ )
26
+ from ..utils import log_debug, log_error, log_warning
27
+ from ..encoding_utils import extract_text_slice, safe_encode
28
+
29
+
30
+ class JavaElementExtractor(ElementExtractor):
31
+ """Java-specific element extractor with AdvancedAnalyzer implementation"""
32
+
33
+ def __init__(self) -> None:
34
+ """Initialize the Java element extractor."""
35
+ self.current_package: str = ""
36
+ self.current_file: str = ""
37
+ self.source_code: str = ""
38
+ self.content_lines: List[str] = []
39
+ self.imports: List[str] = []
40
+
41
+ # Performance optimization caches (from AdvancedAnalyzer)
42
+ self._node_text_cache: Dict[int, str] = {}
43
+ self._processed_nodes: Set[int] = set()
44
+ self._element_cache: Dict[Tuple[int, str], Any] = {}
45
+ self._file_encoding: Optional[str] = None
46
+ self._annotation_cache: Dict[int, List[Dict[str, Any]]] = {}
47
+ self._signature_cache: Dict[int, str] = {}
48
+
49
+ # Extracted annotations for cross-referencing
50
+ self.annotations: List[Dict[str, Any]] = []
51
+
52
+ def extract_annotations(
53
+ self, tree: "tree_sitter.Tree", source_code: str
54
+ ) -> List[Dict[str, Any]]:
55
+ """Extract Java annotations using AdvancedAnalyzer implementation"""
56
+ self.source_code = source_code
57
+ self.content_lines = source_code.split("\n")
58
+ self._reset_caches()
59
+
60
+ annotations: List[Dict[str, Any]] = []
61
+
62
+ # Use AdvancedAnalyzer's optimized traversal for annotations
63
+ extractors = {
64
+ "annotation": self._extract_annotation_optimized,
65
+ "marker_annotation": self._extract_annotation_optimized,
66
+ }
67
+
68
+ self._traverse_and_extract_iterative(tree.root_node, extractors, annotations, "annotation")
69
+
70
+ # Store annotations for cross-referencing
71
+ self.annotations = annotations
72
+
73
+ log_debug(f"Extracted {len(annotations)} annotations")
74
+ return annotations
75
+
76
+ def extract_functions(
77
+ self, tree: "tree_sitter.Tree", source_code: str
78
+ ) -> List[Function]:
79
+ """Extract Java method definitions using AdvancedAnalyzer implementation"""
80
+ self.source_code = source_code
81
+ self.content_lines = source_code.split("\n")
82
+ self._reset_caches()
83
+
84
+ functions: List[Function] = []
85
+
86
+ # Use AdvancedAnalyzer's optimized traversal
87
+ extractors = {
88
+ "method_declaration": self._extract_method_optimized,
89
+ "constructor_declaration": self._extract_method_optimized,
90
+ }
91
+
92
+ self._traverse_and_extract_iterative(tree.root_node, extractors, functions, "method")
93
+
94
+ log_debug(f"Extracted {len(functions)} methods")
95
+ return functions
96
+
97
+ def extract_classes(
98
+ self, tree: "tree_sitter.Tree", source_code: str
99
+ ) -> List[Class]:
100
+ """Extract Java class definitions using AdvancedAnalyzer implementation"""
101
+ self.source_code = source_code
102
+ self.content_lines = source_code.split("\n")
103
+ self._reset_caches()
104
+
105
+ classes: List[Class] = []
106
+
107
+ # Use AdvancedAnalyzer's optimized traversal
108
+ extractors = {
109
+ "class_declaration": self._extract_class_optimized,
110
+ "interface_declaration": self._extract_class_optimized,
111
+ "enum_declaration": self._extract_class_optimized,
112
+ }
113
+
114
+ self._traverse_and_extract_iterative(tree.root_node, extractors, classes, "class")
115
+
116
+ log_debug(f"Extracted {len(classes)} classes")
117
+ return classes
118
+
119
+ def extract_variables(
120
+ self, tree: "tree_sitter.Tree", source_code: str
121
+ ) -> List[Variable]:
122
+ """Extract Java field definitions using AdvancedAnalyzer implementation"""
123
+ self.source_code = source_code
124
+ self.content_lines = source_code.split("\n")
125
+ self._reset_caches()
126
+
127
+ variables: List[Variable] = []
128
+
129
+ # Use AdvancedAnalyzer's optimized traversal
130
+ extractors = {
131
+ "field_declaration": self._extract_field_optimized,
132
+ }
133
+
134
+ log_debug("Starting field extraction with iterative traversal")
135
+ self._traverse_and_extract_iterative(tree.root_node, extractors, variables, "field")
136
+
137
+ log_debug(f"Extracted {len(variables)} fields")
138
+ for i, var in enumerate(variables[:3]):
139
+ log_debug(f"Field {i}: {var.name} ({var.variable_type})")
140
+ return variables
141
+
142
+ def extract_imports(
143
+ self, tree: "tree_sitter.Tree", source_code: str
144
+ ) -> List[Import]:
145
+ """Extract Java import statements"""
146
+ self.source_code = source_code
147
+ self.content_lines = source_code.split("\n")
148
+
149
+ imports: List[Import] = []
150
+
151
+ # Extract package and imports efficiently (from AdvancedAnalyzer)
152
+ for child in tree.root_node.children:
153
+ if child.type == "package_declaration":
154
+ self._extract_package_info(child)
155
+ elif child.type == "import_declaration":
156
+ import_info = self._extract_import_info(child, source_code)
157
+ if import_info:
158
+ imports.append(import_info)
159
+ elif child.type in [
160
+ "class_declaration",
161
+ "interface_declaration",
162
+ "enum_declaration",
163
+ ]:
164
+ # After package and imports come class declarations, so stop
165
+ break
166
+
167
+ log_debug(f"Extracted {len(imports)} imports")
168
+ return imports
169
+
170
+ def extract_packages(
171
+ self, tree: "tree_sitter.Tree", source_code: str
172
+ ) -> List[Package]:
173
+ """Extract Java package declarations"""
174
+ self.source_code = source_code
175
+ self.content_lines = source_code.split("\n")
176
+
177
+ packages: List[Package] = []
178
+
179
+ # Extract package declaration
180
+ for child in tree.root_node.children:
181
+ if child.type == "package_declaration":
182
+ package_info = self._extract_package_element(child)
183
+ if package_info:
184
+ packages.append(package_info)
185
+ break # Only one package declaration per file
186
+
187
+ log_debug(f"Extracted {len(packages)} packages")
188
+ return packages
189
+
190
+ def _reset_caches(self) -> None:
191
+ """Reset performance caches"""
192
+ self._node_text_cache.clear()
193
+ self._processed_nodes.clear()
194
+ self._element_cache.clear()
195
+ self._annotation_cache.clear()
196
+ self._signature_cache.clear()
197
+ self.annotations.clear()
198
+
199
    def _traverse_and_extract_iterative(
        self,
        root_node: "tree_sitter.Node",
        extractors: Dict[str, Any],
        results: List[Any],
        element_type: str
    ) -> None:
        """
        Iterative node traversal and extraction (from AdvancedAnalyzer)
        Uses batch processing for optimal performance

        Args:
            root_node: Node to start the depth-first walk from.
            extractors: Mapping of node type -> callable(node). The callable
                may return one element, a list of elements, or a falsy value
                (meaning nothing was extracted).
            results: Output list; extracted elements are appended in place.
            element_type: Label used in cache keys; "field" routes
                field_declaration nodes through batch processing.
        """
        if not root_node:
            return

        # Target node types for extraction
        target_node_types = set(extractors.keys())

        # Container node types that may contain target nodes (from AdvancedAnalyzer).
        # Subtrees whose root is neither a target nor a container are pruned.
        container_node_types = {
            "program",
            "class_body",
            "interface_body",
            "enum_body",
            "class_declaration",
            "interface_declaration",
            "enum_declaration",
            "method_declaration",
            "constructor_declaration",
            "block",
            "modifiers",  # annotations can appear inside the modifiers node
        }

        # Iterative DFS stack: (node, depth)
        node_stack = [(root_node, 0)]
        processed_nodes = 0
        max_depth = 50  # Prevent infinite loops

        # Batch processing containers (from AdvancedAnalyzer)
        field_batch = []

        while node_stack:
            current_node, depth = node_stack.pop()

            # Safety check for maximum depth; deeper subtrees are dropped.
            if depth > max_depth:
                log_warning(f"Maximum traversal depth ({max_depth}) exceeded")
                continue

            processed_nodes += 1
            node_type = current_node.type

            # Early termination: skip nodes that don't contain target elements
            if (
                depth > 0
                and node_type not in target_node_types
                and node_type not in container_node_types
            ):
                continue

            # Collect target nodes for batch processing (from AdvancedAnalyzer)
            if node_type in target_node_types:
                if element_type == "field" and node_type == "field_declaration":
                    field_batch.append(current_node)
                else:
                    # Process non-field elements immediately
                    node_id = id(current_node)

                    # Skip if already processed (NOTE: this `continue` also
                    # skips pushing the node's children, pruning its subtree)
                    if node_id in self._processed_nodes:
                        continue

                    # Check element cache first
                    cache_key = (node_id, element_type)
                    if cache_key in self._element_cache:
                        element = self._element_cache[cache_key]
                        if element:
                            if isinstance(element, list):
                                results.extend(element)
                            else:
                                results.append(element)
                        self._processed_nodes.add(node_id)
                        continue

                    # Extract and cache (falsy results are cached too, so the
                    # extractor is never re-run for the same node)
                    extractor = extractors.get(node_type)
                    if extractor:
                        element = extractor(current_node)
                        self._element_cache[cache_key] = element
                        if element:
                            if isinstance(element, list):
                                results.extend(element)
                            else:
                                results.append(element)
                        self._processed_nodes.add(node_id)

            # Add children to stack (reversed for correct DFS traversal)
            if current_node.children:
                for child in reversed(current_node.children):
                    node_stack.append((child, depth + 1))

            # Process field batch when it reaches optimal size (from AdvancedAnalyzer)
            if len(field_batch) >= 10:
                self._process_field_batch(field_batch, extractors, results)
                field_batch.clear()

        # Process remaining field batch (from AdvancedAnalyzer)
        if field_batch:
            self._process_field_batch(field_batch, extractors, results)

        log_debug(f"Iterative traversal processed {processed_nodes} nodes")
309
+
310
+ def _process_field_batch(self, batch: List["tree_sitter.Node"], extractors: Dict, results: List[Any]) -> None:
311
+ """Process field nodes with caching (from AdvancedAnalyzer)"""
312
+ for node in batch:
313
+ node_id = id(node)
314
+
315
+ # Skip if already processed
316
+ if node_id in self._processed_nodes:
317
+ continue
318
+
319
+ # Check element cache first
320
+ cache_key = (node_id, "field")
321
+ if cache_key in self._element_cache:
322
+ elements = self._element_cache[cache_key]
323
+ if elements:
324
+ if isinstance(elements, list):
325
+ results.extend(elements)
326
+ else:
327
+ results.append(elements)
328
+ self._processed_nodes.add(node_id)
329
+ continue
330
+
331
+ # Extract and cache
332
+ extractor = extractors.get(node.type)
333
+ if extractor:
334
+ elements = extractor(node)
335
+ self._element_cache[cache_key] = elements
336
+ if elements:
337
+ if isinstance(elements, list):
338
+ results.extend(elements)
339
+ else:
340
+ results.append(elements)
341
+ self._processed_nodes.add(node_id)
342
+
343
+ def _get_node_text_optimized(self, node: "tree_sitter.Node") -> str:
344
+ """Get node text with optimized caching (from AdvancedAnalyzer)"""
345
+ node_id = id(node)
346
+
347
+ # Check cache first
348
+ if node_id in self._node_text_cache:
349
+ return self._node_text_cache[node_id]
350
+
351
+ try:
352
+ # Use encoding utilities for text extraction
353
+ start_byte = node.start_byte
354
+ end_byte = node.end_byte
355
+
356
+ encoding = self._file_encoding or "utf-8"
357
+ content_bytes = safe_encode("\n".join(self.content_lines), encoding)
358
+ text = extract_text_slice(content_bytes, start_byte, end_byte, encoding)
359
+
360
+ self._node_text_cache[node_id] = text
361
+ return text
362
+ except Exception as e:
363
+ log_error(f"Error in _get_node_text_optimized: {e}")
364
+ # Fallback to simple text extraction
365
+ try:
366
+ start_point = node.start_point
367
+ end_point = node.end_point
368
+
369
+ if start_point[0] == end_point[0]:
370
+ # Single line
371
+ line = self.content_lines[start_point[0]]
372
+ return line[start_point[1] : end_point[1]]
373
+ else:
374
+ # Multiple lines
375
+ lines = []
376
+ for i in range(start_point[0], end_point[0] + 1):
377
+ if i < len(self.content_lines):
378
+ line = self.content_lines[i]
379
+ if i == start_point[0]:
380
+ lines.append(line[start_point[1] :])
381
+ elif i == end_point[0]:
382
+ lines.append(line[: end_point[1]])
383
+ else:
384
+ lines.append(line)
385
+ return "\n".join(lines)
386
+ except Exception as fallback_error:
387
+ log_error(f"Fallback text extraction also failed: {fallback_error}")
388
+ return ""
389
+
390
    def _extract_class_optimized(self, node: "tree_sitter.Node") -> Optional[Class]:
        """Extract class information optimized (from AdvancedAnalyzer)

        Builds a Class model element from a class/interface/enum declaration
        node: package-qualified name, superclass/interfaces, modifiers,
        visibility, annotations, nesting information and raw source text.

        Returns None when no class name is found or extraction fails.
        """
        try:
            # Tree-sitter points are 0-based; model lines are 1-based.
            start_line = node.start_point[0] + 1
            end_line = node.end_point[0] + 1

            # Extract class name efficiently: first identifier child.
            class_name = None
            for child in node.children:
                if child.type == "identifier":
                    class_name = self._get_node_text_optimized(child)
                    break

            if not class_name:
                return None

            # Determine package name (set earlier by _extract_package_info).
            package_name = self.current_package
            full_qualified_name = (
                f"{package_name}.{class_name}" if package_name else class_name
            )

            # Determine class type (optimized: dictionary lookup)
            class_type_map = {
                "class_declaration": "class",
                "interface_declaration": "interface",
                "enum_declaration": "enum",
            }
            class_type = class_type_map.get(node.type, "class")

            # Extract modifiers efficiently
            modifiers = self._extract_modifiers_optimized(node)
            visibility = self._determine_visibility(modifiers)

            # Extract superclass and interfaces (optimized: single pass)
            extends_class = None
            implements_interfaces = []

            for child in node.children:
                if child.type == "superclass":
                    extends_text = self._get_node_text_optimized(child)
                    # First capitalized identifier in "extends Foo" text.
                    match = re.search(r"\b[A-Z]\w*", extends_text)
                    if match:
                        extends_class = match.group(0)
                elif child.type == "super_interfaces":
                    implements_text = self._get_node_text_optimized(child)
                    implements_interfaces = re.findall(r"\b[A-Z]\w*", implements_text)

            # Extract annotations for this class (by line proximity).
            class_annotations = self._find_annotations_for_line_cached(start_line)

            # Check if this is a nested class
            is_nested = self._is_nested_class(node)
            parent_class = self._find_parent_class(node) if is_nested else None

            # Extract raw text (clamped to the file's line range).
            start_line_idx = max(0, start_line - 1)
            end_line_idx = min(len(self.content_lines), end_line)
            raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])

            return Class(
                name=class_name,
                start_line=start_line,
                end_line=end_line,
                raw_text=raw_text,
                language="java",
                class_type=class_type,
                full_qualified_name=full_qualified_name,
                package_name=package_name,
                superclass=extends_class,
                interfaces=implements_interfaces,
                modifiers=modifiers,
                visibility=visibility,
                # Java-specific detailed information
                annotations=class_annotations,
                is_nested=is_nested,
                parent_class=parent_class,
                extends_class=extends_class,  # Alias for superclass
                implements_interfaces=implements_interfaces,  # Alias for interfaces
            )
        except (AttributeError, ValueError, TypeError) as e:
            log_debug(f"Failed to extract class info: {e}")
            return None
        except Exception as e:
            log_error(f"Unexpected error in class extraction: {e}")
            return None
476
+
477
    def _extract_method_optimized(self, node: "tree_sitter.Node") -> Optional[Function]:
        """Extract method information optimized (from AdvancedAnalyzer)

        Builds a Function model element from a method_declaration or
        constructor_declaration node: parameters, modifiers, annotations,
        throws clause, complexity score and JavaDoc.

        Returns None when the signature cannot be parsed or extraction fails.
        """
        try:
            # Tree-sitter points are 0-based; model lines are 1-based.
            start_line = node.start_point[0] + 1
            end_line = node.end_point[0] + 1

            # Extract method information efficiently
            method_info = self._parse_method_signature_optimized(node)
            if not method_info:
                return None

            method_name, return_type, parameters, modifiers, throws = method_info
            is_constructor = node.type == "constructor_declaration"
            visibility = self._determine_visibility(modifiers)

            # Extract annotations for this method (by line proximity).
            method_annotations = self._find_annotations_for_line_cached(start_line)

            # Calculate complexity score
            complexity_score = self._calculate_complexity_optimized(node)

            # Extract JavaDoc
            javadoc = self._extract_javadoc_for_line(start_line)

            # Extract raw text (clamped to the file's line range).
            start_line_idx = max(0, start_line - 1)
            end_line_idx = min(len(self.content_lines), end_line)
            raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])

            return Function(
                name=method_name,
                start_line=start_line,
                end_line=end_line,
                raw_text=raw_text,
                language="java",
                parameters=parameters,
                # Constructors are reported with a "void" return type.
                return_type=return_type if not is_constructor else "void",
                modifiers=modifiers,
                is_static="static" in modifiers,
                is_private="private" in modifiers,
                is_public="public" in modifiers,
                is_constructor=is_constructor,
                visibility=visibility,
                docstring=javadoc,
                # Java-specific detailed information
                annotations=method_annotations,
                throws=throws,
                complexity_score=complexity_score,
                is_abstract="abstract" in modifiers,
                is_final="final" in modifiers,
            )
        except (AttributeError, ValueError, TypeError) as e:
            log_debug(f"Failed to extract method info: {e}")
            return None
        except Exception as e:
            log_error(f"Unexpected error in method extraction: {e}")
            return None
534
+
535
+ def _extract_field_optimized(self, node: "tree_sitter.Node") -> List[Variable]:
536
+ """Extract field information optimized (from AdvancedAnalyzer)"""
537
+ fields: List[Variable] = []
538
+ try:
539
+ start_line = node.start_point[0] + 1
540
+ end_line = node.end_point[0] + 1
541
+
542
+ # Parse field declaration using AdvancedAnalyzer method
543
+ field_info = self._parse_field_declaration_optimized(node)
544
+ if not field_info:
545
+ return fields
546
+
547
+ field_type, variable_names, modifiers = field_info
548
+ visibility = self._determine_visibility(modifiers)
549
+
550
+ # Extract annotations for this field
551
+ field_annotations = self._find_annotations_for_line_cached(start_line)
552
+
553
+ # Extract JavaDoc for this field
554
+ field_javadoc = self._extract_javadoc_for_line(start_line)
555
+
556
+ # Create Variable object for each variable (matching AdvancedAnalyzer structure)
557
+ for var_name in variable_names:
558
+ # Extract raw text
559
+ start_line_idx = max(0, start_line - 1)
560
+ end_line_idx = min(len(self.content_lines), end_line)
561
+ raw_text = "\n".join(self.content_lines[start_line_idx:end_line_idx])
562
+
563
+ field = Variable(
564
+ name=var_name,
565
+ start_line=start_line,
566
+ end_line=end_line,
567
+ raw_text=raw_text,
568
+ language="java",
569
+ variable_type=field_type,
570
+ modifiers=modifiers,
571
+ is_static="static" in modifiers,
572
+ is_constant="final" in modifiers,
573
+ visibility=visibility,
574
+ docstring=field_javadoc,
575
+ # Java-specific detailed information
576
+ annotations=field_annotations,
577
+ is_final="final" in modifiers,
578
+ field_type=field_type, # Alias for variable_type
579
+ )
580
+ fields.append(field)
581
+ except (AttributeError, ValueError, TypeError) as e:
582
+ log_debug(f"Failed to extract field info: {e}")
583
+ except Exception as e:
584
+ log_error(f"Unexpected error in field extraction: {e}")
585
+
586
+ return fields
587
+
588
+ def _parse_method_signature_optimized(
589
+ self, node: "tree_sitter.Node"
590
+ ) -> Optional[Tuple[str, str, List[str], List[str], List[str]]]:
591
+ """Parse method signature optimized (from AdvancedAnalyzer)"""
592
+ try:
593
+ # Extract method name
594
+ method_name = None
595
+ for child in node.children:
596
+ if child.type == "identifier":
597
+ method_name = self._get_node_text_optimized(child)
598
+ break
599
+
600
+ if not method_name:
601
+ return None
602
+
603
+ # Extract return type
604
+ return_type = "void"
605
+ for child in node.children:
606
+ if child.type in [
607
+ "type_identifier",
608
+ "void_type",
609
+ "primitive_type",
610
+ "integral_type",
611
+ ]:
612
+ return_type = self._get_node_text_optimized(child)
613
+ break
614
+ elif child.type == "generic_type":
615
+ return_type = self._get_node_text_optimized(child)
616
+ break
617
+
618
+ # Extract parameters
619
+ parameters = []
620
+ for child in node.children:
621
+ if child.type == "formal_parameters":
622
+ for param in child.children:
623
+ if param.type == "formal_parameter":
624
+ param_text = self._get_node_text_optimized(param)
625
+ parameters.append(param_text)
626
+
627
+ # Extract modifiers
628
+ modifiers = self._extract_modifiers_optimized(node)
629
+
630
+ # Extract throws clause
631
+ throws = []
632
+ for child in node.children:
633
+ if child.type == "throws":
634
+ throws_text = self._get_node_text_optimized(child)
635
+ exceptions = re.findall(r"\b[A-Z]\w*Exception\b", throws_text)
636
+ throws.extend(exceptions)
637
+
638
+ return method_name, return_type, parameters, modifiers, throws
639
+ except Exception:
640
+ return None
641
+
642
+ def _parse_field_declaration_optimized(
643
+ self, node: "tree_sitter.Node"
644
+ ) -> Optional[Tuple[str, List[str], List[str]]]:
645
+ """Parse field declaration optimized (from AdvancedAnalyzer)"""
646
+ try:
647
+ # Extract type (exactly as in AdvancedAnalyzer)
648
+ field_type = None
649
+ for child in node.children:
650
+ if child.type in ["type_identifier", "primitive_type", "integral_type"]:
651
+ field_type = self._get_node_text_optimized(child)
652
+ break
653
+
654
+ if not field_type:
655
+ return None
656
+
657
+ # Extract variable names (exactly as in AdvancedAnalyzer)
658
+ variable_names = []
659
+ for child in node.children:
660
+ if child.type == "variable_declarator":
661
+ for grandchild in child.children:
662
+ if grandchild.type == "identifier":
663
+ var_name = self._get_node_text_optimized(grandchild)
664
+ variable_names.append(var_name)
665
+
666
+ if not variable_names:
667
+ return None
668
+
669
+ # Extract modifiers (exactly as in AdvancedAnalyzer)
670
+ modifiers = self._extract_modifiers_optimized(node)
671
+
672
+ return field_type, variable_names, modifiers
673
+ except Exception:
674
+ return None
675
+
676
+ def _extract_modifiers_optimized(self, node: "tree_sitter.Node") -> List[str]:
677
+ """Extract modifiers efficiently (from AdvancedAnalyzer)"""
678
+ modifiers = []
679
+ for child in node.children:
680
+ if child.type == "modifiers":
681
+ for mod_child in child.children:
682
+ if mod_child.type in [
683
+ "public",
684
+ "private",
685
+ "protected",
686
+ "static",
687
+ "final",
688
+ "abstract",
689
+ "synchronized",
690
+ "volatile",
691
+ "transient",
692
+ ]:
693
+ modifiers.append(mod_child.type)
694
+ elif mod_child.type not in ["marker_annotation"]:
695
+ mod_text = self._get_node_text_optimized(mod_child)
696
+ if mod_text in [
697
+ "public",
698
+ "private",
699
+ "protected",
700
+ "static",
701
+ "final",
702
+ "abstract",
703
+ "synchronized",
704
+ "volatile",
705
+ "transient",
706
+ ]:
707
+ modifiers.append(mod_text)
708
+ return modifiers
709
+
710
+ def _extract_package_info(self, node: "tree_sitter.Node") -> None:
711
+ """Extract package information (from AdvancedAnalyzer)"""
712
+ try:
713
+ package_text = self._get_node_text_optimized(node)
714
+ match = re.search(r"package\s+([\w.]+)", package_text)
715
+ if match:
716
+ self.current_package = match.group(1)
717
+ except (AttributeError, ValueError, IndexError) as e:
718
+ log_debug(f"Failed to extract package info: {e}")
719
+ except Exception as e:
720
+ log_error(f"Unexpected error in package extraction: {e}")
721
+
722
+ def _extract_package_element(self, node: "tree_sitter.Node") -> Optional[Package]:
723
+ """Extract package element for inclusion in results"""
724
+ try:
725
+ package_text = self._get_node_text_optimized(node)
726
+ match = re.search(r"package\s+([\w.]+)", package_text)
727
+ if match:
728
+ package_name = match.group(1)
729
+ return Package(
730
+ name=package_name,
731
+ start_line=node.start_point[0] + 1,
732
+ end_line=node.end_point[0] + 1,
733
+ raw_text=package_text,
734
+ language="java",
735
+ )
736
+ except (AttributeError, ValueError, IndexError) as e:
737
+ log_debug(f"Failed to extract package element: {e}")
738
+ except Exception as e:
739
+ log_error(f"Unexpected error in package element extraction: {e}")
740
+ return None
741
+
742
+ def _determine_visibility(self, modifiers: List[str]) -> str:
743
+ """Determine visibility from modifiers"""
744
+ if "public" in modifiers:
745
+ return "public"
746
+ elif "private" in modifiers:
747
+ return "private"
748
+ elif "protected" in modifiers:
749
+ return "protected"
750
+ else:
751
+ return "package" # Default package visibility
752
+
753
+ def _find_annotations_for_line_cached(self, target_line: int) -> List[Dict[str, Any]]:
754
+ """Find annotations for specified line with caching (from AdvancedAnalyzer)"""
755
+ if target_line in self._annotation_cache:
756
+ return self._annotation_cache[target_line]
757
+
758
+ result_annotations = []
759
+ for annotation in self.annotations:
760
+ line_distance = target_line - annotation.get("end_line", 0)
761
+ if 1 <= line_distance <= 5:
762
+ result_annotations.append(annotation)
763
+
764
+ self._annotation_cache[target_line] = result_annotations
765
+ return result_annotations
766
+
767
+ def _calculate_complexity_optimized(self, node: "tree_sitter.Node") -> int:
768
+ """Calculate cyclomatic complexity efficiently (from AdvancedAnalyzer)"""
769
+ complexity = 1
770
+ try:
771
+ node_text = self._get_node_text_optimized(node).lower()
772
+ keywords = ["if", "while", "for", "catch", "case", "switch"]
773
+ for keyword in keywords:
774
+ complexity += node_text.count(keyword)
775
+ except (AttributeError, TypeError) as e:
776
+ log_debug(f"Failed to calculate complexity: {e}")
777
+ except Exception as e:
778
+ log_error(f"Unexpected error in complexity calculation: {e}")
779
+ return complexity
780
+
781
+ def _extract_javadoc_for_line(self, target_line: int) -> Optional[str]:
782
+ """Extract JavaDoc comment immediately before the specified line (from AdvancedAnalyzer)"""
783
+ try:
784
+ # Search backwards from target_line
785
+ javadoc_lines = []
786
+ current_line = target_line - 1
787
+
788
+ # Skip empty lines
789
+ while current_line > 0 and current_line <= len(self.content_lines):
790
+ line = self.content_lines[current_line - 1].strip()
791
+ if line:
792
+ break
793
+ current_line -= 1
794
+
795
+ # Check for JavaDoc end
796
+ if current_line > 0 and current_line <= len(self.content_lines):
797
+ line = self.content_lines[current_line - 1].strip()
798
+ if line.endswith("*/"):
799
+ javadoc_lines.append(line)
800
+ current_line -= 1
801
+
802
+ # Collect JavaDoc content
803
+ while current_line > 0:
804
+ line = self.content_lines[current_line - 1].strip()
805
+ javadoc_lines.append(line)
806
+ if line.startswith("/**"):
807
+ break
808
+ current_line -= 1
809
+
810
+ if javadoc_lines and javadoc_lines[-1].startswith("/**"):
811
+ # Reverse to correct order
812
+ javadoc_lines.reverse()
813
+ return "\n".join(javadoc_lines)
814
+
815
+ return None
816
+
817
+ except Exception as e:
818
+ log_debug(f"Failed to extract JavaDoc: {e}")
819
+ return None
820
+
821
+ def _is_nested_class(self, node: "tree_sitter.Node") -> bool:
822
+ """Check if this is a nested class (from AdvancedAnalyzer)"""
823
+ current = node.parent
824
+ while current:
825
+ if current.type in [
826
+ "class_declaration",
827
+ "interface_declaration",
828
+ "enum_declaration",
829
+ ]:
830
+ return True
831
+ current = current.parent
832
+ return False
833
+
834
+ def _find_parent_class(self, node: "tree_sitter.Node") -> Optional[str]:
835
+ """Find parent class name (from AdvancedAnalyzer)"""
836
+ current = node.parent
837
+ while current:
838
+ if current.type in [
839
+ "class_declaration",
840
+ "interface_declaration",
841
+ "enum_declaration",
842
+ ]:
843
+ return self._extract_class_name(current)
844
+ current = current.parent
845
+ return None
846
+
847
+ def _extract_class_name(self, node: "tree_sitter.Node") -> Optional[str]:
848
+ """Extract class name from node (from AdvancedAnalyzer)"""
849
+ for child in node.children:
850
+ if child.type == "identifier":
851
+ return self._get_node_text_optimized(child)
852
+ return None
853
+
854
+ def _extract_annotation_optimized(self, node: "tree_sitter.Node") -> Optional[Dict[str, Any]]:
855
+ """Extract annotation information optimized (from AdvancedAnalyzer)"""
856
+ try:
857
+ start_line = node.start_point[0] + 1
858
+ end_line = node.end_point[0] + 1
859
+ raw_text = self._get_node_text_optimized(node)
860
+
861
+ # Extract annotation name efficiently
862
+ name_match = re.search(r"@(\w+)", raw_text)
863
+ if not name_match:
864
+ return None
865
+
866
+ annotation_name = name_match.group(1)
867
+
868
+ # Extract parameters efficiently
869
+ parameters = []
870
+ param_match = re.search(r"\((.*?)\)", raw_text, re.DOTALL)
871
+ if param_match:
872
+ param_text = param_match.group(1).strip()
873
+ if param_text:
874
+ # Simple parameter parsing
875
+ if "=" in param_text:
876
+ parameters = [
877
+ p.strip() for p in re.split(r",(?![^()]*\))", param_text)
878
+ ]
879
+ else:
880
+ parameters = [param_text]
881
+
882
+ return {
883
+ "name": annotation_name,
884
+ "parameters": parameters,
885
+ "start_line": start_line,
886
+ "end_line": end_line,
887
+ "raw_text": raw_text,
888
+ }
889
+ except (AttributeError, IndexError, ValueError) as e:
890
+ log_debug(f"Failed to extract annotation from node: {e}")
891
+ return None
892
+ except Exception as e:
893
+ log_error(f"Unexpected exception in annotation extraction: {e}")
894
+ return None
895
+
896
def _extract_import_info(
    self, node: "tree_sitter.Node", source_code: str
) -> Optional[Import]:
    """Build an Import element from an import_declaration node.

    Handles normal, static and wildcard imports.  The returned name is
    the dotted path without the trailing ".*"; for static wildcard
    imports the trailing empty segment left by the regex is removed as
    well.  The full statement text (sans trailing ';') is preserved in
    ``import_statement``.

    Args:
        node: The import_declaration AST node.
        source_code: Full file source (unused; kept for interface parity).

    Returns:
        An Import model object, or None when the text cannot be parsed.
    """
    try:
        import_text = self._get_node_text_optimized(node)
        # Normalise: strip surrounding whitespace and the trailing ';'.
        import_content = import_text.strip()
        if import_content.endswith(";"):
            import_content = import_content[:-1]

        is_wildcard = import_content.endswith(".*")
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        # Branch on an actual "import static ..." match instead of a
        # substring test: a path such as "com.staticutils.Foo" contains
        # "static" but is a normal import -- the old substring check
        # sent it down the static path, failed the regex there, and
        # silently dropped the import by returning None.
        static_match = re.search(r"import\s+static\s+([\w.]+)", import_content)
        if static_match:
            import_name = static_match.group(1)
            if is_wildcard:
                # "java.lang.Math.*" captures as "java.lang.Math." --
                # drop any ".*" remnant, then the empty trailing segment.
                import_name = import_name.replace(".*", "")
                parts = import_name.split(".")
                if len(parts) > 1:
                    import_name = ".".join(parts[:-1])

            return Import(
                name=import_name,
                start_line=start_line,
                end_line=end_line,
                raw_text=import_text,
                language="java",
                module_name=import_name,
                is_static=True,
                is_wildcard=is_wildcard,
                import_statement=import_content,
            )

        normal_match = re.search(r"import\s+([\w.]+)", import_content)
        if normal_match:
            import_name = normal_match.group(1)
            if is_wildcard:
                # The regex keeps a trailing "." (the "*" is not matched);
                # trim it so "java.util." becomes "java.util".
                if import_name.endswith(".*"):
                    import_name = import_name[:-2]
                elif import_name.endswith("."):
                    import_name = import_name[:-1]

            return Import(
                name=import_name,
                start_line=start_line,
                end_line=end_line,
                raw_text=import_text,
                language="java",
                module_name=import_name,
                is_static=False,
                is_wildcard=is_wildcard,
                import_statement=import_content,
            )
    except (AttributeError, ValueError, IndexError) as e:
        log_debug(f"Failed to extract import info: {e}")
    except Exception as e:
        log_error(f"Unexpected error in import extraction: {e}")
    return None
959
+
960
+
961
class JavaPlugin(LanguagePlugin):
    """Java language plugin for the new architecture.

    Supplies the tree-sitter Java language handle, the element
    extractor, and a complete async file-analysis entry point.
    """

    def __init__(self) -> None:
        """Initialize the plugin with an empty language cache."""
        super().__init__()
        self._language_cache: Optional["tree_sitter.Language"] = None

    def get_language_name(self) -> str:
        """Return the name of the language this plugin supports."""
        return "java"

    def get_file_extensions(self) -> List[str]:
        """Return the file extensions this plugin supports."""
        return [".java", ".jsp", ".jspx"]

    def create_extractor(self) -> ElementExtractor:
        """Create and return a fresh Java element extractor."""
        return JavaElementExtractor()

    def get_tree_sitter_language(self) -> Optional["tree_sitter.Language"]:
        """Load (once) and return the tree-sitter language object for Java.

        Returns None when tree-sitter-java is not installed or fails to
        load; the result is cached on success.
        """
        if self._language_cache is None:
            try:
                import tree_sitter_java as tsjava

                self._language_cache = tsjava.language()
            except ImportError:
                log_error("tree-sitter-java not available")
                return None
            except Exception as e:
                log_error(f"Failed to load Java language: {e}")
                return None
        return self._language_cache

    def get_supported_queries(self) -> List[str]:
        """Return the query names this plugin can execute."""
        return ["class", "method", "field", "import"]

    def is_applicable(self, file_path: str) -> bool:
        """Return True when *file_path* has a supported extension (case-insensitive)."""
        lowered = file_path.lower()
        return any(lowered.endswith(ext.lower()) for ext in self.get_file_extensions())

    def get_plugin_info(self) -> dict:
        """Return descriptive metadata about this plugin."""
        return {
            "name": "Java Plugin",
            "language": self.get_language_name(),
            "extensions": self.get_file_extensions(),
            "version": "2.0.0",
            "supported_queries": self.get_supported_queries(),
        }

    async def analyze_file(self, file_path: str, request: 'AnalysisRequest') -> 'AnalysisResult':
        """
        Analyze a Java file and return analysis results.

        Reads the file as UTF-8, parses it, runs every extraction pass,
        and combines the results (annotations are kept inside the
        extractor for cross-referencing only).

        Args:
            file_path: Path to the Java file to analyze
            request: Analysis request object

        Returns:
            AnalysisResult object containing the analysis results; a
            failure result (success=False) on parse or I/O errors.
        """
        try:
            from ..models import AnalysisResult
            from ..core.parser import Parser

            log_debug(f"Java Plugin: Starting analysis of {file_path}")

            with open(file_path, 'r', encoding='utf-8') as f:
                source_code = f.read()

            log_debug(f"Java Plugin: Read {len(source_code)} characters from file")

            parse_result = Parser().parse_code(source_code, "java")
            log_debug(f"Java Plugin: Parse result success: {parse_result.success}")

            if not parse_result.success:
                log_error(f"Java Plugin: Parse failed: {parse_result.error_message}")
                return AnalysisResult(
                    file_path=file_path,
                    language="java",
                    line_count=len(source_code.splitlines()),
                    elements=[],
                    node_count=0,
                    query_results={},
                    source_code=source_code,
                    success=False,
                    error_message=parse_result.error_message
                )

            extractor = self.create_extractor()

            # Run every extraction pass in order.  Annotations are stored
            # inside the extractor for cross-referencing and are not part
            # of the combined element list.
            extracted = {}
            for label, extract in (
                ("annotations", extractor.extract_annotations),
                ("packages", extractor.extract_packages),
                ("functions", extractor.extract_functions),
                ("classes", extractor.extract_classes),
                ("variables", extractor.extract_variables),
                ("imports", extractor.extract_imports),
            ):
                log_debug(f"Java Plugin: Extracting {label}...")
                extracted[label] = extract(parse_result.tree, source_code)
                log_debug(f"Java Plugin: Found {len(extracted[label])} {label}")

            all_elements = (
                extracted["packages"]
                + extracted["functions"]
                + extracted["classes"]
                + extracted["variables"]
                + extracted["imports"]
            )
            log_debug(f"Java Plugin: Total elements: {len(all_elements)}")

            return AnalysisResult(
                file_path=file_path,
                language="java",
                line_count=len(source_code.splitlines()),
                elements=all_elements,
                node_count=parse_result.tree.root_node.child_count if parse_result.tree else 0,
                query_results={},
                source_code=source_code,
                success=True,
                error_message=None
            )

        except Exception as e:
            log_error(f"Failed to analyze Java file {file_path}: {e}")
            import traceback
            log_error(f"Java Plugin traceback: {traceback.format_exc()}")
            # NOTE(review): AnalysisResult may be unbound here when the
            # models import itself failed -- mirrors the original behavior.
            return AnalysisResult(
                file_path=file_path,
                language="java",
                line_count=0,
                elements=[],
                node_count=0,
                query_results={},
                source_code="",
                success=False,
                error_message=str(e)
            )