tree-sitter-analyzer 0.1.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tree-sitter-analyzer might be problematic.

Files changed (79)
  1. tree_sitter_analyzer/__init__.py +133 -121
  2. tree_sitter_analyzer/__main__.py +11 -12
  3. tree_sitter_analyzer/api.py +531 -539
  4. tree_sitter_analyzer/cli/__init__.py +39 -39
  5. tree_sitter_analyzer/cli/__main__.py +12 -13
  6. tree_sitter_analyzer/cli/commands/__init__.py +26 -27
  7. tree_sitter_analyzer/cli/commands/advanced_command.py +88 -88
  8. tree_sitter_analyzer/cli/commands/base_command.py +160 -155
  9. tree_sitter_analyzer/cli/commands/default_command.py +18 -19
  10. tree_sitter_analyzer/cli/commands/partial_read_command.py +141 -133
  11. tree_sitter_analyzer/cli/commands/query_command.py +81 -82
  12. tree_sitter_analyzer/cli/commands/structure_command.py +138 -121
  13. tree_sitter_analyzer/cli/commands/summary_command.py +101 -93
  14. tree_sitter_analyzer/cli/commands/table_command.py +232 -233
  15. tree_sitter_analyzer/cli/info_commands.py +120 -121
  16. tree_sitter_analyzer/cli_main.py +277 -276
  17. tree_sitter_analyzer/core/__init__.py +15 -20
  18. tree_sitter_analyzer/core/analysis_engine.py +591 -574
  19. tree_sitter_analyzer/core/cache_service.py +320 -330
  20. tree_sitter_analyzer/core/engine.py +557 -560
  21. tree_sitter_analyzer/core/parser.py +293 -288
  22. tree_sitter_analyzer/core/query.py +494 -502
  23. tree_sitter_analyzer/encoding_utils.py +458 -460
  24. tree_sitter_analyzer/exceptions.py +337 -340
  25. tree_sitter_analyzer/file_handler.py +217 -222
  26. tree_sitter_analyzer/formatters/__init__.py +1 -1
  27. tree_sitter_analyzer/formatters/base_formatter.py +167 -168
  28. tree_sitter_analyzer/formatters/formatter_factory.py +78 -74
  29. tree_sitter_analyzer/formatters/java_formatter.py +287 -270
  30. tree_sitter_analyzer/formatters/python_formatter.py +255 -235
  31. tree_sitter_analyzer/interfaces/__init__.py +9 -10
  32. tree_sitter_analyzer/interfaces/cli.py +528 -557
  33. tree_sitter_analyzer/interfaces/cli_adapter.py +322 -319
  34. tree_sitter_analyzer/interfaces/mcp_adapter.py +180 -170
  35. tree_sitter_analyzer/interfaces/mcp_server.py +405 -416
  36. tree_sitter_analyzer/java_analyzer.py +218 -219
  37. tree_sitter_analyzer/language_detector.py +398 -400
  38. tree_sitter_analyzer/language_loader.py +224 -228
  39. tree_sitter_analyzer/languages/__init__.py +10 -11
  40. tree_sitter_analyzer/languages/java_plugin.py +1129 -1113
  41. tree_sitter_analyzer/languages/python_plugin.py +737 -712
  42. tree_sitter_analyzer/mcp/__init__.py +31 -32
  43. tree_sitter_analyzer/mcp/resources/__init__.py +44 -47
  44. tree_sitter_analyzer/mcp/resources/code_file_resource.py +212 -213
  45. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +560 -550
  46. tree_sitter_analyzer/mcp/server.py +333 -345
  47. tree_sitter_analyzer/mcp/tools/__init__.py +30 -31
  48. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +621 -557
  49. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +242 -245
  50. tree_sitter_analyzer/mcp/tools/base_tool.py +54 -55
  51. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +300 -302
  52. tree_sitter_analyzer/mcp/tools/table_format_tool.py +362 -359
  53. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +543 -476
  54. tree_sitter_analyzer/mcp/utils/__init__.py +105 -106
  55. tree_sitter_analyzer/mcp/utils/error_handler.py +549 -549
  56. tree_sitter_analyzer/models.py +470 -481
  57. tree_sitter_analyzer/output_manager.py +261 -264
  58. tree_sitter_analyzer/plugins/__init__.py +333 -334
  59. tree_sitter_analyzer/plugins/base.py +477 -446
  60. tree_sitter_analyzer/plugins/java_plugin.py +608 -625
  61. tree_sitter_analyzer/plugins/javascript_plugin.py +446 -439
  62. tree_sitter_analyzer/plugins/manager.py +362 -355
  63. tree_sitter_analyzer/plugins/plugin_loader.py +85 -83
  64. tree_sitter_analyzer/plugins/python_plugin.py +606 -598
  65. tree_sitter_analyzer/plugins/registry.py +374 -366
  66. tree_sitter_analyzer/queries/__init__.py +26 -27
  67. tree_sitter_analyzer/queries/java.py +391 -394
  68. tree_sitter_analyzer/queries/javascript.py +148 -149
  69. tree_sitter_analyzer/queries/python.py +285 -286
  70. tree_sitter_analyzer/queries/typescript.py +229 -230
  71. tree_sitter_analyzer/query_loader.py +254 -260
  72. tree_sitter_analyzer/table_formatter.py +468 -448
  73. tree_sitter_analyzer/utils.py +277 -277
  74. tree_sitter_analyzer-0.3.0.dist-info/METADATA +346 -0
  75. tree_sitter_analyzer-0.3.0.dist-info/RECORD +77 -0
  76. tree_sitter_analyzer-0.1.3.dist-info/METADATA +0 -444
  77. tree_sitter_analyzer-0.1.3.dist-info/RECORD +0 -77
  78. {tree_sitter_analyzer-0.1.3.dist-info → tree_sitter_analyzer-0.3.0.dist-info}/WHEEL +0 -0
  79. {tree_sitter_analyzer-0.1.3.dist-info → tree_sitter_analyzer-0.3.0.dist-info}/entry_points.txt +0 -0
@@ -1,557 +1,621 @@
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Analyze Code Scale MCP Tool
-
- This tool provides code scale analysis including metrics about
- complexity, size, and structure through the MCP protocol.
- Enhanced for LLM-friendly analysis workflow.
- """
-
- import json
- import logging
- import re
- from pathlib import Path
- from typing import Any, Dict, List, Optional
-
- from tree_sitter_analyzer.core.analysis_engine import get_analysis_engine, AnalysisRequest
- from ...core.analysis_engine import get_analysis_engine, AnalysisRequest
- from ...language_detector import detect_language_from_file
- from ...utils import log_performance, setup_logger
-
- # Set up logging
- logger = setup_logger(__name__)
-
-
- class AnalyzeScaleTool:
-     """
-     MCP Tool for analyzing code scale and complexity metrics.
-
-     This tool integrates with existing analyzer components to provide
-     comprehensive code analysis through the MCP protocol, optimized
-     for LLM workflow efficiency.
-     """
-
-     def __init__(self) -> None:
-         """Initialize the analyze scale tool."""
-         # Use unified analysis engine instead of deprecated AdvancedAnalyzer
-         self.analysis_engine = get_analysis_engine()
-         logger.info("AnalyzeScaleTool initialized")
-
-     def _calculate_file_metrics(self, file_path: str) -> Dict[str, Any]:
-         """
-         Calculate basic file metrics including line counts and estimated token count.
-
-         Args:
-             file_path: Path to the file to analyze
-
-         Returns:
-             Dictionary containing file metrics
-         """
-         try:
-             with open(file_path, "r", encoding="utf-8") as f:
-                 content = f.read()
-
-             lines = content.split("\n")
-             total_lines = len(lines)
-
-             # Count different types of lines
-             code_lines = 0
-             comment_lines = 0
-             blank_lines = 0
-
-             for line in lines:
-                 stripped = line.strip()
-                 if not stripped:
-                     blank_lines += 1
-                 elif (
-                     stripped.startswith("//")
-                     or stripped.startswith("/*")
-                     or stripped.startswith("*")
-                 ):
-                     comment_lines += 1
-                 else:
-                     code_lines += 1
-
-             # Estimate token count (rough approximation)
-             # Split by common delimiters and count non-empty tokens
-             tokens = re.findall(r"\b\w+\b|[^\w\s]", content)
-             estimated_tokens = len([t for t in tokens if t.strip()])
-
-             # Calculate file size
-             file_size = len(content.encode("utf-8"))
-
-             return {
-                 "total_lines": total_lines,
-                 "code_lines": code_lines,
-                 "comment_lines": comment_lines,
-                 "blank_lines": blank_lines,
-                 "estimated_tokens": estimated_tokens,
-                 "file_size_bytes": file_size,
-                 "file_size_kb": round(file_size / 1024, 2),
-             }
-         except Exception as e:
-             logger.error(f"Error calculating file metrics for {file_path}: {e}")
-             return {
-                 "total_lines": 0,
-                 "code_lines": 0,
-                 "comment_lines": 0,
-                 "blank_lines": 0,
-                 "estimated_tokens": 0,
-                 "file_size_bytes": 0,
-                 "file_size_kb": 0,
-             }
-
-     def _extract_structural_overview(self, analysis_result: Any) -> Dict[str, Any]:
-         """
-         Extract structural overview with position information for LLM guidance.
-
-         Args:
-             analysis_result: Result from AdvancedAnalyzer
-
-         Returns:
-             Dictionary containing structural overview
-         """
-         overview = {
-             "classes": [],
-             "methods": [],
-             "fields": [],
-             "imports": [],
-             "complexity_hotspots": [],
-         }
-
-         # Extract class information with position from unified analysis engine
-         classes = [e for e in analysis_result.elements if e.__class__.__name__ == 'Class']
-         for cls in classes:
-             class_info = {
-                 "name": cls.name,
-                 "type": cls.class_type,
-                 "start_line": cls.start_line,
-                 "end_line": cls.end_line,
-                 "line_span": cls.end_line - cls.start_line + 1,
-                 "visibility": cls.visibility,
-                 "extends": cls.extends_class,
-                 "implements": cls.implements_interfaces,
-                 "annotations": [ann.name for ann in cls.annotations],
-             }
-             overview["classes"].append(class_info)
-
-         # Extract method information with position and complexity from unified analysis engine
-         methods = [e for e in analysis_result.elements if e.__class__.__name__ == 'Function']
-         for method in methods:
-             method_info = {
-                 "name": method.name,
-                 "start_line": method.start_line,
-                 "end_line": method.end_line,
-                 "line_span": method.end_line - method.start_line + 1,
-                 "visibility": method.visibility,
-                 "return_type": method.return_type,
-                 "parameter_count": len(method.parameters),
-                 "complexity": method.complexity_score,
-                 "is_constructor": method.is_constructor,
-                 "is_static": method.is_static,
-                 "annotations": [ann.name for ann in method.annotations],
-             }
-             overview["methods"].append(method_info)
-
-             # Track complexity hotspots
-             if method.complexity_score > 10:  # High complexity threshold
-                 overview["complexity_hotspots"].append(
-                     {
-                         "type": "method",
-                         "name": method.name,
-                         "complexity": method.complexity_score,
-                         "start_line": method.start_line,
-                         "end_line": method.end_line,
-                     }
-                 )
-
-         # Extract field information with position
-         # Extract field information from unified analysis engine
-         fields = [e for e in analysis_result.elements if e.__class__.__name__ == 'Variable']
-         for field in fields:
-             field_info = {
-                 "name": field.name,
-                 "type": field.field_type,
-                 "start_line": field.start_line,
-                 "end_line": field.end_line,
-                 "visibility": field.visibility,
-                 "is_static": field.is_static,
-                 "is_final": field.is_final,
-                 "annotations": [ann.name for ann in field.annotations],
-             }
-             overview["fields"].append(field_info)
-
-         # Extract import information
-         # Extract import information from unified analysis engine
-         imports = [e for e in analysis_result.elements if e.__class__.__name__ == 'Import']
-         for imp in imports:
-             import_info = {
-                 "name": imp.imported_name,
-                 "statement": imp.import_statement,
-                 "line": imp.line_number,
-                 "is_static": imp.is_static,
-                 "is_wildcard": imp.is_wildcard,
-             }
-             overview["imports"].append(import_info)
-
-         return overview
-
-     def _generate_llm_guidance(
-         self, file_metrics: Dict[str, Any], structural_overview: Dict[str, Any]
-     ) -> Dict[str, Any]:
-         """
-         Generate guidance for LLM on how to efficiently analyze this file.
-
-         Args:
-             file_metrics: Basic file metrics
-             structural_overview: Structural overview of the code
-
-         Returns:
-             Dictionary containing LLM guidance
-         """
-         guidance = {
-             "analysis_strategy": "",
-             "recommended_tools": [],
-             "key_areas": [],
-             "complexity_assessment": "",
-             "size_category": "",
-         }
-
-         total_lines = file_metrics["total_lines"]
-         estimated_tokens = file_metrics["estimated_tokens"]
-
-         # Determine size category
-         if total_lines < 100:
-             guidance["size_category"] = "small"
-             guidance["analysis_strategy"] = (
-                 "This is a small file that can be analyzed in full detail."
-             )
-         elif total_lines < 500:
-             guidance["size_category"] = "medium"
-             guidance["analysis_strategy"] = (
-                 "This is a medium-sized file. Consider focusing on key classes and methods."
-             )
-         elif total_lines < 1500:
-             guidance["size_category"] = "large"
-             guidance["analysis_strategy"] = (
-                 "This is a large file. Use targeted analysis with read_code_partial."
-             )
-         else:
-             guidance["size_category"] = "very_large"
-             guidance["analysis_strategy"] = (
-                 "This is a very large file. Strongly recommend using structural analysis first, then targeted deep-dives."
-             )
-
-         # Recommend tools based on file size and complexity
-         if total_lines > 200:
-             guidance["recommended_tools"].append("read_code_partial")
-
-         if len(structural_overview["complexity_hotspots"]) > 0:
-             guidance["recommended_tools"].append("format_table")
-             guidance["complexity_assessment"] = (
-                 f"Found {len(structural_overview['complexity_hotspots'])} complexity hotspots"
-             )
-         else:
-             guidance["complexity_assessment"] = (
-                 "No significant complexity hotspots detected"
-             )
-
-         # Identify key areas for analysis
-         if len(structural_overview["classes"]) > 1:
-             guidance["key_areas"].append(
-                 "Multiple classes - consider analyzing class relationships"
-             )
-
-         if len(structural_overview["methods"]) > 20:
-             guidance["key_areas"].append(
-                 "Many methods - focus on public interfaces and high-complexity methods"
-             )
-
-         if len(structural_overview["imports"]) > 10:
-             guidance["key_areas"].append("Many imports - consider dependency analysis")
-
-         return guidance
-
-     def get_tool_schema(self) -> Dict[str, Any]:
-         """
-         Get the MCP tool schema for analyze_code_scale.
-
-         Returns:
-             Dictionary containing the tool schema
-         """
-         return {
-             "type": "object",
-             "properties": {
-                 "file_path": {
-                     "type": "string",
-                     "description": "Path to the code file to analyze",
-                 },
-                 "language": {
-                     "type": "string",
-                     "description": "Programming language (optional, auto-detected if not specified)",
-                 },
-                 "include_complexity": {
-                     "type": "boolean",
-                     "description": "Include complexity metrics in the analysis",
-                     "default": True,
-                 },
-                 "include_details": {
-                     "type": "boolean",
-                     "description": "Include detailed element information",
-                     "default": False,
-                 },
-                 "include_guidance": {
-                     "type": "boolean",
-                     "description": "Include LLM analysis guidance",
-                     "default": True,
-                 },
-             },
-             "required": ["file_path"],
-             "additionalProperties": False,
-         }
-
-     async def execute(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
-         """
-         Execute the analyze_code_scale tool.
-
-         Args:
-             arguments: Tool arguments containing file_path and optional parameters
-
-         Returns:
-             Dictionary containing enhanced analysis results optimized for LLM workflow
-
-         Raises:
-             ValueError: If required arguments are missing or invalid
-             FileNotFoundError: If the specified file doesn't exist
-         """
-         # Validate required arguments
-         if "file_path" not in arguments:
-             raise ValueError("file_path is required")
-
-         file_path = arguments["file_path"]
-         language = arguments.get("language")
-         include_complexity = arguments.get("include_complexity", True)
-         include_details = arguments.get("include_details", False)
-         include_guidance = arguments.get("include_guidance", True)
-
-         # Validate file exists
-         if not Path(file_path).exists():
-             raise FileNotFoundError(f"File not found: {file_path}")
-
-         # Detect language if not specified
-         if not language:
-             language = detect_language_from_file(file_path)
-             if language == "unknown":
-                 raise ValueError(f"Could not detect language for file: {file_path}")
-
-         logger.info(f"Analyzing code scale for {file_path} (language: {language})")
-
-         try:
-             # Use performance monitoring with proper context manager
-             from ...mcp.utils import get_performance_monitor
-
-             with get_performance_monitor().measure_operation(
-                 "analyze_code_scale_enhanced"
-             ):
-                 # Calculate basic file metrics
-                 file_metrics = self._calculate_file_metrics(file_path)
-
-                 # Use appropriate analyzer based on language
-                 if language == "java":
-                     # Use AdvancedAnalyzer for comprehensive analysis
-                     # Use unified analysis engine instead of deprecated advanced_analyzer
-                     request = AnalysisRequest(
-                         file_path=file_path,
-                         language=language,
-                         include_complexity=True,
-                         include_details=True
-                     )
-                     analysis_result = await self.analysis_engine.analyze(request)
-                     if analysis_result is None:
-                         raise RuntimeError(f"Failed to analyze file: {file_path}")
-                     # Extract structural overview
-                     structural_overview = self._extract_structural_overview(analysis_result)
-                 else:
-                     # Use universal analysis_engine for other languages
-                     request = AnalysisRequest(file_path=file_path, language=language, include_details=include_details)
-                     universal_result = await self.analysis_engine.analyze(request)
-                     if not universal_result or not universal_result.success:
-                         error_msg = universal_result.error_message if universal_result else "Unknown error"
-                         raise RuntimeError(f"Failed to analyze file with universal engine: {error_msg}")
-
-                     # Adapt the result to a compatible structure for report generation
-                     # This part needs careful implementation based on universal_result structure
-                     analysis_result = None  # Placeholder
-                     structural_overview = {}  # Placeholder
-
-                 # Generate LLM guidance
-                 llm_guidance = None
-                 if include_guidance:
-                     llm_guidance = self._generate_llm_guidance(
-                         file_metrics, structural_overview
-                     )
-
-                 # Build enhanced result structure
-                 result = {
-                     "file_path": file_path,
-                     "language": language,
-                     "file_metrics": file_metrics,
-                     "summary": {
-                         "classes": len([e for e in analysis_result.elements if e.__class__.__name__ == 'Class']),
-                         "methods": len([e for e in analysis_result.elements if e.__class__.__name__ == 'Function']),
-                         "fields": len([e for e in analysis_result.elements if e.__class__.__name__ == 'Variable']),
-                         "imports": len([e for e in analysis_result.elements if e.__class__.__name__ == 'Import']),
-                         "annotations": len(getattr(analysis_result, "annotations", [])),
-                         "package": (
-                             analysis_result.package.name
-                             if analysis_result.package
-                             else None
-                         ),
-                     },
-                     "structural_overview": structural_overview,
-                 }
-
-                 if include_guidance:
-                     result["llm_guidance"] = llm_guidance
-
-                 # Add detailed information if requested (backward compatibility)
-                 if include_details:
-                     result["detailed_analysis"] = {
-                         "statistics": analysis_result.get_statistics(),
-                         "classes": [
-                             {
-                                 "name": cls.name,
-                                 "type": cls.class_type,
-                                 "visibility": cls.visibility,
-                                 "extends": cls.extends_class,
-                                 "implements": cls.implements_interfaces,
-                                 "annotations": [ann.name for ann in cls.annotations],
-                                 "lines": f"{cls.start_line}-{cls.end_line}",
-                             }
-                             for cls in [e for e in analysis_result.elements if e.__class__.__name__ == 'Class']
-                         ],
-                         "methods": [
-                             {
-                                 "name": method.name,
-                                 "file_path": getattr(method, 'file_path', file_path),
-                                 "visibility": method.visibility,
-                                 "return_type": method.return_type,
-                                 "parameters": len(method.parameters),
-                                 "annotations": [ann.name for ann in method.annotations],
-                                 "is_constructor": method.is_constructor,
-                                 "is_static": method.is_static,
-                                 "complexity": method.complexity_score,
-                                 "lines": f"{method.start_line}-{method.end_line}",
-                             }
-                             for method in [e for e in analysis_result.elements if e.__class__.__name__ == 'Function']
-                         ],
-                         "fields": [
-                             {
-                                 "name": field.name,
-                                 "type": field.field_type,
-                                 "file_path": getattr(field, 'file_path', file_path),
-                                 "visibility": field.visibility,
-                                 "is_static": field.is_static,
-                                 "is_final": field.is_final,
-                                 "annotations": [ann.name for ann in field.annotations],
-                                 "lines": f"{field.start_line}-{field.end_line}",
-                             }
-                             for field in [e for e in analysis_result.elements if e.__class__.__name__ == 'Variable']
-                         ],
-                     }
-
-                 # Count elements by type
-                 classes_count = len([e for e in analysis_result.elements if e.__class__.__name__ == 'Class'])
-                 methods_count = len([e for e in analysis_result.elements if e.__class__.__name__ == 'Function'])
-
-                 logger.info(
-                     f"Successfully analyzed {file_path}: {classes_count} classes, "
-                     f"{methods_count} methods, {file_metrics['total_lines']} lines, "
-                     f"~{file_metrics['estimated_tokens']} tokens"
-                 )
-
-                 return result
-
-         except Exception as e:
-             logger.error(f"Error analyzing {file_path}: {e}")
-             raise
-
-     def validate_arguments(self, arguments: Dict[str, Any]) -> bool:
-         """
-         Validate tool arguments against the schema.
-
-         Args:
-             arguments: Arguments to validate
-
-         Returns:
-             True if arguments are valid
-
-         Raises:
-             ValueError: If arguments are invalid
-         """
-         schema = self.get_tool_schema()
-         required_fields = schema.get("required", [])
-
-         # Check required fields
-         for field in required_fields:
-             if field not in arguments:
-                 raise ValueError(f"Required field '{field}' is missing")
-
-         # Validate file_path
-         if "file_path" in arguments:
-             file_path = arguments["file_path"]
-             if not isinstance(file_path, str):
-                 raise ValueError("file_path must be a string")
-             if not file_path.strip():
-                 raise ValueError("file_path cannot be empty")
-
-         # Validate optional fields
-         if "language" in arguments:
-             language = arguments["language"]
-             if not isinstance(language, str):
-                 raise ValueError("language must be a string")
-
-         if "include_complexity" in arguments:
-             include_complexity = arguments["include_complexity"]
-             if not isinstance(include_complexity, bool):
-                 raise ValueError("include_complexity must be a boolean")
-
-         if "include_details" in arguments:
-             include_details = arguments["include_details"]
-             if not isinstance(include_details, bool):
-                 raise ValueError("include_details must be a boolean")
-
-         if "include_guidance" in arguments:
-             include_guidance = arguments["include_guidance"]
-             if not isinstance(include_guidance, bool):
-                 raise ValueError("include_guidance must be a boolean")
-
-         return True
-
-     def get_tool_definition(self) -> Any:
-         """
-         Get the MCP tool definition for analyze_code_scale.
-
-         Returns:
-             Tool definition object compatible with MCP server
-         """
-         try:
-             from mcp.types import Tool
-
-             return Tool(
-                 name="analyze_code_scale",
-                 description="Analyze code scale, complexity, and structure metrics with LLM-optimized guidance for efficient large file analysis",
-                 inputSchema=self.get_tool_schema(),
-             )
-         except ImportError:
-             # Fallback for when MCP is not available
-             return {
-                 "name": "analyze_code_scale",
-                 "description": "Analyze code scale, complexity, and structure metrics with LLM-optimized guidance for efficient large file analysis",
-                 "inputSchema": self.get_tool_schema(),
-             }
-
-
- # Tool instance for easy access
- analyze_scale_tool = AnalyzeScaleTool()
+ #!/usr/bin/env python3
+ """
+ Analyze Code Scale MCP Tool
+
+ This tool provides code scale analysis including metrics about
+ complexity, size, and structure through the MCP protocol.
+ Enhanced for LLM-friendly analysis workflow.
+ """
+
+ import re
+ from pathlib import Path
+ from typing import Any
+
+ from ...core.analysis_engine import AnalysisRequest, get_analysis_engine
+ from ...language_detector import detect_language_from_file
+ from ...utils import setup_logger
+
+ # Set up logging
+ logger = setup_logger(__name__)
+
+
+ class AnalyzeScaleTool:
+     """
+     MCP Tool for analyzing code scale and complexity metrics.
+
+     This tool integrates with existing analyzer components to provide
+     comprehensive code analysis through the MCP protocol, optimized
+     for LLM workflow efficiency.
+     """
+
+     def __init__(self) -> None:
+         """Initialize the analyze scale tool."""
+         # Use unified analysis engine instead of deprecated AdvancedAnalyzer
+         self.analysis_engine = get_analysis_engine()
+         logger.info("AnalyzeScaleTool initialized")
+
+     def _calculate_file_metrics(self, file_path: str) -> dict[str, Any]:
+         """
+         Calculate basic file metrics including line counts and estimated token count.
+
+         Args:
+             file_path: Path to the file to analyze
+
+         Returns:
+             Dictionary containing file metrics
+         """
+         try:
+             with open(file_path, encoding="utf-8") as f:
+                 content = f.read()
+
+             lines = content.split("\n")
+             total_lines = len(lines)
+
+             # Count different types of lines
+             code_lines = 0
+             comment_lines = 0
+             blank_lines = 0
+
+             for line in lines:
+                 stripped = line.strip()
+                 if not stripped:
+                     blank_lines += 1
+                 elif (
+                     stripped.startswith("//")
+                     or stripped.startswith("/*")
+                     or stripped.startswith("*")
+                 ):
+                     comment_lines += 1
+                 else:
+                     code_lines += 1
+
+             # Estimate token count (rough approximation)
+             # Split by common delimiters and count non-empty tokens
+             tokens = re.findall(r"\b\w+\b|[^\w\s]", content)
+             estimated_tokens = len([t for t in tokens if t.strip()])
+
+             # Calculate file size
+             file_size = len(content.encode("utf-8"))
+
+             return {
+                 "total_lines": total_lines,
+                 "code_lines": code_lines,
+                 "comment_lines": comment_lines,
+                 "blank_lines": blank_lines,
+                 "estimated_tokens": estimated_tokens,
+                 "file_size_bytes": file_size,
+                 "file_size_kb": round(file_size / 1024, 2),
+             }
+         except Exception as e:
+             logger.error(f"Error calculating file metrics for {file_path}: {e}")
+             return {
+                 "total_lines": 0,
+                 "code_lines": 0,
+                 "comment_lines": 0,
+                 "blank_lines": 0,
+                 "estimated_tokens": 0,
+                 "file_size_bytes": 0,
+                 "file_size_kb": 0,
+             }
+
+     def _extract_structural_overview(self, analysis_result: Any) -> dict[str, Any]:
+         """
+         Extract structural overview with position information for LLM guidance.
+
+         Args:
+             analysis_result: Result from AdvancedAnalyzer
+
+         Returns:
+             Dictionary containing structural overview
+         """
+         overview: dict[str, Any] = {
+             "classes": [],
+             "methods": [],
+             "fields": [],
+             "imports": [],
+             "complexity_hotspots": [],
+         }
+
+         # Extract class information with position from unified analysis engine
+         classes = [
+             e for e in analysis_result.elements if e.__class__.__name__ == "Class"
+         ]
+         for cls in classes:
+             class_info = {
+                 "name": cls.name,
+                 "type": cls.class_type,
+                 "start_line": cls.start_line,
+                 "end_line": cls.end_line,
+                 "line_span": cls.end_line - cls.start_line + 1,
+                 "visibility": cls.visibility,
+                 "extends": cls.extends_class,
+                 "implements": cls.implements_interfaces,
+                 "annotations": [ann.name for ann in cls.annotations],
+             }
+             overview["classes"].append(class_info)
+
+         # Extract method information with position and complexity from unified analysis engine
+         methods = [
+             e for e in analysis_result.elements if e.__class__.__name__ == "Function"
+         ]
+         for method in methods:
+             method_info = {
+                 "name": method.name,
+                 "start_line": method.start_line,
+                 "end_line": method.end_line,
+                 "line_span": method.end_line - method.start_line + 1,
+                 "visibility": method.visibility,
+                 "return_type": method.return_type,
+                 "parameter_count": len(method.parameters),
+                 "complexity": method.complexity_score,
+                 "is_constructor": method.is_constructor,
+                 "is_static": method.is_static,
+                 "annotations": [ann.name for ann in method.annotations],
+             }
+             overview["methods"].append(method_info)
+
+             # Track complexity hotspots
+             if method.complexity_score > 10:  # High complexity threshold
+                 overview["complexity_hotspots"].append(
+                     {
+                         "type": "method",
+                         "name": method.name,
+                         "complexity": method.complexity_score,
+                         "start_line": method.start_line,
+                         "end_line": method.end_line,
+                     }
+                 )
+
+         # Extract field information with position
+         # Extract field information from unified analysis engine
+         fields = [
+             e for e in analysis_result.elements if e.__class__.__name__ == "Variable"
+         ]
+         for field in fields:
+             field_info = {
+                 "name": field.name,
+                 "type": field.field_type,
+                 "start_line": field.start_line,
+                 "end_line": field.end_line,
+                 "visibility": field.visibility,
+                 "is_static": field.is_static,
+                 "is_final": field.is_final,
+                 "annotations": [ann.name for ann in field.annotations],
+             }
+             overview["fields"].append(field_info)
+
+         # Extract import information
+         # Extract import information from unified analysis engine
+         imports = [
+             e for e in analysis_result.elements if e.__class__.__name__ == "Import"
+         ]
+         for imp in imports:
+             import_info = {
+                 "name": imp.imported_name,
+                 "statement": imp.import_statement,
+                 "line": imp.line_number,
+                 "is_static": imp.is_static,
+                 "is_wildcard": imp.is_wildcard,
+             }
+             overview["imports"].append(import_info)
+
+         return overview
+
+     def _generate_llm_guidance(
+         self, file_metrics: dict[str, Any], structural_overview: dict[str, Any]
+     ) -> dict[str, Any]:
+         """
+         Generate guidance for LLM on how to efficiently analyze this file.
+
+         Args:
+             file_metrics: Basic file metrics
+             structural_overview: Structural overview of the code
+
+         Returns:
+             Dictionary containing LLM guidance
+         """
+         guidance = {
+             "analysis_strategy": "",
+             "recommended_tools": [],
+             "key_areas": [],
+             "complexity_assessment": "",
+             "size_category": "",
+         }
+
+         total_lines = file_metrics["total_lines"]
+         # estimated_tokens = file_metrics["estimated_tokens"]  # Not used currently
+
+         # Determine size category
+         if total_lines < 100:
+             guidance["size_category"] = "small"
+             guidance["analysis_strategy"] = (
+                 "This is a small file that can be analyzed in full detail."
+             )
+         elif total_lines < 500:
+             guidance["size_category"] = "medium"
+             guidance["analysis_strategy"] = (
+                 "This is a medium-sized file. Consider focusing on key classes and methods."
+             )
+         elif total_lines < 1500:
+             guidance["size_category"] = "large"
+             guidance["analysis_strategy"] = (
+                 "This is a large file. Use targeted analysis with read_code_partial."
+             )
+         else:
+             guidance["size_category"] = "very_large"
+             guidance["analysis_strategy"] = (
+                 "This is a very large file. Strongly recommend using structural analysis first, then targeted deep-dives."
+             )
+
+         # Recommend tools based on file size and complexity
+         if total_lines > 200:
+             guidance["recommended_tools"].append("read_code_partial")
+
+         if len(structural_overview["complexity_hotspots"]) > 0:
+             guidance["recommended_tools"].append("format_table")
+             guidance["complexity_assessment"] = (
+                 f"Found {len(structural_overview['complexity_hotspots'])} complexity hotspots"
+             )
+         else:
+             guidance["complexity_assessment"] = (
+                 "No significant complexity hotspots detected"
+             )
+
+         # Identify key areas for analysis
+         if len(structural_overview["classes"]) > 1:
+             guidance["key_areas"].append(
+                 "Multiple classes - consider analyzing class relationships"
+             )
+
+         if len(structural_overview["methods"]) > 20:
+             guidance["key_areas"].append(
+                 "Many methods - focus on public interfaces and high-complexity methods"
+             )
+
+         if len(structural_overview["imports"]) > 10:
+             guidance["key_areas"].append("Many imports - consider dependency analysis")
+
+         return guidance
+
+     def get_tool_schema(self) -> dict[str, Any]:
+         """
+         Get the MCP tool schema for analyze_code_scale.
+
+         Returns:
+             Dictionary containing the tool schema
+         """
+         return {
+             "type": "object",
+             "properties": {
+                 "file_path": {
+                     "type": "string",
+                     "description": "Path to the code file to analyze",
+                 },
+                 "language": {
+                     "type": "string",
+                     "description": "Programming language (optional, auto-detected if not specified)",
+                 },
+                 "include_complexity": {
+                     "type": "boolean",
+                     "description": "Include complexity metrics in the analysis",
+                     "default": True,
+                 },
+                 "include_details": {
+                     "type": "boolean",
+                     "description": "Include detailed element information",
+                     "default": False,
+                 },
+                 "include_guidance": {
+                     "type": "boolean",
+                     "description": "Include LLM analysis guidance",
+                     "default": True,
+                 },
+             },
+             "required": ["file_path"],
+             "additionalProperties": False,
+         }
+
+     async def execute(self, arguments: dict[str, Any]) -> dict[str, Any]:
+         """
+         Execute the analyze_code_scale tool.
+
+         Args:
+             arguments: Tool arguments containing file_path and optional parameters
+
+         Returns:
+             Dictionary containing enhanced analysis results optimized for LLM workflow
+
+         Raises:
+             ValueError: If required arguments are missing or invalid
+             FileNotFoundError: If the specified file doesn't exist
+         """
+         # Validate required arguments
+         if "file_path" not in arguments:
+             raise ValueError("file_path is required")
+
+         file_path = arguments["file_path"]
+         language = arguments.get("language")
+         # include_complexity = arguments.get("include_complexity", True)  # Not used currently
+         include_details = arguments.get("include_details", False)
+         include_guidance = arguments.get("include_guidance", True)
+
+         # Validate file exists
+         if not Path(file_path).exists():
+             raise FileNotFoundError(f"File not found: {file_path}")
+
+         # Detect language if not specified
+         if not language:
+             language = detect_language_from_file(file_path)
+             if language == "unknown":
+                 raise ValueError(f"Could not detect language for file: {file_path}")
+
+         logger.info(f"Analyzing code scale for {file_path} (language: {language})")
+
+         try:
+             # Use performance monitoring with proper context manager
+             from ...mcp.utils import get_performance_monitor
+
+             with get_performance_monitor().measure_operation(
+                 "analyze_code_scale_enhanced"
+             ):
+                 # Calculate basic file metrics
+                 file_metrics = self._calculate_file_metrics(file_path)
+
+                 # Use appropriate analyzer based on language
+                 if language == "java":
+                     # Use AdvancedAnalyzer for comprehensive analysis
+                     # Use unified analysis engine instead of deprecated advanced_analyzer
+                     request = AnalysisRequest(
+                         file_path=file_path,
+                         language=language,
+                         include_complexity=True,
+                         include_details=True,
+                     )
+                     analysis_result = await self.analysis_engine.analyze(request)
+                     if analysis_result is None:
+                         raise RuntimeError(f"Failed to analyze file: {file_path}")
+                     # Extract structural overview
+                     structural_overview = self._extract_structural_overview(
+                         analysis_result
+                     )
+                 else:
+                     # Use universal analysis_engine for other languages
+                     request = AnalysisRequest(
+                         file_path=file_path,
+                         language=language,
+                         include_details=include_details,
+                     )
+                     universal_result = await self.analysis_engine.analyze(request)
+                     if not universal_result or not universal_result.success:
+                         error_msg = (
+                             universal_result.error_message
+                             if universal_result
+                             else "Unknown error"
+                         )
+                         raise RuntimeError(
+                             f"Failed to analyze file with universal engine: {error_msg}"
+                         )
+
+                     # Adapt the result to a compatible structure for report generation
+                     # This part needs careful implementation based on universal_result structure
+                     analysis_result = None  # Placeholder
+                     structural_overview = {}  # Placeholder
+
+                 # Generate LLM guidance
+                 llm_guidance = None
+                 if include_guidance:
+                     llm_guidance = self._generate_llm_guidance(
+                         file_metrics, structural_overview
+                     )
+
+                 # Build enhanced result structure
+                 result = {
+                     "file_path": file_path,
+                     "language": language,
+                     "file_metrics": file_metrics,
+                     "summary": {
+                         "classes": len(
+                             [
+                                 e
+                                 for e in analysis_result.elements
+                                 if e.__class__.__name__ == "Class"
+                             ]
+                         ),
+                         "methods": len(
+                             [
+                                 e
+                                 for e in analysis_result.elements
+                                 if e.__class__.__name__ == "Function"
+                             ]
+                         ),
+                         "fields": len(
+                             [
+                                 e
+                                 for e in analysis_result.elements
+                                 if e.__class__.__name__ == "Variable"
+                             ]
+                         ),
+                         "imports": len(
+                             [
+                                 e
+                                 for e in analysis_result.elements
+                                 if e.__class__.__name__ == "Import"
+                             ]
+                         ),
+                         "annotations": len(getattr(analysis_result, "annotations", [])),
+                         "package": (
+                             analysis_result.package.name
+                             if analysis_result.package
+                             else None
+                         ),
+                     },
+                     "structural_overview": structural_overview,
+                 }
+
+                 if include_guidance:
+                     result["llm_guidance"] = llm_guidance
+
+                 # Add detailed information if requested (backward compatibility)
+                 if include_details:
+                     result["detailed_analysis"] = {
+                         "statistics": analysis_result.get_statistics(),
+                         "classes": [
+                             {
+                                 "name": cls.name,
+                                 "type": cls.class_type,
+                                 "visibility": cls.visibility,
+                                 "extends": cls.extends_class,
+                                 "implements": cls.implements_interfaces,
+                                 "annotations": [ann.name for ann in cls.annotations],
+                                 "lines": f"{cls.start_line}-{cls.end_line}",
+                             }
+                             for cls in [
+                                 e
+                                 for e in analysis_result.elements
+                                 if e.__class__.__name__ == "Class"
+                             ]
+                         ],
+                         "methods": [
+                             {
+                                 "name": method.name,
+                                 "file_path": getattr(method, "file_path", file_path),
+                                 "visibility": method.visibility,
+                                 "return_type": method.return_type,
+                                 "parameters": len(method.parameters),
+                                 "annotations": [ann.name for ann in method.annotations],
+                                 "is_constructor": method.is_constructor,
+                                 "is_static": method.is_static,
+                                 "complexity": method.complexity_score,
+                                 "lines": f"{method.start_line}-{method.end_line}",
+                             }
+                             for method in [
+                                 e
+                                 for e in analysis_result.elements
+                                 if e.__class__.__name__ == "Function"
+                             ]
+                         ],
+                         "fields": [
+                             {
+                                 "name": field.name,
+                                 "type": field.field_type,
+                                 "file_path": getattr(field, "file_path", file_path),
+                                 "visibility": field.visibility,
+                                 "is_static": field.is_static,
+                                 "is_final": field.is_final,
+                                 "annotations": [ann.name for ann in field.annotations],
+                                 "lines": f"{field.start_line}-{field.end_line}",
+                             }
+                             for field in [
+                                 e
+                                 for e in analysis_result.elements
+                                 if e.__class__.__name__ == "Variable"
+                             ]
+                         ],
+                     }
+
+                 # Count elements by type
+                 classes_count = len(
+                     [
+                         e
+                         for e in analysis_result.elements
+                         if e.__class__.__name__ == "Class"
+                     ]
+                 )
+                 methods_count = len(
+                     [
+                         e
+                         for e in analysis_result.elements
+                         if e.__class__.__name__ == "Function"
+                     ]
+                 )
+
+                 logger.info(
+                     f"Successfully analyzed {file_path}: {classes_count} classes, "
+                     f"{methods_count} methods, {file_metrics['total_lines']} lines, "
+                     f"~{file_metrics['estimated_tokens']} tokens"
+                 )
+
+                 return result
+
+         except Exception as e:
+             logger.error(f"Error analyzing {file_path}: {e}")
+             raise
+
+     def validate_arguments(self, arguments: dict[str, Any]) -> bool:
+         """
+         Validate tool arguments against the schema.
+
+         Args:
+             arguments: Arguments to validate
+
+         Returns:
+             True if arguments are valid
+
+         Raises:
+             ValueError: If arguments are invalid
+         """
+         schema = self.get_tool_schema()
+         required_fields = schema.get("required", [])
+
+         # Check required fields
+         for field in required_fields:
+             if field not in arguments:
+                 raise ValueError(f"Required field '{field}' is missing")
+
+         # Validate file_path
+         if "file_path" in arguments:
+             file_path = arguments["file_path"]
+             if not isinstance(file_path, str):
+                 raise ValueError("file_path must be a string")
+             if not file_path.strip():
+                 raise ValueError("file_path cannot be empty")
+
+         # Validate optional fields
+         if "language" in arguments:
+             language = arguments["language"]
+             if not isinstance(language, str):
+                 raise ValueError("language must be a string")
+
+         if "include_complexity" in arguments:
+             include_complexity = arguments["include_complexity"]
+             if not isinstance(include_complexity, bool):
+                 raise ValueError("include_complexity must be a boolean")
+
+         if "include_details" in arguments:
+             include_details = arguments["include_details"]
+             if not isinstance(include_details, bool):
+                 raise ValueError("include_details must be a boolean")
+
+         if "include_guidance" in arguments:
+             include_guidance = arguments["include_guidance"]
+             if not isinstance(include_guidance, bool):
+                 raise ValueError("include_guidance must be a boolean")
+
+         return True
+
+     def get_tool_definition(self) -> Any:
+         """
+         Get the MCP tool definition for analyze_code_scale.
+
+         Returns:
+             Tool definition object compatible with MCP server
+         """
+         try:
+             from mcp.types import Tool
+
+             return Tool(
+                 name="analyze_code_scale",
+                 description="Analyze code scale, complexity, and structure metrics with LLM-optimized guidance for efficient large file analysis",
+                 inputSchema=self.get_tool_schema(),
+             )
+         except ImportError:
+             # Fallback for when MCP is not available
+             return {
+                 "name": "analyze_code_scale",
+                 "description": "Analyze code scale, complexity, and structure metrics with LLM-optimized guidance for efficient large file analysis",
+                 "inputSchema": self.get_tool_schema(),
+             }
+
+
+ # Tool instance for easy access
+ analyze_scale_tool = AnalyzeScaleTool()
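
For reviewers who want to exercise the changed module, the sketch below shows one plausible way to drive the rewritten AnalyzeScaleTool from an async script, based only on the signatures visible in this diff. The sample file path and the asyncio harness are illustrative assumptions, not part of the package.

# Minimal usage sketch, assuming the 0.3.0 layout shown above.
# "Sample.java" is a hypothetical placeholder path.
import asyncio

from tree_sitter_analyzer.mcp.tools.analyze_scale_tool import AnalyzeScaleTool


async def main() -> None:
    tool = AnalyzeScaleTool()
    arguments = {
        "file_path": "Sample.java",  # hypothetical input file
        "include_guidance": True,    # request the llm_guidance block
        "include_details": False,    # skip per-element detail
    }
    tool.validate_arguments(arguments)      # raises ValueError on bad input
    result = await tool.execute(arguments)  # raises FileNotFoundError if missing
    metrics = result["file_metrics"]
    print(
        f"{result['file_path']}: {metrics['total_lines']} lines, "
        f"~{metrics['estimated_tokens']} tokens"
    )


if __name__ == "__main__":
    asyncio.run(main())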