tree-sitter-analyzer 0.8.2__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

@@ -1,673 +1,677 @@
1
- #!/usr/bin/env python3
2
- """
3
- Analyze Code Scale MCP Tool
4
-
5
- This tool provides code scale analysis including metrics about
6
- complexity, size, and structure through the MCP protocol.
7
- Enhanced for LLM-friendly analysis workflow.
8
- """
9
-
10
- import re
11
- from pathlib import Path
12
- from typing import Any
13
-
14
- from ...core.analysis_engine import AnalysisRequest, get_analysis_engine
15
- from ...language_detector import detect_language_from_file
16
- from ...security import SecurityValidator
17
- from ...utils import setup_logger
18
-
19
- # Set up logging
20
- logger = setup_logger(__name__)
21
-
22
-
23
- class AnalyzeScaleTool:
24
- """
25
- MCP Tool for analyzing code scale and complexity metrics.
26
-
27
- This tool integrates with existing analyzer components to provide
28
- comprehensive code analysis through the MCP protocol, optimized
29
- for LLM workflow efficiency.
30
- """
31
-
32
- def __init__(self, project_root: str = None) -> None:
33
- """Initialize the analyze scale tool."""
34
- # Use unified analysis engine instead of deprecated AdvancedAnalyzer
35
- self.project_root = project_root
36
- self.analysis_engine = get_analysis_engine(project_root)
37
- self.security_validator = SecurityValidator(project_root)
38
- logger.info("AnalyzeScaleTool initialized with security validation")
39
-
40
- def _calculate_file_metrics(self, file_path: str) -> dict[str, Any]:
41
- """
42
- Calculate basic file metrics including line counts and estimated token count.
43
-
44
- Args:
45
- file_path: Path to the file to analyze
46
-
47
- Returns:
48
- Dictionary containing file metrics
49
- """
50
- try:
51
- with open(file_path, encoding="utf-8") as f:
52
- content = f.read()
53
-
54
- lines = content.split("\n")
55
- total_lines = len(lines)
56
-
57
- # Count different types of lines
58
- code_lines = 0
59
- comment_lines = 0
60
- blank_lines = 0
61
-
62
- for line in lines:
63
- stripped = line.strip()
64
- if not stripped:
65
- blank_lines += 1
66
- elif (
67
- stripped.startswith("//")
68
- or stripped.startswith("/*")
69
- or stripped.startswith("*")
70
- ):
71
- comment_lines += 1
72
- else:
73
- code_lines += 1
74
-
75
- # Estimate token count (rough approximation)
76
- # Split by common delimiters and count non-empty tokens
77
- tokens = re.findall(r"\b\w+\b|[^\w\s]", content)
78
- estimated_tokens = len([t for t in tokens if t.strip()])
79
-
80
- # Calculate file size
81
- file_size = len(content.encode("utf-8"))
82
-
83
- return {
84
- "total_lines": total_lines,
85
- "code_lines": code_lines,
86
- "comment_lines": comment_lines,
87
- "blank_lines": blank_lines,
88
- "estimated_tokens": estimated_tokens,
89
- "file_size_bytes": file_size,
90
- "file_size_kb": round(file_size / 1024, 2),
91
- }
92
- except Exception as e:
93
- logger.error(f"Error calculating file metrics for {file_path}: {e}")
94
- return {
95
- "total_lines": 0,
96
- "code_lines": 0,
97
- "comment_lines": 0,
98
- "blank_lines": 0,
99
- "estimated_tokens": 0,
100
- "file_size_bytes": 0,
101
- "file_size_kb": 0,
102
- }
103
-
104
- def _extract_structural_overview(self, analysis_result: Any) -> dict[str, Any]:
105
- """
106
- Extract structural overview with position information for LLM guidance.
107
-
108
- Args:
109
- analysis_result: Result from AdvancedAnalyzer
110
-
111
- Returns:
112
- Dictionary containing structural overview
113
- """
114
- overview: dict[str, Any] = {
115
- "classes": [],
116
- "methods": [],
117
- "fields": [],
118
- "imports": [],
119
- "complexity_hotspots": [],
120
- }
121
-
122
- # Extract class information with position from unified analysis engine
123
- classes = [
124
- e for e in analysis_result.elements if e.__class__.__name__ == "Class"
125
- ]
126
- for cls in classes:
127
- class_info = {
128
- "name": cls.name,
129
- "type": cls.class_type,
130
- "start_line": cls.start_line,
131
- "end_line": cls.end_line,
132
- "line_span": cls.end_line - cls.start_line + 1,
133
- "visibility": cls.visibility,
134
- "extends": cls.extends_class,
135
- "implements": cls.implements_interfaces,
136
- "annotations": [ann.name for ann in cls.annotations],
137
- }
138
- overview["classes"].append(class_info)
139
-
140
- # Extract method information with position and complexity from unified analysis engine
141
- methods = [
142
- e for e in analysis_result.elements if e.__class__.__name__ == "Function"
143
- ]
144
- for method in methods:
145
- method_info = {
146
- "name": method.name,
147
- "start_line": method.start_line,
148
- "end_line": method.end_line,
149
- "line_span": method.end_line - method.start_line + 1,
150
- "visibility": method.visibility,
151
- "return_type": method.return_type,
152
- "parameter_count": len(method.parameters),
153
- "complexity": method.complexity_score,
154
- "is_constructor": method.is_constructor,
155
- "is_static": method.is_static,
156
- "annotations": [ann.name for ann in method.annotations],
157
- }
158
- overview["methods"].append(method_info)
159
-
160
- # Track complexity hotspots
161
- if method.complexity_score > 10: # High complexity threshold
162
- overview["complexity_hotspots"].append(
163
- {
164
- "type": "method",
165
- "name": method.name,
166
- "complexity": method.complexity_score,
167
- "start_line": method.start_line,
168
- "end_line": method.end_line,
169
- }
170
- )
171
-
172
- # Extract field information with position
173
- # Extract field information from unified analysis engine
174
- fields = [
175
- e for e in analysis_result.elements if e.__class__.__name__ == "Variable"
176
- ]
177
- for field in fields:
178
- field_info = {
179
- "name": field.name,
180
- "type": field.field_type,
181
- "start_line": field.start_line,
182
- "end_line": field.end_line,
183
- "visibility": field.visibility,
184
- "is_static": field.is_static,
185
- "is_final": field.is_final,
186
- "annotations": [ann.name for ann in field.annotations],
187
- }
188
- overview["fields"].append(field_info)
189
-
190
- # Extract import information
191
- # Extract import information from unified analysis engine
192
- imports = [
193
- e for e in analysis_result.elements if e.__class__.__name__ == "Import"
194
- ]
195
- for imp in imports:
196
- import_info = {
197
- "name": imp.imported_name,
198
- "statement": imp.import_statement,
199
- "line": imp.line_number,
200
- "is_static": imp.is_static,
201
- "is_wildcard": imp.is_wildcard,
202
- }
203
- overview["imports"].append(import_info)
204
-
205
- return overview
206
-
207
- def _generate_llm_guidance(
208
- self, file_metrics: dict[str, Any], structural_overview: dict[str, Any]
209
- ) -> dict[str, Any]:
210
- """
211
- Generate guidance for LLM on how to efficiently analyze this file.
212
-
213
- Args:
214
- file_metrics: Basic file metrics
215
- structural_overview: Structural overview of the code
216
-
217
- Returns:
218
- Dictionary containing LLM guidance
219
- """
220
- guidance: dict[str, Any] = {
221
- "analysis_strategy": "",
222
- "recommended_tools": [],
223
- "key_areas": [],
224
- "complexity_assessment": "",
225
- "size_category": "",
226
- }
227
-
228
- total_lines = file_metrics["total_lines"]
229
- # estimated_tokens = file_metrics["estimated_tokens"] # Not used currently
230
-
231
- # Determine size category
232
- if total_lines < 100:
233
- guidance["size_category"] = "small"
234
- guidance["analysis_strategy"] = (
235
- "This is a small file that can be analyzed in full detail."
236
- )
237
- elif total_lines < 500:
238
- guidance["size_category"] = "medium"
239
- guidance["analysis_strategy"] = (
240
- "This is a medium-sized file. Consider focusing on key classes and methods."
241
- )
242
- elif total_lines < 1500:
243
- guidance["size_category"] = "large"
244
- guidance["analysis_strategy"] = (
245
- "This is a large file. Use targeted analysis with read_code_partial."
246
- )
247
- else:
248
- guidance["size_category"] = "very_large"
249
- guidance["analysis_strategy"] = (
250
- "This is a very large file. Strongly recommend using structural analysis first, then targeted deep-dives."
251
- )
252
-
253
- # Recommend tools based on file size and complexity
254
- if total_lines > 200:
255
- guidance["recommended_tools"].append("read_code_partial")
256
-
257
- # Ensure all required fields exist
258
- required_fields = ["complexity_hotspots", "classes", "methods", "fields", "imports"]
259
- for field in required_fields:
260
- if field not in structural_overview:
261
- structural_overview[field] = []
262
-
263
- if len(structural_overview["complexity_hotspots"]) > 0:
264
- guidance["recommended_tools"].append("format_table")
265
- guidance["complexity_assessment"] = (
266
- f"Found {len(structural_overview['complexity_hotspots'])} complexity hotspots"
267
- )
268
- else:
269
- guidance["complexity_assessment"] = (
270
- "No significant complexity hotspots detected"
271
- )
272
-
273
- # Identify key areas for analysis
274
- if len(structural_overview["classes"]) > 1:
275
- guidance["key_areas"].append(
276
- "Multiple classes - consider analyzing class relationships"
277
- )
278
-
279
- if len(structural_overview["methods"]) > 20:
280
- guidance["key_areas"].append(
281
- "Many methods - focus on public interfaces and high-complexity methods"
282
- )
283
-
284
- if len(structural_overview["imports"]) > 10:
285
- guidance["key_areas"].append("Many imports - consider dependency analysis")
286
-
287
- return guidance
288
-
289
- def get_tool_schema(self) -> dict[str, Any]:
290
- """
291
- Get the MCP tool schema for analyze_code_scale.
292
-
293
- Returns:
294
- Dictionary containing the tool schema
295
- """
296
- return {
297
- "type": "object",
298
- "properties": {
299
- "file_path": {
300
- "type": "string",
301
- "description": "Path to the code file to analyze",
302
- },
303
- "language": {
304
- "type": "string",
305
- "description": "Programming language (optional, auto-detected if not specified)",
306
- },
307
- "include_complexity": {
308
- "type": "boolean",
309
- "description": "Include complexity metrics in the analysis",
310
- "default": True,
311
- },
312
- "include_details": {
313
- "type": "boolean",
314
- "description": "Include detailed element information",
315
- "default": False,
316
- },
317
- "include_guidance": {
318
- "type": "boolean",
319
- "description": "Include LLM analysis guidance",
320
- "default": True,
321
- },
322
- },
323
- "required": ["file_path"],
324
- "additionalProperties": False,
325
- }
326
-
327
- async def execute(self, arguments: dict[str, Any]) -> dict[str, Any]:
328
- """
329
- Execute the analyze_code_scale tool.
330
-
331
- Args:
332
- arguments: Tool arguments containing file_path and optional parameters
333
-
334
- Returns:
335
- Dictionary containing enhanced analysis results optimized for LLM workflow
336
-
337
- Raises:
338
- ValueError: If required arguments are missing or invalid
339
- FileNotFoundError: If the specified file doesn't exist
340
- """
341
- # Validate required arguments
342
- if "file_path" not in arguments:
343
- raise ValueError("file_path is required")
344
-
345
- file_path = arguments["file_path"]
346
- language = arguments.get("language")
347
- # include_complexity = arguments.get("include_complexity", True) # Not used currently
348
- include_details = arguments.get("include_details", False)
349
- include_guidance = arguments.get("include_guidance", True)
350
-
351
- # Security validation
352
- is_valid, error_msg = self.security_validator.validate_file_path(file_path)
353
- if not is_valid:
354
- logger.warning(f"Security validation failed for file path: {file_path} - {error_msg}")
355
- raise ValueError(f"Invalid file path: {error_msg}")
356
-
357
- # Sanitize inputs
358
- if language:
359
- language = self.security_validator.sanitize_input(language, max_length=50)
360
-
361
- # Validate file exists
362
- if not Path(file_path).exists():
363
- raise FileNotFoundError(f"File not found: {file_path}")
364
-
365
- # Detect language if not specified
366
- if not language:
367
- language = detect_language_from_file(file_path)
368
- if language == "unknown":
369
- raise ValueError(f"Could not detect language for file: {file_path}")
370
-
371
- logger.info(f"Analyzing code scale for {file_path} (language: {language})")
372
-
373
- try:
374
- # Use performance monitoring with proper context manager
375
- from ...mcp.utils import get_performance_monitor
376
-
377
- with get_performance_monitor().measure_operation(
378
- "analyze_code_scale_enhanced"
379
- ):
380
- # Calculate basic file metrics
381
- file_metrics = self._calculate_file_metrics(file_path)
382
-
383
- # Use appropriate analyzer based on language
384
- if language == "java":
385
- # Use AdvancedAnalyzer for comprehensive analysis
386
- # Use unified analysis engine instead of deprecated advanced_analyzer
387
- request = AnalysisRequest(
388
- file_path=file_path,
389
- language=language,
390
- include_complexity=True,
391
- include_details=True,
392
- )
393
- analysis_result = await self.analysis_engine.analyze(request)
394
- if analysis_result is None:
395
- raise RuntimeError(f"Failed to analyze file: {file_path}")
396
- # Extract structural overview
397
- structural_overview = self._extract_structural_overview(
398
- analysis_result
399
- )
400
- else:
401
- # Use universal analysis_engine for other languages
402
- request = AnalysisRequest(
403
- file_path=file_path,
404
- language=language,
405
- include_details=include_details,
406
- )
407
- universal_result = await self.analysis_engine.analyze(request)
408
- if not universal_result or not universal_result.success:
409
- error_msg = (
410
- universal_result.error_message
411
- if universal_result
412
- else "Unknown error"
413
- )
414
- raise RuntimeError(
415
- f"Failed to analyze file with universal engine: {error_msg}"
416
- )
417
-
418
- # Adapt the result to a compatible structure for report generation
419
- # This part needs careful implementation based on universal_result structure
420
- analysis_result = None # Placeholder
421
- structural_overview = {} # Placeholder
422
-
423
- # Generate LLM guidance
424
- llm_guidance = None
425
- if include_guidance:
426
- llm_guidance = self._generate_llm_guidance(
427
- file_metrics, structural_overview
428
- )
429
-
430
- # Build enhanced result structure
431
- result = {
432
- "file_path": file_path,
433
- "language": language,
434
- "file_metrics": file_metrics,
435
- "summary": {
436
- "classes": len(
437
- [
438
- e
439
- for e in (
440
- analysis_result.elements if analysis_result else []
441
- )
442
- if e.__class__.__name__ == "Class"
443
- ]
444
- ),
445
- "methods": len(
446
- [
447
- e
448
- for e in (
449
- analysis_result.elements if analysis_result else []
450
- )
451
- if e.__class__.__name__ == "Function"
452
- ]
453
- ),
454
- "fields": len(
455
- [
456
- e
457
- for e in (
458
- analysis_result.elements if analysis_result else []
459
- )
460
- if e.__class__.__name__ == "Variable"
461
- ]
462
- ),
463
- "imports": len(
464
- [
465
- e
466
- for e in (
467
- analysis_result.elements if analysis_result else []
468
- )
469
- if e.__class__.__name__ == "Import"
470
- ]
471
- ),
472
- "annotations": len(
473
- getattr(analysis_result, "annotations", [])
474
- if analysis_result
475
- else []
476
- ),
477
- "package": (
478
- analysis_result.package.name
479
- if analysis_result and analysis_result.package
480
- else None
481
- ),
482
- },
483
- "structural_overview": structural_overview,
484
- }
485
-
486
- if include_guidance:
487
- result["llm_guidance"] = llm_guidance
488
-
489
- # Add detailed information if requested (backward compatibility)
490
- if include_details:
491
- result["detailed_analysis"] = {
492
- "statistics": (
493
- analysis_result.get_statistics() if analysis_result else {}
494
- ),
495
- "classes": [
496
- {
497
- "name": cls.name,
498
- "type": getattr(cls, "class_type", "unknown"),
499
- "visibility": getattr(cls, "visibility", "unknown"),
500
- "extends": getattr(cls, "extends_class", None),
501
- "implements": getattr(cls, "implements_interfaces", []),
502
- "annotations": [
503
- getattr(ann, "name", str(ann))
504
- for ann in getattr(cls, "annotations", [])
505
- ],
506
- "lines": f"{cls.start_line}-{cls.end_line}",
507
- }
508
- for cls in [
509
- e
510
- for e in (
511
- analysis_result.elements if analysis_result else []
512
- )
513
- if e.__class__.__name__ == "Class"
514
- ]
515
- ],
516
- "methods": [
517
- {
518
- "name": method.name,
519
- "file_path": getattr(method, "file_path", file_path),
520
- "visibility": getattr(method, "visibility", "unknown"),
521
- "return_type": getattr(
522
- method, "return_type", "unknown"
523
- ),
524
- "parameters": len(getattr(method, "parameters", [])),
525
- "annotations": [
526
- getattr(ann, "name", str(ann))
527
- for ann in getattr(method, "annotations", [])
528
- ],
529
- "is_constructor": getattr(
530
- method, "is_constructor", False
531
- ),
532
- "is_static": getattr(method, "is_static", False),
533
- "complexity": getattr(method, "complexity_score", 0),
534
- "lines": f"{method.start_line}-{method.end_line}",
535
- }
536
- for method in [
537
- e
538
- for e in (
539
- analysis_result.elements if analysis_result else []
540
- )
541
- if e.__class__.__name__ == "Function"
542
- ]
543
- ],
544
- "fields": [
545
- {
546
- "name": field.name,
547
- "type": getattr(field, "field_type", "unknown"),
548
- "file_path": getattr(field, "file_path", file_path),
549
- "visibility": getattr(field, "visibility", "unknown"),
550
- "is_static": getattr(field, "is_static", False),
551
- "is_final": getattr(field, "is_final", False),
552
- "annotations": [
553
- getattr(ann, "name", str(ann))
554
- for ann in getattr(field, "annotations", [])
555
- ],
556
- "lines": f"{field.start_line}-{field.end_line}",
557
- }
558
- for field in [
559
- e
560
- for e in (
561
- analysis_result.elements if analysis_result else []
562
- )
563
- if e.__class__.__name__ == "Variable"
564
- ]
565
- ],
566
- }
567
-
568
- # Count elements by type
569
- classes_count = len(
570
- [
571
- e
572
- for e in (analysis_result.elements if analysis_result else [])
573
- if e.__class__.__name__ == "Class"
574
- ]
575
- )
576
- methods_count = len(
577
- [
578
- e
579
- for e in (analysis_result.elements if analysis_result else [])
580
- if e.__class__.__name__ == "Function"
581
- ]
582
- )
583
-
584
- logger.info(
585
- f"Successfully analyzed {file_path}: {classes_count} classes, "
586
- f"{methods_count} methods, {file_metrics['total_lines']} lines, "
587
- f"~{file_metrics['estimated_tokens']} tokens"
588
- )
589
-
590
- return result
591
-
592
- except Exception as e:
593
- logger.error(f"Error analyzing {file_path}: {e}")
594
- raise
595
-
596
- def validate_arguments(self, arguments: dict[str, Any]) -> bool:
597
- """
598
- Validate tool arguments against the schema.
599
-
600
- Args:
601
- arguments: Arguments to validate
602
-
603
- Returns:
604
- True if arguments are valid
605
-
606
- Raises:
607
- ValueError: If arguments are invalid
608
- """
609
- schema = self.get_tool_schema()
610
- required_fields = schema.get("required", [])
611
-
612
- # Check required fields
613
- for field in required_fields:
614
- if field not in arguments:
615
- raise ValueError(f"Required field '{field}' is missing")
616
-
617
- # Validate file_path
618
- if "file_path" in arguments:
619
- file_path = arguments["file_path"]
620
- if not isinstance(file_path, str):
621
- raise ValueError("file_path must be a string")
622
- if not file_path.strip():
623
- raise ValueError("file_path cannot be empty")
624
-
625
- # Validate optional fields
626
- if "language" in arguments:
627
- language = arguments["language"]
628
- if not isinstance(language, str):
629
- raise ValueError("language must be a string")
630
-
631
- if "include_complexity" in arguments:
632
- include_complexity = arguments["include_complexity"]
633
- if not isinstance(include_complexity, bool):
634
- raise ValueError("include_complexity must be a boolean")
635
-
636
- if "include_details" in arguments:
637
- include_details = arguments["include_details"]
638
- if not isinstance(include_details, bool):
639
- raise ValueError("include_details must be a boolean")
640
-
641
- if "include_guidance" in arguments:
642
- include_guidance = arguments["include_guidance"]
643
- if not isinstance(include_guidance, bool):
644
- raise ValueError("include_guidance must be a boolean")
645
-
646
- return True
647
-
648
- def get_tool_definition(self) -> Any:
649
- """
650
- Get the MCP tool definition for analyze_code_scale.
651
-
652
- Returns:
653
- Tool definition object compatible with MCP server
654
- """
655
- try:
656
- from mcp.types import Tool
657
-
658
- return Tool(
659
- name="analyze_code_scale",
660
- description="Analyze code scale, complexity, and structure metrics with LLM-optimized guidance for efficient large file analysis",
661
- inputSchema=self.get_tool_schema(),
662
- )
663
- except ImportError:
664
- # Fallback for when MCP is not available
665
- return {
666
- "name": "analyze_code_scale",
667
- "description": "Analyze code scale, complexity, and structure metrics with LLM-optimized guidance for efficient large file analysis",
668
- "inputSchema": self.get_tool_schema(),
669
- }
670
-
671
-
672
- # Tool instance for easy access
673
- analyze_scale_tool = AnalyzeScaleTool()
1
+ #!/usr/bin/env python3
2
+ """
3
+ Analyze Code Scale MCP Tool
4
+
5
+ This tool provides code scale analysis including metrics about
6
+ complexity, size, and structure through the MCP protocol.
7
+ Enhanced for LLM-friendly analysis workflow.
8
+ """
9
+
10
+ import re
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from ...core.analysis_engine import AnalysisRequest, get_analysis_engine
15
+ from ...language_detector import detect_language_from_file
16
+ from ...security import SecurityValidator
17
+ from ...utils import setup_logger
18
+
19
+ # Set up logging
20
+ logger = setup_logger(__name__)
21
+
22
+
23
+ class AnalyzeScaleTool:
24
+ """
25
+ MCP Tool for analyzing code scale and complexity metrics.
26
+
27
+ This tool integrates with existing analyzer components to provide
28
+ comprehensive code analysis through the MCP protocol, optimized
29
+ for LLM workflow efficiency.
30
+ """
31
+
32
+ def __init__(self, project_root: str = None) -> None:
33
+ """Initialize the analyze scale tool."""
34
+ # Use unified analysis engine instead of deprecated AdvancedAnalyzer
35
+ self.project_root = project_root
36
+ self.analysis_engine = get_analysis_engine(project_root)
37
+ self.security_validator = SecurityValidator(project_root)
38
+ logger.info("AnalyzeScaleTool initialized with security validation")
39
+
40
+ def _calculate_file_metrics(self, file_path: str) -> dict[str, Any]:
41
+ """
42
+ Calculate basic file metrics including line counts and estimated token count.
43
+
44
+ Args:
45
+ file_path: Path to the file to analyze
46
+
47
+ Returns:
48
+ Dictionary containing file metrics
49
+ """
50
+ try:
51
+ with open(file_path, encoding="utf-8") as f:
52
+ content = f.read()
53
+
54
+ lines = content.split("\n")
55
+ total_lines = len(lines)
56
+
57
+ # Count different types of lines
58
+ code_lines = 0
59
+ comment_lines = 0
60
+ blank_lines = 0
61
+
62
+ for line in lines:
63
+ stripped = line.strip()
64
+ if not stripped:
65
+ blank_lines += 1
66
+ elif (
67
+ stripped.startswith("//")
68
+ or stripped.startswith("/*")
69
+ or stripped.startswith("*")
70
+ ):
71
+ comment_lines += 1
72
+ else:
73
+ code_lines += 1
74
+
75
+ # Estimate token count (rough approximation)
76
+ # Split by common delimiters and count non-empty tokens
77
+ tokens = re.findall(r"\b\w+\b|[^\w\s]", content)
78
+ estimated_tokens = len([t for t in tokens if t.strip()])
79
+
80
+ # Calculate file size
81
+ file_size = len(content.encode("utf-8"))
82
+
83
+ return {
84
+ "total_lines": total_lines,
85
+ "code_lines": code_lines,
86
+ "comment_lines": comment_lines,
87
+ "blank_lines": blank_lines,
88
+ "estimated_tokens": estimated_tokens,
89
+ "file_size_bytes": file_size,
90
+ "file_size_kb": round(file_size / 1024, 2),
91
+ }
92
+ except Exception as e:
93
+ logger.error(f"Error calculating file metrics for {file_path}: {e}")
94
+ return {
95
+ "total_lines": 0,
96
+ "code_lines": 0,
97
+ "comment_lines": 0,
98
+ "blank_lines": 0,
99
+ "estimated_tokens": 0,
100
+ "file_size_bytes": 0,
101
+ "file_size_kb": 0,
102
+ }
103
+
104
+ def _extract_structural_overview(self, analysis_result: Any) -> dict[str, Any]:
105
+ """
106
+ Extract structural overview with position information for LLM guidance.
107
+
108
+ Args:
109
+ analysis_result: Result from AdvancedAnalyzer
110
+
111
+ Returns:
112
+ Dictionary containing structural overview
113
+ """
114
+ overview: dict[str, Any] = {
115
+ "classes": [],
116
+ "methods": [],
117
+ "fields": [],
118
+ "imports": [],
119
+ "complexity_hotspots": [],
120
+ }
121
+
122
+ # Extract class information with position from unified analysis engine
123
+ classes = [
124
+ e for e in analysis_result.elements if e.__class__.__name__ == "Class"
125
+ ]
126
+ for cls in classes:
127
+ class_info = {
128
+ "name": cls.name,
129
+ "type": cls.class_type,
130
+ "start_line": cls.start_line,
131
+ "end_line": cls.end_line,
132
+ "line_span": cls.end_line - cls.start_line + 1,
133
+ "visibility": cls.visibility,
134
+ "extends": cls.extends_class,
135
+ "implements": cls.implements_interfaces,
136
+ "annotations": [ann.name for ann in cls.annotations],
137
+ }
138
+ overview["classes"].append(class_info)
139
+
140
+ # Extract method information with position and complexity from unified analysis engine
141
+ methods = [
142
+ e for e in analysis_result.elements if e.__class__.__name__ == "Function"
143
+ ]
144
+ for method in methods:
145
+ method_info = {
146
+ "name": method.name,
147
+ "start_line": method.start_line,
148
+ "end_line": method.end_line,
149
+ "line_span": method.end_line - method.start_line + 1,
150
+ "visibility": method.visibility,
151
+ "return_type": method.return_type,
152
+ "parameter_count": len(method.parameters),
153
+ "complexity": method.complexity_score,
154
+ "is_constructor": method.is_constructor,
155
+ "is_static": method.is_static,
156
+ "annotations": [ann.name for ann in method.annotations],
157
+ }
158
+ overview["methods"].append(method_info)
159
+
160
+ # Track complexity hotspots
161
+ if method.complexity_score > 10: # High complexity threshold
162
+ overview["complexity_hotspots"].append(
163
+ {
164
+ "type": "method",
165
+ "name": method.name,
166
+ "complexity": method.complexity_score,
167
+ "start_line": method.start_line,
168
+ "end_line": method.end_line,
169
+ }
170
+ )
171
+
172
+ # Extract field information with position
173
+ # Extract field information from unified analysis engine
174
+ fields = [
175
+ e for e in analysis_result.elements if e.__class__.__name__ == "Variable"
176
+ ]
177
+ for field in fields:
178
+ field_info = {
179
+ "name": field.name,
180
+ "type": field.field_type,
181
+ "start_line": field.start_line,
182
+ "end_line": field.end_line,
183
+ "visibility": field.visibility,
184
+ "is_static": field.is_static,
185
+ "is_final": field.is_final,
186
+ "annotations": [ann.name for ann in field.annotations],
187
+ }
188
+ overview["fields"].append(field_info)
189
+
190
+ # Extract import information
191
+ # Extract import information from unified analysis engine
192
+ imports = [
193
+ e for e in analysis_result.elements if e.__class__.__name__ == "Import"
194
+ ]
195
+ for imp in imports:
196
+ import_info = {
197
+ "name": imp.imported_name,
198
+ "statement": imp.import_statement,
199
+ "line": imp.line_number,
200
+ "is_static": imp.is_static,
201
+ "is_wildcard": imp.is_wildcard,
202
+ }
203
+ overview["imports"].append(import_info)
204
+
205
+ return overview
206
+
207
+ def _generate_llm_guidance(
208
+ self, file_metrics: dict[str, Any], structural_overview: dict[str, Any]
209
+ ) -> dict[str, Any]:
210
+ """
211
+ Generate guidance for LLM on how to efficiently analyze this file.
212
+
213
+ Args:
214
+ file_metrics: Basic file metrics
215
+ structural_overview: Structural overview of the code
216
+
217
+ Returns:
218
+ Dictionary containing LLM guidance
219
+ """
220
+ guidance: dict[str, Any] = {
221
+ "analysis_strategy": "",
222
+ "recommended_tools": [],
223
+ "key_areas": [],
224
+ "complexity_assessment": "",
225
+ "size_category": "",
226
+ }
227
+
228
+ total_lines = file_metrics["total_lines"]
229
+ # estimated_tokens = file_metrics["estimated_tokens"] # Not used currently
230
+
231
+ # Determine size category
232
+ if total_lines < 100:
233
+ guidance["size_category"] = "small"
234
+ guidance["analysis_strategy"] = (
235
+ "This is a small file that can be analyzed in full detail."
236
+ )
237
+ elif total_lines < 500:
238
+ guidance["size_category"] = "medium"
239
+ guidance["analysis_strategy"] = (
240
+ "This is a medium-sized file. Consider focusing on key classes and methods."
241
+ )
242
+ elif total_lines < 1500:
243
+ guidance["size_category"] = "large"
244
+ guidance["analysis_strategy"] = (
245
+ "This is a large file. Use targeted analysis with read_code_partial."
246
+ )
247
+ else:
248
+ guidance["size_category"] = "very_large"
249
+ guidance["analysis_strategy"] = (
250
+ "This is a very large file. Strongly recommend using structural analysis first, then targeted deep-dives."
251
+ )
252
+
253
+ # Recommend tools based on file size and complexity
254
+ if total_lines > 200:
255
+ guidance["recommended_tools"].append("read_code_partial")
256
+
257
+ # Ensure all required fields exist
258
+ required_fields = ["complexity_hotspots", "classes", "methods", "fields", "imports"]
259
+ for field in required_fields:
260
+ if field not in structural_overview:
261
+ structural_overview[field] = []
262
+
263
+ if len(structural_overview["complexity_hotspots"]) > 0:
264
+ guidance["recommended_tools"].append("format_table")
265
+ guidance["complexity_assessment"] = (
266
+ f"Found {len(structural_overview['complexity_hotspots'])} complexity hotspots"
267
+ )
268
+ else:
269
+ guidance["complexity_assessment"] = (
270
+ "No significant complexity hotspots detected"
271
+ )
272
+
273
+ # Identify key areas for analysis
274
+ if len(structural_overview["classes"]) > 1:
275
+ guidance["key_areas"].append(
276
+ "Multiple classes - consider analyzing class relationships"
277
+ )
278
+
279
+ if len(structural_overview["methods"]) > 20:
280
+ guidance["key_areas"].append(
281
+ "Many methods - focus on public interfaces and high-complexity methods"
282
+ )
283
+
284
+ if len(structural_overview["imports"]) > 10:
285
+ guidance["key_areas"].append("Many imports - consider dependency analysis")
286
+
287
+ return guidance
288
+
289
def get_tool_schema(self) -> dict[str, Any]:
    """
    Build the input schema advertised for the analyze_code_scale tool.

    Returns:
        A JSON-schema style dictionary describing one required string
        argument (file_path), one optional string argument (language) and
        three optional boolean switches, with unknown properties disallowed.
    """

    def switch(description: str, default: bool) -> dict[str, Any]:
        # All boolean options share the same type/description/default layout.
        return {"type": "boolean", "description": description, "default": default}

    properties: dict[str, Any] = {}
    properties["file_path"] = {
        "type": "string",
        "description": "Path to the code file to analyze",
    }
    properties["language"] = {
        "type": "string",
        "description": "Programming language (optional, auto-detected if not specified)",
    }
    properties["include_complexity"] = switch(
        "Include complexity metrics in the analysis", True
    )
    properties["include_details"] = switch(
        "Include detailed element information", False
    )
    properties["include_guidance"] = switch("Include LLM analysis guidance", True)

    schema: dict[str, Any] = {"type": "object"}
    schema["properties"] = properties
    schema["required"] = ["file_path"]
    schema["additionalProperties"] = False
    return schema
326
+
327
async def execute(self, arguments: dict[str, Any]) -> dict[str, Any]:
    """
    Execute the analyze_code_scale tool.

    Args:
        arguments: Tool arguments containing file_path and the optional
            language, include_details and include_guidance entries.

    Returns:
        Dictionary with file_path, language, file_metrics, a summary of
        element counts and structural_overview; llm_guidance is added when
        include_guidance is truthy and detailed_analysis when
        include_details is truthy.

    Raises:
        ValueError: If file_path is missing, is rejected by the security
            validator, or the language cannot be detected.
        FileNotFoundError: If the specified file doesn't exist.
        RuntimeError: If the analysis engine returns no result (or, for
            non-Java files, an unsuccessful one).
    """
    # Validate required arguments
    if "file_path" not in arguments:
        raise ValueError("file_path is required")

    file_path = arguments["file_path"]
    language = arguments.get("language")
    # NOTE: include_complexity is accepted by the schema but is not consulted here.
    include_details = arguments.get("include_details", False)
    include_guidance = arguments.get("include_guidance", True)

    # Security validation (use project_root as base when available)
    is_valid, error_msg = self.security_validator.validate_file_path(
        file_path, base_path=self.project_root
    )
    if not is_valid:
        logger.warning(
            f"Security validation failed for file path: {file_path} - {error_msg}"
        )
        raise ValueError(f"Invalid file path: {error_msg}")

    # Sanitize inputs
    if language:
        language = self.security_validator.sanitize_input(language, max_length=50)

    # Validate file exists
    if not Path(file_path).exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    # Detect language if not specified
    if not language:
        language = detect_language_from_file(file_path)
        if language == "unknown":
            raise ValueError(f"Could not detect language for file: {file_path}")

    logger.info(f"Analyzing code scale for {file_path} (language: {language})")

    def annotation_names(element: Any) -> list[Any]:
        # Prefer each annotation's name attribute, else its string form.
        return [
            getattr(ann, "name", str(ann))
            for ann in getattr(element, "annotations", [])
        ]

    def line_span(element: Any) -> str:
        # "start-end" range used in every detailed entry.
        return f"{element.start_line}-{element.end_line}"

    try:
        # Use performance monitoring with proper context manager
        from ...mcp.utils import get_performance_monitor

        with get_performance_monitor().measure_operation(
            "analyze_code_scale_enhanced"
        ):
            # Calculate basic file metrics
            file_metrics = self._calculate_file_metrics(file_path)

            if language == "java":
                # Java always requests complexity and details from the
                # unified analysis engine, regardless of the tool arguments.
                request = AnalysisRequest(
                    file_path=file_path,
                    language=language,
                    include_complexity=True,
                    include_details=True,
                )
                analysis_result = await self.analysis_engine.analyze(request)
                if analysis_result is None:
                    raise RuntimeError(f"Failed to analyze file: {file_path}")
                # Extract structural overview
                structural_overview = self._extract_structural_overview(
                    analysis_result
                )
            else:
                # Use universal analysis_engine for other languages
                request = AnalysisRequest(
                    file_path=file_path,
                    language=language,
                    include_details=include_details,
                )
                universal_result = await self.analysis_engine.analyze(request)
                if not universal_result or not universal_result.success:
                    error_msg = (
                        universal_result.error_message
                        if universal_result
                        else "Unknown error"
                    )
                    raise RuntimeError(
                        f"Failed to analyze file with universal engine: {error_msg}"
                    )

                # NOTE(review): the successful universal_result is not adapted
                # yet, so non-Java files report empty counts and an empty
                # structural overview. Placeholder behaviour kept as-is.
                analysis_result = None  # Placeholder
                structural_overview = {}  # Placeholder

            # Generate LLM guidance
            llm_guidance = None
            if include_guidance:
                llm_guidance = self._generate_llm_guidance(
                    file_metrics, structural_overview
                )

            # Partition the elements by concrete class name in one pass
            # instead of re-filtering the full list for every consumer.
            grouped: dict[str, list[Any]] = {
                "Class": [],
                "Function": [],
                "Variable": [],
                "Import": [],
            }
            for element in analysis_result.elements if analysis_result else []:
                bucket = grouped.get(element.__class__.__name__)
                if bucket is not None:
                    bucket.append(element)
            classes = grouped["Class"]
            methods = grouped["Function"]
            fields = grouped["Variable"]
            imports = grouped["Import"]

            # Build enhanced result structure
            result = {
                "file_path": file_path,
                "language": language,
                "file_metrics": file_metrics,
                "summary": {
                    "classes": len(classes),
                    "methods": len(methods),
                    "fields": len(fields),
                    "imports": len(imports),
                    "annotations": len(
                        getattr(analysis_result, "annotations", [])
                        if analysis_result
                        else []
                    ),
                    "package": (
                        analysis_result.package.name
                        if analysis_result and analysis_result.package
                        else None
                    ),
                },
                "structural_overview": structural_overview,
            }

            if include_guidance:
                result["llm_guidance"] = llm_guidance

            # Add detailed information if requested (backward compatibility)
            if include_details:
                result["detailed_analysis"] = {
                    "statistics": (
                        analysis_result.get_statistics() if analysis_result else {}
                    ),
                    "classes": [
                        {
                            "name": cls.name,
                            "type": getattr(cls, "class_type", "unknown"),
                            "visibility": getattr(cls, "visibility", "unknown"),
                            "extends": getattr(cls, "extends_class", None),
                            "implements": getattr(cls, "implements_interfaces", []),
                            "annotations": annotation_names(cls),
                            "lines": line_span(cls),
                        }
                        for cls in classes
                    ],
                    "methods": [
                        {
                            "name": method.name,
                            "file_path": getattr(method, "file_path", file_path),
                            "visibility": getattr(method, "visibility", "unknown"),
                            "return_type": getattr(
                                method, "return_type", "unknown"
                            ),
                            "parameters": len(getattr(method, "parameters", [])),
                            "annotations": annotation_names(method),
                            "is_constructor": getattr(
                                method, "is_constructor", False
                            ),
                            "is_static": getattr(method, "is_static", False),
                            "complexity": getattr(method, "complexity_score", 0),
                            "lines": line_span(method),
                        }
                        for method in methods
                    ],
                    "fields": [
                        {
                            "name": field.name,
                            "type": getattr(field, "field_type", "unknown"),
                            "file_path": getattr(field, "file_path", file_path),
                            "visibility": getattr(field, "visibility", "unknown"),
                            "is_static": getattr(field, "is_static", False),
                            "is_final": getattr(field, "is_final", False),
                            "annotations": annotation_names(field),
                            "lines": line_span(field),
                        }
                        for field in fields
                    ],
                }

            logger.info(
                f"Successfully analyzed {file_path}: {len(classes)} classes, "
                f"{len(methods)} methods, {file_metrics['total_lines']} lines, "
                f"~{file_metrics['estimated_tokens']} tokens"
            )

            return result

    except Exception as e:
        # Log at the tool boundary, then let the caller see the original error.
        logger.error(f"Error analyzing {file_path}: {e}")
        raise
599
+
600
def validate_arguments(self, arguments: dict[str, Any]) -> bool:
    """
    Check tool arguments against the schema's required list and basic types.

    Args:
        arguments: Arguments to validate

    Returns:
        True when every check passes

    Raises:
        ValueError: If a required field is absent, file_path is not a
            non-blank string, language is not a string, or any of the
            include_* switches is not a boolean
    """
    # Presence of every field the schema marks as required.
    for name in self.get_tool_schema().get("required", []):
        if name not in arguments:
            raise ValueError(f"Required field '{name}' is missing")

    # file_path: must be a string with at least one non-whitespace character.
    if "file_path" in arguments:
        path_value = arguments["file_path"]
        if not isinstance(path_value, str):
            raise ValueError("file_path must be a string")
        if path_value.strip() == "":
            raise ValueError("file_path cannot be empty")

    # language: optional, string only.
    if "language" in arguments and not isinstance(arguments["language"], str):
        raise ValueError("language must be a string")

    # The three optional switches are checked in declaration order.
    for option in ("include_complexity", "include_details", "include_guidance"):
        if option in arguments and not isinstance(arguments[option], bool):
            raise ValueError(f"{option} must be a boolean")

    return True
651
+
652
def get_tool_definition(self) -> Any:
    """
    Produce the tool definition for analyze_code_scale.

    Returns:
        An mcp.types.Tool when the mcp package can be imported; otherwise a
        plain dictionary carrying the same name, description and inputSchema
    """
    tool_name = "analyze_code_scale"
    summary = "Analyze code scale, complexity, and structure metrics with LLM-optimized guidance for efficient large file analysis"

    try:
        from mcp.types import Tool

        definition = Tool(
            name=tool_name,
            description=summary,
            inputSchema=self.get_tool_schema(),
        )
    except ImportError:
        # Fallback for when MCP is not available
        definition = {
            "name": tool_name,
            "description": summary,
            "inputSchema": self.get_tool_schema(),
        }
    return definition
674
+
675
+
676
+ # Shared tool instance for easy access; created when this module is imported, using the constructor's default arguments
677
+ analyze_scale_tool = AnalyzeScaleTool()