tree-sitter-analyzer 0.8.3__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tree-sitter-analyzer might be problematic. Click here for more details.
- tree_sitter_analyzer/__init__.py +1 -1
- tree_sitter_analyzer/cli/commands/base_command.py +1 -3
- tree_sitter_analyzer/mcp/__init__.py +17 -3
- tree_sitter_analyzer/mcp/server.py +185 -53
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +673 -677
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +9 -4
- tree_sitter_analyzer/mcp/tools/read_partial_tool.py +4 -4
- tree_sitter_analyzer/security/boundary_manager.py +237 -279
- tree_sitter_analyzer/security/validator.py +241 -244
- {tree_sitter_analyzer-0.8.3.dist-info → tree_sitter_analyzer-0.9.1.dist-info}/METADATA +28 -19
- {tree_sitter_analyzer-0.8.3.dist-info → tree_sitter_analyzer-0.9.1.dist-info}/RECORD +13 -13
- {tree_sitter_analyzer-0.8.3.dist-info → tree_sitter_analyzer-0.9.1.dist-info}/WHEEL +0 -0
- {tree_sitter_analyzer-0.8.3.dist-info → tree_sitter_analyzer-0.9.1.dist-info}/entry_points.txt +0 -0
tree_sitter_analyzer/__init__.py
CHANGED
|
@@ -47,9 +47,7 @@ class BaseCommand(ABC):
|
|
|
47
47
|
return False
|
|
48
48
|
|
|
49
49
|
# Security validation
|
|
50
|
-
is_valid, error_msg = self.security_validator.validate_file_path(
|
|
51
|
-
self.args.file_path, base_path=self.project_root
|
|
52
|
-
)
|
|
50
|
+
is_valid, error_msg = self.security_validator.validate_file_path(self.args.file_path)
|
|
53
51
|
if not is_valid:
|
|
54
52
|
output_error(f"Invalid file path: {error_msg}")
|
|
55
53
|
return False
|
|
@@ -15,12 +15,26 @@ __author__ = "Tree-sitter Analyzer Team"
|
|
|
15
15
|
MCP_INFO: dict[str, Any] = {
|
|
16
16
|
"name": "tree-sitter-analyzer-mcp",
|
|
17
17
|
"version": __version__,
|
|
18
|
-
"description": "Tree-sitter based code analyzer with MCP support",
|
|
18
|
+
"description": "Tree-sitter based code analyzer with MCP support - Solve LLM token limit problems for large code files",
|
|
19
19
|
"protocol_version": "2024-11-05",
|
|
20
20
|
"capabilities": {
|
|
21
|
-
"tools": {
|
|
21
|
+
"tools": {
|
|
22
|
+
"description": "Three-step workflow for analyzing large code files",
|
|
23
|
+
"available_tools": [
|
|
24
|
+
"check_code_scale",
|
|
25
|
+
"analyze_code_structure",
|
|
26
|
+
"extract_code_section"
|
|
27
|
+
],
|
|
28
|
+
"workflow": [
|
|
29
|
+
"1. check_code_scale - Get file metrics and complexity",
|
|
30
|
+
"2. analyze_code_structure - Generate structure tables for large files",
|
|
31
|
+
"3. extract_code_section - Get specific code sections by line range"
|
|
32
|
+
]
|
|
33
|
+
},
|
|
22
34
|
"resources": {},
|
|
23
|
-
"prompts": {
|
|
35
|
+
"prompts": {
|
|
36
|
+
"usage_guide": "See README.md AI Assistant Integration section for complete workflow guide"
|
|
37
|
+
},
|
|
24
38
|
"logging": {},
|
|
25
39
|
},
|
|
26
40
|
}
|
|
@@ -53,7 +53,6 @@ from .resources import CodeFileResource, ProjectStatsResource
|
|
|
53
53
|
from .tools.base_tool import MCPTool
|
|
54
54
|
from .tools.read_partial_tool import ReadPartialTool
|
|
55
55
|
from .tools.table_format_tool import TableFormatTool
|
|
56
|
-
from .tools.universal_analyze_tool import UniversalAnalyzeTool
|
|
57
56
|
from .utils.error_handler import handle_mcp_errors
|
|
58
57
|
|
|
59
58
|
# Set up logging
|
|
@@ -77,22 +76,11 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
77
76
|
|
|
78
77
|
self.analysis_engine = get_analysis_engine(project_root)
|
|
79
78
|
self.security_validator = SecurityValidator(project_root)
|
|
80
|
-
# Ensure boundary manager exposes the exact provided project_root for consistency in tests/environments
|
|
81
|
-
try:
|
|
82
|
-
import os as _os
|
|
83
|
-
if self.security_validator.boundary_manager and project_root:
|
|
84
|
-
provided_root = _os.path.abspath(project_root)
|
|
85
|
-
self.security_validator.boundary_manager.project_root = provided_root
|
|
86
|
-
# Keep allowed directories in sync with the exposed project_root
|
|
87
|
-
self.security_validator.boundary_manager.allowed_directories = {provided_root}
|
|
88
|
-
except Exception:
|
|
89
|
-
pass
|
|
90
79
|
# Use unified analysis engine instead of deprecated AdvancedAnalyzer
|
|
91
80
|
|
|
92
|
-
# Initialize MCP tools with security validation
|
|
93
|
-
self.read_partial_tool: MCPTool = ReadPartialTool(project_root)
|
|
94
|
-
self.universal_analyze_tool: MCPTool = UniversalAnalyzeTool(project_root)
|
|
95
|
-
self.table_format_tool: MCPTool = TableFormatTool(project_root)
|
|
81
|
+
# Initialize MCP tools with security validation (three core tools)
|
|
82
|
+
self.read_partial_tool: MCPTool = ReadPartialTool(project_root) # extract_code_section
|
|
83
|
+
self.table_format_tool: MCPTool = TableFormatTool(project_root) # analyze_code_structure
|
|
96
84
|
|
|
97
85
|
# Initialize MCP resources
|
|
98
86
|
self.code_file_resource = CodeFileResource()
|
|
@@ -114,14 +102,100 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
114
102
|
if not self._initialization_complete:
|
|
115
103
|
raise RuntimeError("Server not fully initialized. Please wait for initialization to complete.")
|
|
116
104
|
|
|
117
|
-
@handle_mcp_errors("
|
|
105
|
+
@handle_mcp_errors("check_code_scale")
|
|
118
106
|
async def _analyze_code_scale(self, arguments: dict[str, Any]) -> dict[str, Any]:
|
|
119
107
|
"""
|
|
120
|
-
Analyze code scale and complexity metrics
|
|
108
|
+
Analyze code scale and complexity metrics using the analysis engine directly.
|
|
121
109
|
"""
|
|
122
110
|
self._ensure_initialized()
|
|
123
|
-
|
|
124
|
-
|
|
111
|
+
|
|
112
|
+
# Validate required arguments
|
|
113
|
+
if "file_path" not in arguments:
|
|
114
|
+
raise ValueError("file_path is required")
|
|
115
|
+
|
|
116
|
+
file_path = arguments["file_path"]
|
|
117
|
+
language = arguments.get("language")
|
|
118
|
+
include_complexity = arguments.get("include_complexity", True)
|
|
119
|
+
include_details = arguments.get("include_details", False)
|
|
120
|
+
|
|
121
|
+
# Security validation
|
|
122
|
+
is_valid, error_msg = self.security_validator.validate_file_path(file_path)
|
|
123
|
+
if not is_valid:
|
|
124
|
+
raise ValueError(f"Invalid file path: {error_msg}")
|
|
125
|
+
|
|
126
|
+
# Use analysis engine directly
|
|
127
|
+
from ..core.analysis_engine import AnalysisRequest
|
|
128
|
+
from ..language_detector import detect_language_from_file
|
|
129
|
+
from pathlib import Path
|
|
130
|
+
|
|
131
|
+
# Validate file exists
|
|
132
|
+
if not Path(file_path).exists():
|
|
133
|
+
raise FileNotFoundError(f"File not found: {file_path}")
|
|
134
|
+
|
|
135
|
+
# Detect language if not specified
|
|
136
|
+
if not language:
|
|
137
|
+
language = detect_language_from_file(file_path)
|
|
138
|
+
|
|
139
|
+
# Create analysis request
|
|
140
|
+
request = AnalysisRequest(
|
|
141
|
+
file_path=file_path,
|
|
142
|
+
language=language,
|
|
143
|
+
include_complexity=include_complexity,
|
|
144
|
+
include_details=include_details,
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
# Perform analysis
|
|
148
|
+
analysis_result = await self.analysis_engine.analyze(request)
|
|
149
|
+
|
|
150
|
+
if analysis_result is None or not analysis_result.success:
|
|
151
|
+
error_msg = analysis_result.error_message if analysis_result else "Unknown error"
|
|
152
|
+
raise RuntimeError(f"Failed to analyze file: {file_path} - {error_msg}")
|
|
153
|
+
|
|
154
|
+
# Convert to dictionary format
|
|
155
|
+
result_dict = analysis_result.to_dict()
|
|
156
|
+
|
|
157
|
+
# Format result to match test expectations
|
|
158
|
+
elements = result_dict.get("elements", [])
|
|
159
|
+
|
|
160
|
+
# Count elements by type
|
|
161
|
+
classes_count = len([e for e in elements if e.get("__class__") == "Class"])
|
|
162
|
+
methods_count = len([e for e in elements if e.get("__class__") == "Function"])
|
|
163
|
+
fields_count = len([e for e in elements if e.get("__class__") == "Variable"])
|
|
164
|
+
imports_count = len([e for e in elements if e.get("__class__") == "Import"])
|
|
165
|
+
|
|
166
|
+
result = {
|
|
167
|
+
"file_path": file_path,
|
|
168
|
+
"language": language,
|
|
169
|
+
"metrics": {
|
|
170
|
+
"lines_total": result_dict.get("line_count", 0),
|
|
171
|
+
"lines_code": result_dict.get("line_count", 0), # Approximation
|
|
172
|
+
"lines_comment": 0, # Not available in basic analysis
|
|
173
|
+
"lines_blank": 0, # Not available in basic analysis
|
|
174
|
+
"elements": {
|
|
175
|
+
"classes": classes_count,
|
|
176
|
+
"methods": methods_count,
|
|
177
|
+
"fields": fields_count,
|
|
178
|
+
"imports": imports_count,
|
|
179
|
+
"total": len(elements),
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
if include_complexity:
|
|
185
|
+
# Add complexity metrics if available
|
|
186
|
+
methods = [e for e in elements if e.get("__class__") == "Function"]
|
|
187
|
+
if methods:
|
|
188
|
+
complexities = [e.get("complexity_score", 0) for e in methods]
|
|
189
|
+
result["metrics"]["complexity"] = {
|
|
190
|
+
"total": sum(complexities),
|
|
191
|
+
"average": sum(complexities) / len(complexities) if complexities else 0,
|
|
192
|
+
"max": max(complexities) if complexities else 0,
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
if include_details:
|
|
196
|
+
result["detailed_elements"] = elements
|
|
197
|
+
|
|
198
|
+
return result
|
|
125
199
|
|
|
126
200
|
def create_server(self) -> Server:
|
|
127
201
|
"""
|
|
@@ -138,17 +212,29 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
138
212
|
# Register tools
|
|
139
213
|
@server.list_tools() # type: ignore
|
|
140
214
|
async def handle_list_tools() -> list[Tool]:
|
|
141
|
-
"""
|
|
215
|
+
"""
|
|
216
|
+
List available tools with clear naming and usage guidance.
|
|
217
|
+
|
|
218
|
+
🎯 SOLVE LLM TOKEN LIMIT PROBLEMS FOR LARGE CODE FILES
|
|
219
|
+
|
|
220
|
+
REQUIRED WORKFLOW FOR LLM (follow this order):
|
|
221
|
+
1. FIRST: 'check_code_scale' - understand file size and complexity
|
|
222
|
+
2. SECOND: 'analyze_code_structure' - get detailed structure with line positions
|
|
223
|
+
3. THIRD: 'extract_code_section' - get specific code from line positions
|
|
224
|
+
|
|
225
|
+
⚠️ PARAMETER NAMES: Use snake_case (file_path, start_line, end_line, format_type)
|
|
226
|
+
📖 Full guide: See README.md AI Assistant Integration section
|
|
227
|
+
"""
|
|
142
228
|
tools = [
|
|
143
229
|
Tool(
|
|
144
|
-
name="
|
|
145
|
-
description="
|
|
230
|
+
name="check_code_scale",
|
|
231
|
+
description="🔍 STEP 1: Check code file scale, complexity, and basic metrics. Use this FIRST to understand if the file is large and needs structure analysis. Returns: line count, element counts, complexity metrics.",
|
|
146
232
|
inputSchema={
|
|
147
233
|
"type": "object",
|
|
148
234
|
"properties": {
|
|
149
235
|
"file_path": {
|
|
150
236
|
"type": "string",
|
|
151
|
-
"description": "Path to the code file to analyze",
|
|
237
|
+
"description": "Path to the code file to analyze (REQUIRED - use exact file path)",
|
|
152
238
|
},
|
|
153
239
|
"language": {
|
|
154
240
|
"type": "string",
|
|
@@ -156,35 +242,87 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
156
242
|
},
|
|
157
243
|
"include_complexity": {
|
|
158
244
|
"type": "boolean",
|
|
159
|
-
"description": "Include complexity metrics in the analysis",
|
|
245
|
+
"description": "Include complexity metrics in the analysis (default: true)",
|
|
160
246
|
"default": True,
|
|
161
247
|
},
|
|
162
248
|
"include_details": {
|
|
163
249
|
"type": "boolean",
|
|
164
|
-
"description": "Include detailed element information",
|
|
250
|
+
"description": "Include detailed element information (default: false)",
|
|
165
251
|
"default": False,
|
|
166
252
|
},
|
|
167
253
|
},
|
|
168
254
|
"required": ["file_path"],
|
|
169
255
|
"additionalProperties": False,
|
|
170
256
|
},
|
|
171
|
-
)
|
|
257
|
+
),
|
|
258
|
+
Tool(
|
|
259
|
+
name="analyze_code_structure",
|
|
260
|
+
description="📊 STEP 2: Generate detailed structure tables (classes, methods, fields) with LINE POSITIONS for large files. Use AFTER check_code_scale shows file is large (>100 lines). Returns: tables with start_line/end_line for each element.",
|
|
261
|
+
inputSchema={
|
|
262
|
+
"type": "object",
|
|
263
|
+
"properties": {
|
|
264
|
+
"file_path": {
|
|
265
|
+
"type": "string",
|
|
266
|
+
"description": "Path to the code file to analyze (REQUIRED - use exact file path)",
|
|
267
|
+
},
|
|
268
|
+
"format_type": {
|
|
269
|
+
"type": "string",
|
|
270
|
+
"description": "Table format type (default: 'full' for detailed tables)",
|
|
271
|
+
"enum": ["full", "compact", "csv"],
|
|
272
|
+
"default": "full",
|
|
273
|
+
},
|
|
274
|
+
"language": {
|
|
275
|
+
"type": "string",
|
|
276
|
+
"description": "Programming language (optional, auto-detected if not specified)",
|
|
277
|
+
},
|
|
278
|
+
},
|
|
279
|
+
"required": ["file_path"],
|
|
280
|
+
"additionalProperties": False,
|
|
281
|
+
},
|
|
282
|
+
),
|
|
283
|
+
Tool(
|
|
284
|
+
name="extract_code_section",
|
|
285
|
+
description="✂️ STEP 3: Extract specific code sections by line range. Use AFTER analyze_code_structure to get exact code from structure table line positions. Returns: precise code content without reading entire file.",
|
|
286
|
+
inputSchema={
|
|
287
|
+
"type": "object",
|
|
288
|
+
"properties": {
|
|
289
|
+
"file_path": {
|
|
290
|
+
"type": "string",
|
|
291
|
+
"description": "Path to the code file to read (REQUIRED - use exact file path)",
|
|
292
|
+
},
|
|
293
|
+
"start_line": {
|
|
294
|
+
"type": "integer",
|
|
295
|
+
"description": "Starting line number (REQUIRED - 1-based, get from structure table)",
|
|
296
|
+
"minimum": 1,
|
|
297
|
+
},
|
|
298
|
+
"end_line": {
|
|
299
|
+
"type": "integer",
|
|
300
|
+
"description": "Ending line number (optional - 1-based, reads to end if not specified)",
|
|
301
|
+
"minimum": 1,
|
|
302
|
+
},
|
|
303
|
+
"start_column": {
|
|
304
|
+
"type": "integer",
|
|
305
|
+
"description": "Starting column number (optional - 0-based)",
|
|
306
|
+
"minimum": 0,
|
|
307
|
+
},
|
|
308
|
+
"end_column": {
|
|
309
|
+
"type": "integer",
|
|
310
|
+
"description": "Ending column number (optional - 0-based)",
|
|
311
|
+
"minimum": 0,
|
|
312
|
+
},
|
|
313
|
+
"format": {
|
|
314
|
+
"type": "string",
|
|
315
|
+
"description": "Output format for the content (default: 'text')",
|
|
316
|
+
"enum": ["text", "json"],
|
|
317
|
+
"default": "text",
|
|
318
|
+
},
|
|
319
|
+
},
|
|
320
|
+
"required": ["file_path", "start_line"],
|
|
321
|
+
"additionalProperties": False,
|
|
322
|
+
},
|
|
323
|
+
),
|
|
172
324
|
]
|
|
173
325
|
|
|
174
|
-
# Add tools from tool classes - FIXED VERSION
|
|
175
|
-
for tool_instance in [
|
|
176
|
-
self.read_partial_tool,
|
|
177
|
-
self.table_format_tool,
|
|
178
|
-
self.universal_analyze_tool,
|
|
179
|
-
]:
|
|
180
|
-
tool_def = tool_instance.get_tool_definition()
|
|
181
|
-
if isinstance(tool_def, dict):
|
|
182
|
-
# Convert dict to Tool object
|
|
183
|
-
tools.append(Tool(**tool_def))
|
|
184
|
-
else:
|
|
185
|
-
# Already a Tool object
|
|
186
|
-
tools.append(tool_def)
|
|
187
|
-
|
|
188
326
|
return tools
|
|
189
327
|
|
|
190
328
|
@server.call_tool() # type: ignore
|
|
@@ -212,7 +350,9 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
212
350
|
# Basic sanitization for string inputs
|
|
213
351
|
sanitized_value = self.security_validator.sanitize_input(value, max_length=10000)
|
|
214
352
|
arguments[key] = sanitized_value
|
|
215
|
-
|
|
353
|
+
|
|
354
|
+
# Handle tool calls with unified naming (only new names)
|
|
355
|
+
if sanitized_name == "check_code_scale":
|
|
216
356
|
result = await self._analyze_code_scale(arguments)
|
|
217
357
|
return [
|
|
218
358
|
TextContent(
|
|
@@ -220,15 +360,7 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
220
360
|
text=json.dumps(result, indent=2, ensure_ascii=False),
|
|
221
361
|
)
|
|
222
362
|
]
|
|
223
|
-
elif sanitized_name == "
|
|
224
|
-
result = await self.read_partial_tool.execute(arguments)
|
|
225
|
-
return [
|
|
226
|
-
TextContent(
|
|
227
|
-
type="text",
|
|
228
|
-
text=json.dumps(result, indent=2, ensure_ascii=False),
|
|
229
|
-
)
|
|
230
|
-
]
|
|
231
|
-
elif sanitized_name == "format_table":
|
|
363
|
+
elif sanitized_name == "analyze_code_structure":
|
|
232
364
|
result = await self.table_format_tool.execute(arguments)
|
|
233
365
|
return [
|
|
234
366
|
TextContent(
|
|
@@ -236,8 +368,8 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
236
368
|
text=json.dumps(result, indent=2, ensure_ascii=False),
|
|
237
369
|
)
|
|
238
370
|
]
|
|
239
|
-
elif sanitized_name == "
|
|
240
|
-
result = await self.universal_analyze_tool.execute(arguments)
|
|
371
|
+
elif sanitized_name == "extract_code_section":
|
|
372
|
+
result = await self.read_partial_tool.execute(arguments)
|
|
241
373
|
return [
|
|
242
374
|
TextContent(
|
|
243
375
|
type="text",
|
|
@@ -245,7 +377,7 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
245
377
|
)
|
|
246
378
|
]
|
|
247
379
|
else:
|
|
248
|
-
raise ValueError(f"Unknown tool: {name}")
|
|
380
|
+
raise ValueError(f"Unknown tool: {name}. Available tools: check_code_scale, analyze_code_structure, extract_code_section")
|
|
249
381
|
|
|
250
382
|
except Exception as e:
|
|
251
383
|
try:
|