tree-sitter-analyzer 0.8.2__py3-none-any.whl → 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tree-sitter-analyzer might be problematic.
- tree_sitter_analyzer/__init__.py +1 -1
- tree_sitter_analyzer/mcp/__init__.py +17 -3
- tree_sitter_analyzer/mcp/server.py +185 -43
- tree_sitter_analyzer/mcp/tools/read_partial_tool.py +4 -4
- tree_sitter_analyzer/project_detector.py +317 -317
- tree_sitter_analyzer/security/__init__.py +22 -22
- tree_sitter_analyzer/security/boundary_manager.py +237 -237
- tree_sitter_analyzer/security/regex_checker.py +292 -292
- tree_sitter_analyzer/security/validator.py +241 -241
- {tree_sitter_analyzer-0.8.2.dist-info → tree_sitter_analyzer-0.9.1.dist-info}/METADATA +23 -13
- {tree_sitter_analyzer-0.8.2.dist-info → tree_sitter_analyzer-0.9.1.dist-info}/RECORD +13 -13
- {tree_sitter_analyzer-0.8.2.dist-info → tree_sitter_analyzer-0.9.1.dist-info}/WHEEL +0 -0
- {tree_sitter_analyzer-0.8.2.dist-info → tree_sitter_analyzer-0.9.1.dist-info}/entry_points.txt +0 -0
tree_sitter_analyzer/__init__.py
CHANGED

tree_sitter_analyzer/mcp/__init__.py
CHANGED

@@ -15,12 +15,26 @@ __author__ = "Tree-sitter Analyzer Team"
 MCP_INFO: dict[str, Any] = {
     "name": "tree-sitter-analyzer-mcp",
     "version": __version__,
-    "description": "Tree-sitter based code analyzer with MCP support",
+    "description": "Tree-sitter based code analyzer with MCP support - Solve LLM token limit problems for large code files",
     "protocol_version": "2024-11-05",
     "capabilities": {
-        "tools": {
+        "tools": {
+            "description": "Three-step workflow for analyzing large code files",
+            "available_tools": [
+                "check_code_scale",
+                "analyze_code_structure",
+                "extract_code_section"
+            ],
+            "workflow": [
+                "1. check_code_scale - Get file metrics and complexity",
+                "2. analyze_code_structure - Generate structure tables for large files",
+                "3. extract_code_section - Get specific code sections by line range"
+            ]
+        },
         "resources": {},
-        "prompts": {
+        "prompts": {
+            "usage_guide": "See README.md AI Assistant Integration section for complete workflow guide"
+        },
         "logging": {},
     },
 }
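The capabilities block added above is plain module-level data, so the advertised workflow can be read directly. A minimal sketch, assuming MCP_INFO is importable from tree_sitter_analyzer.mcp as defined in this hunk:

# Illustrative only: read the workflow metadata added in the hunk above.
from tree_sitter_analyzer.mcp import MCP_INFO

tools_info = MCP_INFO["capabilities"]["tools"]
print(tools_info["description"])      # "Three-step workflow for analyzing large code files"
for step in tools_info["workflow"]:   # prints the three ordered steps
    print(step)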
tree_sitter_analyzer/mcp/server.py
CHANGED

@@ -53,7 +53,6 @@ from .resources import CodeFileResource, ProjectStatsResource
 from .tools.base_tool import MCPTool
 from .tools.read_partial_tool import ReadPartialTool
 from .tools.table_format_tool import TableFormatTool
-from .tools.universal_analyze_tool import UniversalAnalyzeTool
 from .utils.error_handler import handle_mcp_errors

 # Set up logging
@@ -79,10 +78,9 @@ class TreeSitterAnalyzerMCPServer:
         self.security_validator = SecurityValidator(project_root)
         # Use unified analysis engine instead of deprecated AdvancedAnalyzer

-        # Initialize MCP tools with security validation
-        self.read_partial_tool: MCPTool = ReadPartialTool(project_root)
-        self.
-        self.table_format_tool: MCPTool = TableFormatTool(project_root)
+        # Initialize MCP tools with security validation (three core tools)
+        self.read_partial_tool: MCPTool = ReadPartialTool(project_root)  # extract_code_section
+        self.table_format_tool: MCPTool = TableFormatTool(project_root)  # analyze_code_structure

         # Initialize MCP resources
         self.code_file_resource = CodeFileResource()
@@ -104,14 +102,100 @@ class TreeSitterAnalyzerMCPServer:
         if not self._initialization_complete:
             raise RuntimeError("Server not fully initialized. Please wait for initialization to complete.")

-    @handle_mcp_errors("
+    @handle_mcp_errors("check_code_scale")
     async def _analyze_code_scale(self, arguments: dict[str, Any]) -> dict[str, Any]:
         """
-        Analyze code scale and complexity metrics
+        Analyze code scale and complexity metrics using the analysis engine directly.
         """
         self._ensure_initialized()
-
-
+
+        # Validate required arguments
+        if "file_path" not in arguments:
+            raise ValueError("file_path is required")
+
+        file_path = arguments["file_path"]
+        language = arguments.get("language")
+        include_complexity = arguments.get("include_complexity", True)
+        include_details = arguments.get("include_details", False)
+
+        # Security validation
+        is_valid, error_msg = self.security_validator.validate_file_path(file_path)
+        if not is_valid:
+            raise ValueError(f"Invalid file path: {error_msg}")
+
+        # Use analysis engine directly
+        from ..core.analysis_engine import AnalysisRequest
+        from ..language_detector import detect_language_from_file
+        from pathlib import Path
+
+        # Validate file exists
+        if not Path(file_path).exists():
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+        # Detect language if not specified
+        if not language:
+            language = detect_language_from_file(file_path)
+
+        # Create analysis request
+        request = AnalysisRequest(
+            file_path=file_path,
+            language=language,
+            include_complexity=include_complexity,
+            include_details=include_details,
+        )
+
+        # Perform analysis
+        analysis_result = await self.analysis_engine.analyze(request)
+
+        if analysis_result is None or not analysis_result.success:
+            error_msg = analysis_result.error_message if analysis_result else "Unknown error"
+            raise RuntimeError(f"Failed to analyze file: {file_path} - {error_msg}")
+
+        # Convert to dictionary format
+        result_dict = analysis_result.to_dict()
+
+        # Format result to match test expectations
+        elements = result_dict.get("elements", [])
+
+        # Count elements by type
+        classes_count = len([e for e in elements if e.get("__class__") == "Class"])
+        methods_count = len([e for e in elements if e.get("__class__") == "Function"])
+        fields_count = len([e for e in elements if e.get("__class__") == "Variable"])
+        imports_count = len([e for e in elements if e.get("__class__") == "Import"])
+
+        result = {
+            "file_path": file_path,
+            "language": language,
+            "metrics": {
+                "lines_total": result_dict.get("line_count", 0),
+                "lines_code": result_dict.get("line_count", 0),  # Approximation
+                "lines_comment": 0,  # Not available in basic analysis
+                "lines_blank": 0,  # Not available in basic analysis
+                "elements": {
+                    "classes": classes_count,
+                    "methods": methods_count,
+                    "fields": fields_count,
+                    "imports": imports_count,
+                    "total": len(elements),
+                }
+            }
+        }
+
+        if include_complexity:
+            # Add complexity metrics if available
+            methods = [e for e in elements if e.get("__class__") == "Function"]
+            if methods:
+                complexities = [e.get("complexity_score", 0) for e in methods]
+                result["metrics"]["complexity"] = {
+                    "total": sum(complexities),
+                    "average": sum(complexities) / len(complexities) if complexities else 0,
+                    "max": max(complexities) if complexities else 0,
+                }
+
+        if include_details:
+            result["detailed_elements"] = elements
+
+        return result

     def create_server(self) -> Server:
         """
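For orientation, here is roughly what the handler above consumes and returns. A minimal sketch; the file path, language, and every number are made-up example values, not output from the package:

# Illustrative only: argument shape and result structure built by _analyze_code_scale above.
arguments = {"file_path": "examples/Service.java", "include_complexity": True}  # made-up path
# await server._analyze_code_scale(arguments) would return a dict shaped like:
expected_shape = {
    "file_path": "examples/Service.java",
    "language": "java",
    "metrics": {
        "lines_total": 1419,   # made-up numbers throughout
        "lines_code": 1419,    # approximation, per the comment in the hunk
        "lines_comment": 0,
        "lines_blank": 0,
        "elements": {"classes": 1, "methods": 66, "fields": 9, "imports": 8, "total": 84},
        "complexity": {"total": 348, "average": 5.3, "max": 15},
    },
}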
@@ -128,17 +212,29 @@ class TreeSitterAnalyzerMCPServer:
         # Register tools
         @server.list_tools()  # type: ignore
         async def handle_list_tools() -> list[Tool]:
-            """
+            """
+            List available tools with clear naming and usage guidance.
+
+            🎯 SOLVE LLM TOKEN LIMIT PROBLEMS FOR LARGE CODE FILES
+
+            REQUIRED WORKFLOW FOR LLM (follow this order):
+            1. FIRST: 'check_code_scale' - understand file size and complexity
+            2. SECOND: 'analyze_code_structure' - get detailed structure with line positions
+            3. THIRD: 'extract_code_section' - get specific code from line positions
+
+            ⚠️ PARAMETER NAMES: Use snake_case (file_path, start_line, end_line, format_type)
+            📖 Full guide: See README.md AI Assistant Integration section
+            """
             tools = [
                 Tool(
-                    name="
-                    description="
+                    name="check_code_scale",
+                    description="🔍 STEP 1: Check code file scale, complexity, and basic metrics. Use this FIRST to understand if the file is large and needs structure analysis. Returns: line count, element counts, complexity metrics.",
                     inputSchema={
                         "type": "object",
                         "properties": {
                             "file_path": {
                                 "type": "string",
-                                "description": "Path to the code file to analyze",
+                                "description": "Path to the code file to analyze (REQUIRED - use exact file path)",
                             },
                             "language": {
                                 "type": "string",
@@ -146,35 +242,87 @@ class TreeSitterAnalyzerMCPServer:
                             },
                             "include_complexity": {
                                 "type": "boolean",
-                                "description": "Include complexity metrics in the analysis",
+                                "description": "Include complexity metrics in the analysis (default: true)",
                                 "default": True,
                             },
                             "include_details": {
                                 "type": "boolean",
-                                "description": "Include detailed element information",
+                                "description": "Include detailed element information (default: false)",
                                 "default": False,
                             },
                         },
                         "required": ["file_path"],
                         "additionalProperties": False,
                     },
-                )
+                ),
+                Tool(
+                    name="analyze_code_structure",
+                    description="📊 STEP 2: Generate detailed structure tables (classes, methods, fields) with LINE POSITIONS for large files. Use AFTER check_code_scale shows file is large (>100 lines). Returns: tables with start_line/end_line for each element.",
+                    inputSchema={
+                        "type": "object",
+                        "properties": {
+                            "file_path": {
+                                "type": "string",
+                                "description": "Path to the code file to analyze (REQUIRED - use exact file path)",
+                            },
+                            "format_type": {
+                                "type": "string",
+                                "description": "Table format type (default: 'full' for detailed tables)",
+                                "enum": ["full", "compact", "csv"],
+                                "default": "full",
+                            },
+                            "language": {
+                                "type": "string",
+                                "description": "Programming language (optional, auto-detected if not specified)",
+                            },
+                        },
+                        "required": ["file_path"],
+                        "additionalProperties": False,
+                    },
+                ),
+                Tool(
+                    name="extract_code_section",
+                    description="✂️ STEP 3: Extract specific code sections by line range. Use AFTER analyze_code_structure to get exact code from structure table line positions. Returns: precise code content without reading entire file.",
+                    inputSchema={
+                        "type": "object",
+                        "properties": {
+                            "file_path": {
+                                "type": "string",
+                                "description": "Path to the code file to read (REQUIRED - use exact file path)",
+                            },
+                            "start_line": {
+                                "type": "integer",
+                                "description": "Starting line number (REQUIRED - 1-based, get from structure table)",
+                                "minimum": 1,
+                            },
+                            "end_line": {
+                                "type": "integer",
+                                "description": "Ending line number (optional - 1-based, reads to end if not specified)",
+                                "minimum": 1,
+                            },
+                            "start_column": {
+                                "type": "integer",
+                                "description": "Starting column number (optional - 0-based)",
+                                "minimum": 0,
+                            },
+                            "end_column": {
+                                "type": "integer",
+                                "description": "Ending column number (optional - 0-based)",
+                                "minimum": 0,
+                            },
+                            "format": {
+                                "type": "string",
+                                "description": "Output format for the content (default: 'text')",
+                                "enum": ["text", "json"],
+                                "default": "text",
+                            },
+                        },
+                        "required": ["file_path", "start_line"],
+                        "additionalProperties": False,
+                    },
+                ),
             ]

-            # Add tools from tool classes - FIXED VERSION
-            for tool_instance in [
-                self.read_partial_tool,
-                self.table_format_tool,
-                self.universal_analyze_tool,
-            ]:
-                tool_def = tool_instance.get_tool_definition()
-                if isinstance(tool_def, dict):
-                    # Convert dict to Tool object
-                    tools.append(Tool(**tool_def))
-                else:
-                    # Already a Tool object
-                    tools.append(tool_def)
-
             return tools

         @server.call_tool()  # type: ignore
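Taken together, the three schemas above describe a single call sequence. A minimal sketch of that sequence; call_tool stands in for whatever MCP client is in use and is not an API of this package, and the line numbers are placeholders:

# Illustrative only: the three-step workflow implied by the tool schemas above.
async def analyze_large_file(call_tool, path: str):
    # Step 1: size and complexity metrics
    scale = await call_tool("check_code_scale", {"file_path": path})
    # Step 2: structure tables with start_line/end_line for each element
    structure = await call_tool("analyze_code_structure", {"file_path": path, "format_type": "full"})
    # Step 3: extract only the lines of interest (placeholder range)
    section = await call_tool("extract_code_section", {"file_path": path, "start_line": 120, "end_line": 180})
    return scale, structure, section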
@@ -202,7 +350,9 @@ class TreeSitterAnalyzerMCPServer:
                         # Basic sanitization for string inputs
                         sanitized_value = self.security_validator.sanitize_input(value, max_length=10000)
                         arguments[key] = sanitized_value
-
+
+                # Handle tool calls with unified naming (only new names)
+                if sanitized_name == "check_code_scale":
                     result = await self._analyze_code_scale(arguments)
                     return [
                         TextContent(
@@ -210,15 +360,7 @@ class TreeSitterAnalyzerMCPServer:
                             text=json.dumps(result, indent=2, ensure_ascii=False),
                         )
                     ]
-                elif sanitized_name == "
-                    result = await self.read_partial_tool.execute(arguments)
-                    return [
-                        TextContent(
-                            type="text",
-                            text=json.dumps(result, indent=2, ensure_ascii=False),
-                        )
-                    ]
-                elif sanitized_name == "format_table":
+                elif sanitized_name == "analyze_code_structure":
                     result = await self.table_format_tool.execute(arguments)
                     return [
                         TextContent(
@@ -226,8 +368,8 @@ class TreeSitterAnalyzerMCPServer:
                             text=json.dumps(result, indent=2, ensure_ascii=False),
                         )
                     ]
-                elif sanitized_name == "
-                    result = await self.
+                elif sanitized_name == "extract_code_section":
+                    result = await self.read_partial_tool.execute(arguments)
                     return [
                         TextContent(
                             type="text",
@@ -235,7 +377,7 @@ class TreeSitterAnalyzerMCPServer:
                         )
                     ]
                 else:
-                    raise ValueError(f"Unknown tool: {name}")
+                    raise ValueError(f"Unknown tool: {name}. Available tools: check_code_scale, analyze_code_structure, extract_code_section")

             except Exception as e:
                 try:
tree_sitter_analyzer/mcp/tools/read_partial_tool.py
CHANGED

@@ -291,15 +291,15 @@ class ReadPartialTool:
             from mcp.types import Tool

             return Tool(
-                name="
-                description="
+                name="extract_code_section",
+                description="Extract specific code sections by line range (equivalent to CLI --partial-read option)",
                 inputSchema=self.get_tool_schema(),
             )
         except ImportError:
             # Fallback for when MCP is not available
             return {
-                "name": "
-                "description": "
+                "name": "extract_code_section",
+                "description": "Extract specific code sections by line range (equivalent to CLI --partial-read option)",
                 "inputSchema": self.get_tool_schema(),
             }
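Both branches above now expose the same renamed tool. A minimal sketch of consuming the definition either way, assuming project_root points at a valid project directory:

# Illustrative only: the definition is an mcp.types.Tool when mcp is installed, a plain dict otherwise.
tool_def = ReadPartialTool(project_root).get_tool_definition()
name = tool_def["name"] if isinstance(tool_def, dict) else tool_def.name
assert name == "extract_code_section"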