tree-sitter-analyzer 0.8.3__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release: this version of tree-sitter-analyzer might be problematic.

@@ -11,7 +11,7 @@ Architecture:
 - Data Models: Generic and language-specific code element representations
 """
 
-__version__ = "0.8.3"
+__version__ = "0.9.1"
 __author__ = "aisheng.yu"
 __email__ = "aimasteracc@gmail.com"
 
@@ -47,9 +47,7 @@ class BaseCommand(ABC):
             return False
 
         # Security validation
-        is_valid, error_msg = self.security_validator.validate_file_path(
-            self.args.file_path, base_path=self.project_root
-        )
+        is_valid, error_msg = self.security_validator.validate_file_path(self.args.file_path)
         if not is_valid:
            output_error(f"Invalid file path: {error_msg}")
            return False
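
The dropped base_path keyword suggests the validator now carries the project boundary itself instead of receiving it on every call. A minimal sketch of the 0.9.1 calling convention, assuming a SecurityValidator instance is built elsewhere with the project root (as the server hunk later in this diff does); check_path is a hypothetical helper, not part of the package:

def check_path(validator, file_path: str) -> str:
    # validate_file_path() now takes only the path; the project boundary is fixed
    # when the validator is constructed (SecurityValidator(project_root) below).
    is_valid, error_msg = validator.validate_file_path(file_path)
    if not is_valid:
        raise ValueError(f"Invalid file path: {error_msg}")
    return file_path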
@@ -15,12 +15,26 @@ __author__ = "Tree-sitter Analyzer Team"
 MCP_INFO: dict[str, Any] = {
     "name": "tree-sitter-analyzer-mcp",
     "version": __version__,
-    "description": "Tree-sitter based code analyzer with MCP support",
+    "description": "Tree-sitter based code analyzer with MCP support - Solve LLM token limit problems for large code files",
     "protocol_version": "2024-11-05",
     "capabilities": {
-        "tools": {},
+        "tools": {
+            "description": "Three-step workflow for analyzing large code files",
+            "available_tools": [
+                "check_code_scale",
+                "analyze_code_structure",
+                "extract_code_section"
+            ],
+            "workflow": [
+                "1. check_code_scale - Get file metrics and complexity",
+                "2. analyze_code_structure - Generate structure tables for large files",
+                "3. extract_code_section - Get specific code sections by line range"
+            ]
+        },
         "resources": {},
-        "prompts": {},
+        "prompts": {
+            "usage_guide": "See README.md AI Assistant Integration section for complete workflow guide"
+        },
         "logging": {},
     },
 }
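
The expanded capabilities block advertises a three-step workflow. As a rough illustration (not part of the diff), these are the MCP "tools/call" payloads a client would send for that workflow under protocol 2024-11-05; the file path and line numbers are invented, and the argument keys follow the tool schemas defined later in this diff (JSON-RPC envelope fields omitted):

# Hypothetical example payloads; only the tool names and argument keys come from this diff.
step1 = {"method": "tools/call",
         "params": {"name": "check_code_scale",
                    "arguments": {"file_path": "examples/BigFile.java"}}}
step2 = {"method": "tools/call",
         "params": {"name": "analyze_code_structure",
                    "arguments": {"file_path": "examples/BigFile.java", "format_type": "full"}}}
step3 = {"method": "tools/call",
         "params": {"name": "extract_code_section",
                    "arguments": {"file_path": "examples/BigFile.java",
                                  "start_line": 120, "end_line": 180}}}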
@@ -53,7 +53,6 @@ from .resources import CodeFileResource, ProjectStatsResource
 from .tools.base_tool import MCPTool
 from .tools.read_partial_tool import ReadPartialTool
 from .tools.table_format_tool import TableFormatTool
-from .tools.universal_analyze_tool import UniversalAnalyzeTool
 from .utils.error_handler import handle_mcp_errors
 
 # Set up logging
@@ -77,22 +76,11 @@ class TreeSitterAnalyzerMCPServer:
 
         self.analysis_engine = get_analysis_engine(project_root)
         self.security_validator = SecurityValidator(project_root)
-        # Ensure boundary manager exposes the exact provided project_root for consistency in tests/environments
-        try:
-            import os as _os
-            if self.security_validator.boundary_manager and project_root:
-                provided_root = _os.path.abspath(project_root)
-                self.security_validator.boundary_manager.project_root = provided_root
-                # Keep allowed directories in sync with the exposed project_root
-                self.security_validator.boundary_manager.allowed_directories = {provided_root}
-        except Exception:
-            pass
         # Use unified analysis engine instead of deprecated AdvancedAnalyzer
 
-        # Initialize MCP tools with security validation
-        self.read_partial_tool: MCPTool = ReadPartialTool(project_root)
-        self.universal_analyze_tool: MCPTool = UniversalAnalyzeTool(project_root)
-        self.table_format_tool: MCPTool = TableFormatTool(project_root)
+        # Initialize MCP tools with security validation (three core tools)
+        self.read_partial_tool: MCPTool = ReadPartialTool(project_root)  # extract_code_section
+        self.table_format_tool: MCPTool = TableFormatTool(project_root)  # analyze_code_structure
 
         # Initialize MCP resources
         self.code_file_resource = CodeFileResource()
@@ -114,14 +102,100 @@ class TreeSitterAnalyzerMCPServer:
         if not self._initialization_complete:
             raise RuntimeError("Server not fully initialized. Please wait for initialization to complete.")
 
-    @handle_mcp_errors("analyze_code_scale")
+    @handle_mcp_errors("check_code_scale")
     async def _analyze_code_scale(self, arguments: dict[str, Any]) -> dict[str, Any]:
         """
-        Analyze code scale and complexity metrics by delegating to the universal_analyze_tool.
+        Analyze code scale and complexity metrics using the analysis engine directly.
         """
         self._ensure_initialized()
-        # Delegate the execution to the already initialized tool
-        return await self.universal_analyze_tool.execute(arguments)
+
+        # Validate required arguments
+        if "file_path" not in arguments:
+            raise ValueError("file_path is required")
+
+        file_path = arguments["file_path"]
+        language = arguments.get("language")
+        include_complexity = arguments.get("include_complexity", True)
+        include_details = arguments.get("include_details", False)
+
+        # Security validation
+        is_valid, error_msg = self.security_validator.validate_file_path(file_path)
+        if not is_valid:
+            raise ValueError(f"Invalid file path: {error_msg}")
+
+        # Use analysis engine directly
+        from ..core.analysis_engine import AnalysisRequest
+        from ..language_detector import detect_language_from_file
+        from pathlib import Path
+
+        # Validate file exists
+        if not Path(file_path).exists():
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+        # Detect language if not specified
+        if not language:
+            language = detect_language_from_file(file_path)
+
+        # Create analysis request
+        request = AnalysisRequest(
+            file_path=file_path,
+            language=language,
+            include_complexity=include_complexity,
+            include_details=include_details,
+        )
+
+        # Perform analysis
+        analysis_result = await self.analysis_engine.analyze(request)
+
+        if analysis_result is None or not analysis_result.success:
+            error_msg = analysis_result.error_message if analysis_result else "Unknown error"
+            raise RuntimeError(f"Failed to analyze file: {file_path} - {error_msg}")
+
+        # Convert to dictionary format
+        result_dict = analysis_result.to_dict()
+
+        # Format result to match test expectations
+        elements = result_dict.get("elements", [])
+
+        # Count elements by type
+        classes_count = len([e for e in elements if e.get("__class__") == "Class"])
+        methods_count = len([e for e in elements if e.get("__class__") == "Function"])
+        fields_count = len([e for e in elements if e.get("__class__") == "Variable"])
+        imports_count = len([e for e in elements if e.get("__class__") == "Import"])
+
+        result = {
+            "file_path": file_path,
+            "language": language,
+            "metrics": {
+                "lines_total": result_dict.get("line_count", 0),
+                "lines_code": result_dict.get("line_count", 0),  # Approximation
+                "lines_comment": 0,  # Not available in basic analysis
+                "lines_blank": 0,  # Not available in basic analysis
+                "elements": {
+                    "classes": classes_count,
+                    "methods": methods_count,
+                    "fields": fields_count,
+                    "imports": imports_count,
+                    "total": len(elements),
+                }
+            }
+        }
+
+        if include_complexity:
+            # Add complexity metrics if available
+            methods = [e for e in elements if e.get("__class__") == "Function"]
+            if methods:
+                complexities = [e.get("complexity_score", 0) for e in methods]
+                result["metrics"]["complexity"] = {
+                    "total": sum(complexities),
+                    "average": sum(complexities) / len(complexities) if complexities else 0,
+                    "max": max(complexities) if complexities else 0,
+                }
+
+        if include_details:
+            result["detailed_elements"] = elements
+
+        return result
 
     def create_server(self) -> Server:
         """
@@ -138,17 +212,29 @@ class TreeSitterAnalyzerMCPServer:
         # Register tools
         @server.list_tools()  # type: ignore
         async def handle_list_tools() -> list[Tool]:
-            """List available tools."""
+            """
+            List available tools with clear naming and usage guidance.
+
+            🎯 SOLVE LLM TOKEN LIMIT PROBLEMS FOR LARGE CODE FILES
+
+            REQUIRED WORKFLOW FOR LLM (follow this order):
+            1. FIRST: 'check_code_scale' - understand file size and complexity
+            2. SECOND: 'analyze_code_structure' - get detailed structure with line positions
+            3. THIRD: 'extract_code_section' - get specific code from line positions
+
+            ⚠️ PARAMETER NAMES: Use snake_case (file_path, start_line, end_line, format_type)
+            📖 Full guide: See README.md AI Assistant Integration section
+            """
             tools = [
                 Tool(
-                    name="analyze_code_scale",
-                    description="Analyze code scale, complexity, and structure metrics",
+                    name="check_code_scale",
+                    description="🔍 STEP 1: Check code file scale, complexity, and basic metrics. Use this FIRST to understand if the file is large and needs structure analysis. Returns: line count, element counts, complexity metrics.",
                     inputSchema={
                         "type": "object",
                         "properties": {
                             "file_path": {
                                 "type": "string",
-                                "description": "Path to the code file to analyze",
+                                "description": "Path to the code file to analyze (REQUIRED - use exact file path)",
                             },
                             "language": {
                                 "type": "string",
@@ -156,35 +242,87 @@ class TreeSitterAnalyzerMCPServer:
                             },
                             "include_complexity": {
                                 "type": "boolean",
-                                "description": "Include complexity metrics in the analysis",
+                                "description": "Include complexity metrics in the analysis (default: true)",
                                 "default": True,
                             },
                             "include_details": {
                                 "type": "boolean",
-                                "description": "Include detailed element information",
+                                "description": "Include detailed element information (default: false)",
                                 "default": False,
                             },
                         },
                         "required": ["file_path"],
                         "additionalProperties": False,
                     },
-                )
+                ),
+                Tool(
+                    name="analyze_code_structure",
+                    description="📊 STEP 2: Generate detailed structure tables (classes, methods, fields) with LINE POSITIONS for large files. Use AFTER check_code_scale shows file is large (>100 lines). Returns: tables with start_line/end_line for each element.",
+                    inputSchema={
+                        "type": "object",
+                        "properties": {
+                            "file_path": {
+                                "type": "string",
+                                "description": "Path to the code file to analyze (REQUIRED - use exact file path)",
+                            },
+                            "format_type": {
+                                "type": "string",
+                                "description": "Table format type (default: 'full' for detailed tables)",
+                                "enum": ["full", "compact", "csv"],
+                                "default": "full",
+                            },
+                            "language": {
+                                "type": "string",
+                                "description": "Programming language (optional, auto-detected if not specified)",
+                            },
+                        },
+                        "required": ["file_path"],
+                        "additionalProperties": False,
+                    },
+                ),
+                Tool(
+                    name="extract_code_section",
+                    description="✂️ STEP 3: Extract specific code sections by line range. Use AFTER analyze_code_structure to get exact code from structure table line positions. Returns: precise code content without reading entire file.",
+                    inputSchema={
+                        "type": "object",
+                        "properties": {
+                            "file_path": {
+                                "type": "string",
+                                "description": "Path to the code file to read (REQUIRED - use exact file path)",
+                            },
+                            "start_line": {
+                                "type": "integer",
+                                "description": "Starting line number (REQUIRED - 1-based, get from structure table)",
+                                "minimum": 1,
+                            },
+                            "end_line": {
+                                "type": "integer",
+                                "description": "Ending line number (optional - 1-based, reads to end if not specified)",
+                                "minimum": 1,
+                            },
+                            "start_column": {
+                                "type": "integer",
+                                "description": "Starting column number (optional - 0-based)",
+                                "minimum": 0,
+                            },
+                            "end_column": {
+                                "type": "integer",
+                                "description": "Ending column number (optional - 0-based)",
+                                "minimum": 0,
+                            },
+                            "format": {
+                                "type": "string",
+                                "description": "Output format for the content (default: 'text')",
+                                "enum": ["text", "json"],
+                                "default": "text",
+                            },
+                        },
+                        "required": ["file_path", "start_line"],
+                        "additionalProperties": False,
+                    },
+                ),
             ]
 
-            # Add tools from tool classes - FIXED VERSION
-            for tool_instance in [
-                self.read_partial_tool,
-                self.table_format_tool,
-                self.universal_analyze_tool,
-            ]:
-                tool_def = tool_instance.get_tool_definition()
-                if isinstance(tool_def, dict):
-                    # Convert dict to Tool object
-                    tools.append(Tool(**tool_def))
-                else:
-                    # Already a Tool object
-                    tools.append(tool_def)
-
             return tools
 
         @server.call_tool()  # type: ignore
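
A quick illustration (not from the package) of argument sets that satisfy the three inputSchema definitions above; note the snake_case keys, that start_line is the only extra required field for extract_code_section, and that format_type is constrained to the listed enum values. The file path is invented:

minimal_extract = {"file_path": "examples/BigFile.java", "start_line": 40}  # reads to end of file
ranged_extract = {"file_path": "examples/BigFile.java", "start_line": 40, "end_line": 80,
                  "start_column": 0, "end_column": 120, "format": "json"}
compact_table = {"file_path": "examples/BigFile.java", "format_type": "compact"}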
@@ -212,7 +350,9 @@ class TreeSitterAnalyzerMCPServer:
                         # Basic sanitization for string inputs
                         sanitized_value = self.security_validator.sanitize_input(value, max_length=10000)
                         arguments[key] = sanitized_value
-                if sanitized_name == "analyze_code_scale":
+
+                # Handle tool calls with unified naming (only new names)
+                if sanitized_name == "check_code_scale":
                     result = await self._analyze_code_scale(arguments)
                     return [
                         TextContent(
@@ -220,15 +360,7 @@ class TreeSitterAnalyzerMCPServer:
                             text=json.dumps(result, indent=2, ensure_ascii=False),
                         )
                     ]
-                elif sanitized_name == "read_code_partial":
-                    result = await self.read_partial_tool.execute(arguments)
-                    return [
-                        TextContent(
-                            type="text",
-                            text=json.dumps(result, indent=2, ensure_ascii=False),
-                        )
-                    ]
-                elif sanitized_name == "format_table":
+                elif sanitized_name == "analyze_code_structure":
                     result = await self.table_format_tool.execute(arguments)
                     return [
                         TextContent(
@@ -236,8 +368,8 @@ class TreeSitterAnalyzerMCPServer:
                             text=json.dumps(result, indent=2, ensure_ascii=False),
                         )
                     ]
-                elif sanitized_name == "analyze_code_universal":
-                    result = await self.universal_analyze_tool.execute(arguments)
+                elif sanitized_name == "extract_code_section":
+                    result = await self.read_partial_tool.execute(arguments)
                     return [
                         TextContent(
                             type="text",
@@ -245,7 +377,7 @@ class TreeSitterAnalyzerMCPServer:
                         )
                     ]
                 else:
-                    raise ValueError(f"Unknown tool: {name}")
+                    raise ValueError(f"Unknown tool: {name}. Available tools: check_code_scale, analyze_code_structure, extract_code_section")
 
         except Exception as e:
             try:
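
Taken together, the dispatch changes above drop every pre-0.9 tool name. The following summary mapping is not present in the source; it spells out how the 0.8.x names correspond to the 0.9.1 names for callers that need to migrate. The old names now fall through to the else branch and raise the ValueError shown in the last hunk:

# Correspondence between removed and current tool names, derived from this diff.
OLD_TO_NEW_TOOL_NAMES = {
    "analyze_code_scale": "check_code_scale",      # same handler (_analyze_code_scale)
    "format_table": "analyze_code_structure",      # both route to TableFormatTool
    "read_code_partial": "extract_code_section",   # both route to ReadPartialTool
    # "analyze_code_universal" has no successor: UniversalAnalyzeTool was removed entirely.
}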