tree-sitter-analyzer 0.9.4__py3-none-any.whl → 0.9.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tree-sitter-analyzer might be problematic. Click here for more details.
- tree_sitter_analyzer/__init__.py +1 -1
- tree_sitter_analyzer/cli/commands/query_command.py +50 -35
- tree_sitter_analyzer/cli_main.py +21 -0
- tree_sitter_analyzer/core/query_filter.py +200 -0
- tree_sitter_analyzer/core/query_service.py +162 -0
- tree_sitter_analyzer/encoding_utils.py +7 -4
- tree_sitter_analyzer/mcp/resources/code_file_resource.py +1 -2
- tree_sitter_analyzer/mcp/server.py +7 -1
- tree_sitter_analyzer/mcp/tools/query_tool.py +238 -0
- tree_sitter_analyzer/queries/java.py +5 -0
- tree_sitter_analyzer/security/boundary_manager.py +10 -2
- tree_sitter_analyzer/table_formatter.py +6 -1
- tree_sitter_analyzer/utils.py +51 -40
- tree_sitter_analyzer-0.9.6.dist-info/METADATA +631 -0
- {tree_sitter_analyzer-0.9.4.dist-info → tree_sitter_analyzer-0.9.6.dist-info}/RECORD +17 -14
- tree_sitter_analyzer-0.9.4.dist-info/METADATA +0 -409
- {tree_sitter_analyzer-0.9.4.dist-info → tree_sitter_analyzer-0.9.6.dist-info}/WHEEL +0 -0
- {tree_sitter_analyzer-0.9.4.dist-info → tree_sitter_analyzer-0.9.6.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Query Tool for MCP
|
|
4
|
+
|
|
5
|
+
MCP tool providing tree-sitter query functionality using unified QueryService.
|
|
6
|
+
Supports both predefined query keys and custom query strings.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from ...core.query_service import QueryService
|
|
13
|
+
from ...language_detector import detect_language_from_file
|
|
14
|
+
from ...security import SecurityValidator
|
|
15
|
+
from ..utils.error_handler import handle_mcp_errors
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class QueryTool:
    """MCP query tool providing tree-sitter query functionality.

    Wraps a ``QueryService`` and a ``SecurityValidator`` (both constructed
    from the same ``project_root``) and exposes them as the ``query_code``
    MCP tool.
    """

    def __init__(self, project_root: str | None = None) -> None:
        """Initialize the query tool.

        Args:
            project_root: Passed unchanged to ``QueryService`` and
                ``SecurityValidator``; may be ``None``.
        """
        self.project_root = project_root
        self.query_service = QueryService(project_root)
        self.security_validator = SecurityValidator(project_root)

    def get_tool_definition(self) -> dict[str, Any]:
        """Return the MCP tool definition for ``query_code``.

        Returns:
            Dictionary with the tool ``name``, ``description`` and the JSON
            schema (``inputSchema``) of the accepted arguments.
        """
        return {
            "name": "query_code",
            "description": "Execute tree-sitter queries on code files to extract specific code elements",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "file_path": {
                        "type": "string",
                        "description": "Path to the code file to query (relative to project root)",
                    },
                    "language": {
                        "type": "string",
                        "description": "Programming language (optional, auto-detected if not provided)",
                    },
                    "query_key": {
                        "type": "string",
                        "description": "Predefined query key (e.g., 'methods', 'class', 'functions')",
                    },
                    "query_string": {
                        "type": "string",
                        "description": "Custom tree-sitter query string (e.g., '(method_declaration) @method')",
                    },
                    "filter": {
                        "type": "string",
                        "description": "Filter expression to refine results (e.g., 'name=main', 'name=~get*,public=true')",
                    },
                    "output_format": {
                        "type": "string",
                        "enum": ["json", "summary"],
                        "default": "json",
                        "description": "Output format",
                    },
                },
                "required": ["file_path"],
                # NOTE: ``anyOf`` also admits requests carrying both keys;
                # ``execute`` is stricter and rejects that combination.
                "anyOf": [
                    {"required": ["query_key"]},
                    {"required": ["query_string"]},
                ],
            },
        }

    @handle_mcp_errors
    async def execute(self, arguments: dict[str, Any]) -> dict[str, Any]:
        """Run a tree-sitter query described by ``arguments``.

        Args:
            arguments: Tool arguments. Reads ``file_path`` (required),
                exactly one of ``query_key`` / ``query_string``, and the
                optional ``language``, ``filter`` and ``output_format``
                (defaults to ``"json"``).

        Returns:
            On success a dictionary with ``success: True`` and either the raw
            results (JSON format) or a grouped summary (``"summary"`` format).
            If the query service raises, a dictionary with ``success: False``
            and the error text.

        Raises:
            ValueError: If ``file_path`` is missing, if neither or both of
                ``query_key`` / ``query_string`` are given, or if the
                language is not supplied and cannot be detected.
        """
        # Validate input parameters
        file_path = arguments.get("file_path")
        if not file_path:
            raise ValueError("file_path is required")

        # Security validation
        # NOTE(review): the return value is used below as the path to analyze;
        # confirm that validate_file_path returns the validated path rather
        # than a status value.
        validated_path = self.security_validator.validate_file_path(file_path)

        # Get query parameters
        query_key = arguments.get("query_key")
        query_string = arguments.get("query_string")
        filter_expression = arguments.get("filter")
        output_format = arguments.get("output_format", "json")

        if not query_key and not query_string:
            raise ValueError("Either query_key or query_string must be provided")

        if query_key and query_string:
            raise ValueError("Cannot provide both query_key and query_string")

        # Detect language only when the caller did not supply one
        language = arguments.get("language")
        if not language:
            language = detect_language_from_file(validated_path)
            if not language:
                raise ValueError(f"Could not detect language for file: {file_path}")

        try:
            results = await self.query_service.execute_query(
                validated_path, language, query_key, query_string, filter_expression
            )

            if not results:
                # Carry the same context keys as the non-empty JSON payload so
                # callers can treat both shapes uniformly.
                return {
                    "success": True,
                    "message": "No results found matching the query",
                    "results": [],
                    "count": 0,
                    "file_path": file_path,
                    "language": language,
                    "query": query_key or query_string,
                }

            if output_format == "summary":
                return self._format_summary(results, query_key or "custom", language)

            # Any other output_format value falls back to the JSON payload.
            return {
                "success": True,
                "results": results,
                "count": len(results),
                "file_path": file_path,
                "language": language,
                "query": query_key or query_string,
            }

        except Exception as e:
            # Deliberate best-effort boundary: report the failure to the
            # caller as data, but keep the traceback in the log.
            logger.exception("Query execution failed: %s", e)
            return {
                "success": False,
                "error": str(e),
                "file_path": file_path,
                "language": language,
            }

    def _format_summary(
        self, results: list[dict[str, Any]], query_type: str, language: str
    ) -> dict[str, Any]:
        """Group query results by capture name into a compact summary.

        Args:
            results: Query results; each entry must provide the keys
                ``capture_name``, ``content``, ``start_line``, ``end_line``
                and ``node_type``.
            query_type: Label stored under ``query_type`` in the summary.
            language: Programming language stored under ``language``.

        Returns:
            Dictionary with ``success``, ``query_type``, ``language``,
            ``total_count`` and ``captures`` (capture name -> count and
            per-item name / line range / node type).

        Raises:
            KeyError: If a result lacks one of the keys listed above.
        """
        # Group by capture name, preserving first-seen order
        by_capture: dict[str, list[dict[str, Any]]] = {}
        for result in results:
            by_capture.setdefault(result["capture_name"], []).append(result)

        return {
            "success": True,
            "query_type": query_type,
            "language": language,
            "total_count": len(results),
            "captures": {
                capture_name: {
                    "count": len(items),
                    "items": [
                        {
                            "name": self._extract_name_from_content(item["content"]),
                            "line_range": f"{item['start_line']}-{item['end_line']}",
                            "node_type": item["node_type"],
                        }
                        for item in items
                    ],
                }
                for capture_name, items in by_capture.items()
            },
        }

    def _extract_name_from_content(self, content: str) -> str:
        """Extract a declaration name from code content (simple heuristic).

        Leading annotations / decorators (``@Name`` or ``@Name(...)``) are
        skipped, whether they sit on their own lines or prefix the
        declaration, so that the annotation is not mistaken for the declared
        name. The first remaining line is then matched against a few common
        declaration patterns.

        Args:
            content: Code content of a captured node.

        Returns:
            The extracted name, or ``"unnamed"`` if no pattern matches.
        """
        # Function-scope import: `re` is not imported at module level.
        import re

        # One annotation/decorator with optional single-line, non-nested args.
        annotation = re.compile(r"@[\w.]+(?:\s*\([^()]*\))?\s*")

        # Match common declaration patterns, most specific first
        patterns = (
            r"(?:public|private|protected)?\s*(?:static)?\s*(?:class|interface)\s+(\w+)",  # class/interface
            r"(?:public|private|protected)?\s*(?:static)?\s*\w+\s+(\w+)\s*\(",  # method
            r"(\w+)\s*\(",  # simple function call
        )

        for raw_line in content.strip().split("\n"):
            line = raw_line.strip()

            # Drop every leading annotation on this line.
            prefix = annotation.match(line)
            while prefix:
                line = line[prefix.end():]
                prefix = annotation.match(line)

            if not line:
                # Nothing but annotations (or blank): the declaration itself
                # starts on a later line.
                continue

            for pattern in patterns:
                match = re.search(pattern, line)
                if match:
                    return match.group(1)

            # Only the first declaration line is inspected.
            return "unnamed"

        return "unnamed"

    def get_available_queries(self, language: str) -> list[str]:
        """Return the query keys available for ``language``.

        Args:
            language: Programming language.

        Returns:
            Whatever ``QueryService.get_available_queries`` returns for the
            given language.
        """
        return self.query_service.get_available_queries(language)
|
|
@@ -356,6 +356,11 @@ ALL_QUERIES["functions"] = {
|
|
|
356
356
|
"description": "Search all function/method declarations (alias for method)",
|
|
357
357
|
}
|
|
358
358
|
|
|
359
|
+
ALL_QUERIES["methods"] = {
|
|
360
|
+
"query": JAVA_QUERIES["method"],
|
|
361
|
+
"description": "Search all method declarations (alias for method)",
|
|
362
|
+
}
|
|
363
|
+
|
|
359
364
|
ALL_QUERIES["classes"] = {
|
|
360
365
|
"query": JAVA_QUERIES["class"],
|
|
361
366
|
"description": "Search all class declarations (alias for class)",
|
|
@@ -189,7 +189,15 @@ class ProjectBoundaryManager:
|
|
|
189
189
|
if not os.path.exists(file_path):
|
|
190
190
|
return True # Non-existent files are safe
|
|
191
191
|
|
|
192
|
-
#
|
|
192
|
+
# If the fully resolved path is within project boundaries, we treat it as safe.
|
|
193
|
+
# This makes the check tolerant to system-level symlinks like
|
|
194
|
+
# /var -> /private/var on macOS runners.
|
|
195
|
+
resolved = os.path.realpath(file_path)
|
|
196
|
+
if self.is_within_project(resolved):
|
|
197
|
+
return True
|
|
198
|
+
|
|
199
|
+
# Otherwise, inspect each path component symlink to ensure no hop jumps outside
|
|
200
|
+
# the allowed directories.
|
|
193
201
|
path_parts = Path(file_path).parts
|
|
194
202
|
current_path = ""
|
|
195
203
|
|
|
@@ -199,7 +207,6 @@ class ProjectBoundaryManager:
|
|
|
199
207
|
)
|
|
200
208
|
|
|
201
209
|
if os.path.islink(current_path):
|
|
202
|
-
# Check if symlink target is within boundaries
|
|
203
210
|
target = os.path.realpath(current_path)
|
|
204
211
|
if not self.is_within_project(target):
|
|
205
212
|
log_warning(
|
|
@@ -207,6 +214,7 @@ class ProjectBoundaryManager:
|
|
|
207
214
|
)
|
|
208
215
|
return False
|
|
209
216
|
|
|
217
|
+
# If no unsafe hop found, consider safe
|
|
210
218
|
return True
|
|
211
219
|
|
|
212
220
|
except Exception as e:
|
|
@@ -616,7 +616,12 @@ class TableFormatter:
|
|
|
616
616
|
type_name = str(type_name)
|
|
617
617
|
|
|
618
618
|
# At this point, type_name is guaranteed to be a string
|
|
619
|
-
|
|
619
|
+
# Defensive check (avoid using assert for runtime safety and security checks)
|
|
620
|
+
if not isinstance(type_name, str):
|
|
621
|
+
try:
|
|
622
|
+
type_name = str(type_name)
|
|
623
|
+
except Exception:
|
|
624
|
+
type_name = "O"
|
|
620
625
|
|
|
621
626
|
type_mapping = {
|
|
622
627
|
"String": "S",
|
tree_sitter_analyzer/utils.py
CHANGED
|
@@ -51,10 +51,13 @@ def setup_logger(
|
|
|
51
51
|
logger.addHandler(file_handler)
|
|
52
52
|
except Exception as e:
|
|
53
53
|
# Never let logging configuration break runtime behavior; log to stderr if possible
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
54
|
+
if hasattr(sys, "stderr") and hasattr(sys.stderr, "write"):
|
|
55
|
+
try:
|
|
56
|
+
sys.stderr.write(
|
|
57
|
+
f"[logging_setup] file handler init skipped: {e}\n"
|
|
58
|
+
)
|
|
59
|
+
except Exception:
|
|
60
|
+
...
|
|
58
61
|
|
|
59
62
|
logger.setLevel(level)
|
|
60
63
|
|
|
@@ -111,17 +114,19 @@ def setup_safe_logging_shutdown() -> None:
|
|
|
111
114
|
handler.close()
|
|
112
115
|
logger.removeHandler(handler)
|
|
113
116
|
except Exception as e:
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
117
|
+
if hasattr(sys, "stderr") and hasattr(sys.stderr, "write"):
|
|
118
|
+
try:
|
|
119
|
+
sys.stderr.write(
|
|
120
|
+
f"[logging_cleanup] handler close/remove skipped: {e}\n"
|
|
121
|
+
)
|
|
122
|
+
except Exception:
|
|
123
|
+
...
|
|
120
124
|
except Exception as e:
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
+
if hasattr(sys, "stderr") and hasattr(sys.stderr, "write"):
|
|
126
|
+
try:
|
|
127
|
+
sys.stderr.write(f"[logging_cleanup] cleanup skipped: {e}\n")
|
|
128
|
+
except Exception:
|
|
129
|
+
...
|
|
125
130
|
|
|
126
131
|
# Register cleanup function
|
|
127
132
|
atexit.register(cleanup_logging)
|
|
@@ -140,10 +145,11 @@ def log_info(message: str, *args: Any, **kwargs: Any) -> None:
|
|
|
140
145
|
try:
|
|
141
146
|
logger.info(message, *args, **kwargs)
|
|
142
147
|
except (ValueError, OSError) as e:
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
148
|
+
if hasattr(sys, "stderr") and hasattr(sys.stderr, "write"):
|
|
149
|
+
try:
|
|
150
|
+
sys.stderr.write(f"[log_info] suppressed: {e}\n")
|
|
151
|
+
except Exception:
|
|
152
|
+
...
|
|
147
153
|
|
|
148
154
|
|
|
149
155
|
def log_warning(message: str, *args: Any, **kwargs: Any) -> None:
|
|
@@ -151,10 +157,11 @@ def log_warning(message: str, *args: Any, **kwargs: Any) -> None:
|
|
|
151
157
|
try:
|
|
152
158
|
logger.warning(message, *args, **kwargs)
|
|
153
159
|
except (ValueError, OSError) as e:
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
160
|
+
if hasattr(sys, "stderr") and hasattr(sys.stderr, "write"):
|
|
161
|
+
try:
|
|
162
|
+
sys.stderr.write(f"[log_warning] suppressed: {e}\n")
|
|
163
|
+
except Exception:
|
|
164
|
+
...
|
|
158
165
|
|
|
159
166
|
|
|
160
167
|
def log_error(message: str, *args: Any, **kwargs: Any) -> None:
|
|
@@ -162,10 +169,11 @@ def log_error(message: str, *args: Any, **kwargs: Any) -> None:
|
|
|
162
169
|
try:
|
|
163
170
|
logger.error(message, *args, **kwargs)
|
|
164
171
|
except (ValueError, OSError) as e:
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
172
|
+
if hasattr(sys, "stderr") and hasattr(sys.stderr, "write"):
|
|
173
|
+
try:
|
|
174
|
+
sys.stderr.write(f"[log_error] suppressed: {e}\n")
|
|
175
|
+
except Exception:
|
|
176
|
+
...
|
|
169
177
|
|
|
170
178
|
|
|
171
179
|
def log_debug(message: str, *args: Any, **kwargs: Any) -> None:
|
|
@@ -173,10 +181,11 @@ def log_debug(message: str, *args: Any, **kwargs: Any) -> None:
|
|
|
173
181
|
try:
|
|
174
182
|
logger.debug(message, *args, **kwargs)
|
|
175
183
|
except (ValueError, OSError) as e:
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
184
|
+
if hasattr(sys, "stderr") and hasattr(sys.stderr, "write"):
|
|
185
|
+
try:
|
|
186
|
+
sys.stderr.write(f"[log_debug] suppressed: {e}\n")
|
|
187
|
+
except Exception:
|
|
188
|
+
...
|
|
180
189
|
|
|
181
190
|
|
|
182
191
|
def suppress_output(func: Any) -> Any:
|
|
@@ -199,12 +208,13 @@ def suppress_output(func: Any) -> Any:
|
|
|
199
208
|
try:
|
|
200
209
|
sys.stdout.close()
|
|
201
210
|
except Exception as e:
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
211
|
+
if hasattr(sys, "stderr") and hasattr(sys.stderr, "write"):
|
|
212
|
+
try:
|
|
213
|
+
sys.stderr.write(
|
|
214
|
+
f"[suppress_output] stdout close suppressed: {e}\n"
|
|
215
|
+
)
|
|
216
|
+
except Exception:
|
|
217
|
+
...
|
|
208
218
|
sys.stdout = old_stdout
|
|
209
219
|
|
|
210
220
|
return result
|
|
@@ -282,10 +292,11 @@ def log_performance(
|
|
|
282
292
|
message += f" - {detail_str}"
|
|
283
293
|
perf_logger.debug(message) # Change to DEBUG level
|
|
284
294
|
except (ValueError, OSError) as e:
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
295
|
+
if hasattr(sys, "stderr") and hasattr(sys.stderr, "write"):
|
|
296
|
+
try:
|
|
297
|
+
sys.stderr.write(f"[log_performance] suppressed: {e}\n")
|
|
298
|
+
except Exception:
|
|
299
|
+
...
|
|
289
300
|
|
|
290
301
|
|
|
291
302
|
def setup_performance_logger() -> logging.Logger:
|