tree-sitter-analyzer 1.6.1.2__py3-none-any.whl → 1.6.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tree-sitter-analyzer might be problematic. Click here for more details.
- tree_sitter_analyzer/__init__.py +1 -1
- tree_sitter_analyzer/core/query.py +11 -9
- tree_sitter_analyzer/core/query_service.py +10 -13
- tree_sitter_analyzer/encoding_utils.py +55 -0
- tree_sitter_analyzer/file_handler.py +43 -27
- tree_sitter_analyzer/languages/python_plugin.py +51 -27
- tree_sitter_analyzer/logging_manager.py +361 -0
- tree_sitter_analyzer/mcp/server.py +1 -1
- tree_sitter_analyzer/mcp/tools/output_format_validator.py +147 -0
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +41 -8
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +74 -1
- tree_sitter_analyzer/mcp/utils/search_cache.py +9 -0
- tree_sitter_analyzer/utils.py +38 -203
- {tree_sitter_analyzer-1.6.1.2.dist-info → tree_sitter_analyzer-1.6.1.4.dist-info}/METADATA +7 -7
- {tree_sitter_analyzer-1.6.1.2.dist-info → tree_sitter_analyzer-1.6.1.4.dist-info}/RECORD +17 -15
- {tree_sitter_analyzer-1.6.1.2.dist-info → tree_sitter_analyzer-1.6.1.4.dist-info}/WHEEL +0 -0
- {tree_sitter_analyzer-1.6.1.2.dist-info → tree_sitter_analyzer-1.6.1.4.dist-info}/entry_points.txt +0 -0
tree_sitter_analyzer/__init__.py
CHANGED
(+1 -1 hunk not captured in this extract)
tree_sitter_analyzer/core/query.py
CHANGED
|
@@ -10,7 +10,7 @@ import logging
|
|
|
10
10
|
import time
|
|
11
11
|
from typing import Any
|
|
12
12
|
|
|
13
|
-
from tree_sitter import Language, Node, Tree
|
|
13
|
+
from tree_sitter import Language, Node, Query, QueryCursor, Tree
|
|
14
14
|
|
|
15
15
|
from ..query_loader import get_query_loader
|
|
16
16
|
|
|
@@ -77,10 +77,11 @@ class QueryExecutor:
|
|
|
77
77
|
f"Query '{query_name}' not found", query_name=query_name
|
|
78
78
|
)
|
|
79
79
|
|
|
80
|
-
# Create and execute the query
|
|
80
|
+
# Create and execute the query using new API (tree-sitter 0.25.0+)
|
|
81
81
|
try:
|
|
82
|
-
query = language
|
|
83
|
-
|
|
82
|
+
query = Query(language, query_string)
|
|
83
|
+
cursor = QueryCursor(query)
|
|
84
|
+
captures = list(cursor.captures(tree.root_node))
|
|
84
85
|
|
|
85
86
|
# Process captures
|
|
86
87
|
try:
|
|
@@ -146,10 +147,11 @@ class QueryExecutor:
|
|
|
146
147
|
if language is None:
|
|
147
148
|
return self._create_error_result("Language is None") # type: ignore[unreachable]
|
|
148
149
|
|
|
149
|
-
# Create and execute the query
|
|
150
|
+
# Create and execute the query using new API (tree-sitter 0.25.0+)
|
|
150
151
|
try:
|
|
151
|
-
query = language
|
|
152
|
-
|
|
152
|
+
query = Query(language, query_string)
|
|
153
|
+
cursor = QueryCursor(query)
|
|
154
|
+
captures = list(cursor.captures(tree.root_node))
|
|
153
155
|
|
|
154
156
|
# Process captures
|
|
155
157
|
try:
|
|
@@ -373,8 +375,8 @@ class QueryExecutor:
|
|
|
373
375
|
if lang_obj is None:
|
|
374
376
|
return False
|
|
375
377
|
|
|
376
|
-
# Try to create the query
|
|
377
|
-
lang_obj
|
|
378
|
+
# Try to create the query using new API (tree-sitter 0.25.0+)
|
|
379
|
+
Query(lang_obj, query_string)
|
|
378
380
|
return True
|
|
379
381
|
|
|
380
382
|
except Exception as e:
|
|
tree_sitter_analyzer/core/query_service.py
CHANGED
|
@@ -9,6 +9,8 @@ Provides core tree-sitter query functionality including predefined and custom qu
|
|
|
9
9
|
import logging
|
|
10
10
|
from typing import Any
|
|
11
11
|
|
|
12
|
+
from tree_sitter import Query, QueryCursor
|
|
13
|
+
|
|
12
14
|
from ..encoding_utils import read_file_safe
|
|
13
15
|
from ..query_loader import query_loader
|
|
14
16
|
from .parser import Parser
|
|
@@ -80,23 +82,18 @@ class QueryService:
|
|
|
80
82
|
f"Query '{query_key}' not found for language '{language}'"
|
|
81
83
|
)
|
|
82
84
|
|
|
83
|
-
# Execute tree-sitter query
|
|
84
|
-
ts_query = language_obj
|
|
85
|
-
|
|
85
|
+
# Execute tree-sitter query using new API (tree-sitter 0.25.0+)
|
|
86
|
+
ts_query = Query(language_obj, query_string)
|
|
87
|
+
cursor = QueryCursor(ts_query)
|
|
88
|
+
matches = cursor.matches(tree.root_node)
|
|
86
89
|
|
|
87
|
-
# Process
|
|
90
|
+
# Process match results (new API returns list of (pattern_index, captures_dict))
|
|
88
91
|
results = []
|
|
89
|
-
|
|
90
|
-
#
|
|
91
|
-
for capture_name, nodes in
|
|
92
|
+
for pattern_index, captures_dict in matches:
|
|
93
|
+
# captures_dict is {capture_name: [node1, node2, ...]}
|
|
94
|
+
for capture_name, nodes in captures_dict.items():
|
|
92
95
|
for node in nodes:
|
|
93
96
|
results.append(self._create_result_dict(node, capture_name))
|
|
94
|
-
else:
|
|
95
|
-
# Old tree-sitter API returns list of tuples
|
|
96
|
-
for capture in captures:
|
|
97
|
-
if isinstance(capture, tuple) and len(capture) == 2:
|
|
98
|
-
node, name = capture
|
|
99
|
-
results.append(self._create_result_dict(node, name))
|
|
100
97
|
|
|
101
98
|
# Apply filters
|
|
102
99
|
if filter_expression and results:
|
|
tree_sitter_analyzer/encoding_utils.py
CHANGED
|
@@ -456,6 +456,61 @@ def extract_text_slice(
|
|
|
456
456
|
)
|
|
457
457
|
|
|
458
458
|
|
|
459
|
+
def read_file_safe_streaming(file_path: str | Path):
|
|
460
|
+
"""
|
|
461
|
+
Context manager for streaming file reading with automatic encoding detection.
|
|
462
|
+
|
|
463
|
+
This function opens a file with the correct encoding detected from the file's
|
|
464
|
+
content and yields a file handle that can be used for line-by-line reading.
|
|
465
|
+
This is memory-efficient for large files as it doesn't load the entire content.
|
|
466
|
+
|
|
467
|
+
Args:
|
|
468
|
+
file_path: Path to the file to read
|
|
469
|
+
|
|
470
|
+
Yields:
|
|
471
|
+
File handle opened with the correct encoding
|
|
472
|
+
|
|
473
|
+
Example:
|
|
474
|
+
with read_file_safe_streaming("large_file.txt") as f:
|
|
475
|
+
for line_num, line in enumerate(f, 1):
|
|
476
|
+
if line_num >= start_line:
|
|
477
|
+
# Process line
|
|
478
|
+
pass
|
|
479
|
+
"""
|
|
480
|
+
import contextlib
|
|
481
|
+
|
|
482
|
+
file_path = Path(file_path)
|
|
483
|
+
|
|
484
|
+
# First, detect encoding by reading a small sample
|
|
485
|
+
try:
|
|
486
|
+
with open(file_path, "rb") as f:
|
|
487
|
+
# Read first 8KB to detect encoding
|
|
488
|
+
sample_data = f.read(8192)
|
|
489
|
+
|
|
490
|
+
if not sample_data:
|
|
491
|
+
# Empty file, use default encoding
|
|
492
|
+
detected_encoding = EncodingManager.DEFAULT_ENCODING
|
|
493
|
+
else:
|
|
494
|
+
# Detect encoding from sample with file path for caching
|
|
495
|
+
detected_encoding = EncodingManager.detect_encoding(sample_data, str(file_path))
|
|
496
|
+
|
|
497
|
+
except OSError as e:
|
|
498
|
+
log_warning(f"Failed to read file for encoding detection {file_path}: {e}")
|
|
499
|
+
raise e
|
|
500
|
+
|
|
501
|
+
# Open file with detected encoding for streaming
|
|
502
|
+
@contextlib.contextmanager
|
|
503
|
+
def _file_context():
|
|
504
|
+
try:
|
|
505
|
+
with open(file_path, "r", encoding=detected_encoding, errors="replace") as f:
|
|
506
|
+
yield f
|
|
507
|
+
except OSError as e:
|
|
508
|
+
log_warning(f"Failed to open file for streaming {file_path}: {e}")
|
|
509
|
+
raise e
|
|
510
|
+
|
|
511
|
+
return _file_context()
|
|
512
|
+
|
|
513
|
+
|
|
459
514
|
def clear_encoding_cache() -> None:
|
|
460
515
|
"""Clear the global encoding cache"""
|
|
461
516
|
_encoding_cache.clear()
|
|
tree_sitter_analyzer/file_handler.py
CHANGED
|
@@ -5,9 +5,10 @@ File Handler Module
|
|
|
5
5
|
This module provides file reading functionality with encoding detection and fallback.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
+
import itertools
|
|
8
9
|
from pathlib import Path
|
|
9
10
|
|
|
10
|
-
from .encoding_utils import read_file_safe
|
|
11
|
+
from .encoding_utils import read_file_safe, read_file_safe_streaming
|
|
11
12
|
from .utils import log_error, log_info, log_warning
|
|
12
13
|
|
|
13
14
|
|
|
@@ -81,7 +82,10 @@ def read_file_partial(
|
|
|
81
82
|
end_column: int | None = None,
|
|
82
83
|
) -> str | None:
|
|
83
84
|
"""
|
|
84
|
-
Read partial file content by line/column range
|
|
85
|
+
Read partial file content by line/column range using streaming approach.
|
|
86
|
+
|
|
87
|
+
This function uses a memory-efficient streaming approach that reads only
|
|
88
|
+
the required lines from the file, making it suitable for very large files.
|
|
85
89
|
|
|
86
90
|
Args:
|
|
87
91
|
file_path: Path to file
|
|
@@ -109,30 +113,39 @@ def read_file_partial(
|
|
|
109
113
|
return None
|
|
110
114
|
|
|
111
115
|
try:
|
|
112
|
-
#
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
116
|
+
# Use streaming approach for memory efficiency
|
|
117
|
+
with read_file_safe_streaming(file_path) as f:
|
|
118
|
+
# Convert to 0-based indexing
|
|
119
|
+
start_idx = start_line - 1
|
|
120
|
+
end_idx = end_line - 1 if end_line is not None else None
|
|
121
|
+
|
|
122
|
+
# Use itertools.islice for efficient line selection
|
|
123
|
+
if end_idx is not None:
|
|
124
|
+
# Read specific range
|
|
125
|
+
selected_lines_iter = itertools.islice(f, start_idx, end_idx + 1)
|
|
126
|
+
else:
|
|
127
|
+
# Read from start_line to end of file
|
|
128
|
+
selected_lines_iter = itertools.islice(f, start_idx, None)
|
|
129
|
+
|
|
130
|
+
# Convert iterator to list for processing
|
|
131
|
+
selected_lines = list(selected_lines_iter)
|
|
132
|
+
|
|
133
|
+
# Check if we got any lines
|
|
134
|
+
if not selected_lines:
|
|
135
|
+
# Check if start_line is beyond file length by counting lines
|
|
136
|
+
with read_file_safe_streaming(file_path) as f_count:
|
|
137
|
+
total_lines = sum(1 for _ in f_count)
|
|
138
|
+
|
|
139
|
+
if start_idx >= total_lines:
|
|
140
|
+
log_warning(
|
|
141
|
+
f"start_line ({start_line}) exceeds file length ({total_lines})"
|
|
142
|
+
)
|
|
143
|
+
return ""
|
|
144
|
+
else:
|
|
145
|
+
# File might be empty or other issue
|
|
146
|
+
return ""
|
|
134
147
|
|
|
135
|
-
# Handle column range
|
|
148
|
+
# Handle column range if specified
|
|
136
149
|
if start_column is not None or end_column is not None:
|
|
137
150
|
processed_lines = []
|
|
138
151
|
for i, line in enumerate(selected_lines):
|
|
@@ -167,7 +180,7 @@ def read_file_partial(
|
|
|
167
180
|
# Preserve original newline (except last line)
|
|
168
181
|
if i < len(selected_lines) - 1:
|
|
169
182
|
# Detect original newline char of the line
|
|
170
|
-
original_line =
|
|
183
|
+
original_line = selected_lines[i]
|
|
171
184
|
if original_line.endswith("\r\n"):
|
|
172
185
|
line_content += "\r\n"
|
|
173
186
|
elif original_line.endswith("\n"):
|
|
@@ -182,9 +195,12 @@ def read_file_partial(
|
|
|
182
195
|
# No column range: join lines directly
|
|
183
196
|
result = "".join(selected_lines)
|
|
184
197
|
|
|
198
|
+
# Calculate end line for logging
|
|
199
|
+
actual_end_line = end_line or (start_line + len(selected_lines) - 1)
|
|
200
|
+
|
|
185
201
|
log_info(
|
|
186
202
|
f"Successfully read partial file {file_path}: "
|
|
187
|
-
f"lines {start_line}-{
|
|
203
|
+
f"lines {start_line}-{actual_end_line}"
|
|
188
204
|
f"{f', columns {start_column}-{end_column}' if start_column is not None or end_column is not None else ''}"
|
|
189
205
|
)
|
|
190
206
|
|
|
tree_sitter_analyzer/languages/python_plugin.py
CHANGED
|
@@ -15,6 +15,7 @@ if TYPE_CHECKING:
|
|
|
15
15
|
|
|
16
16
|
try:
|
|
17
17
|
import tree_sitter
|
|
18
|
+
from tree_sitter import Query, QueryCursor
|
|
18
19
|
|
|
19
20
|
TREE_SITTER_AVAILABLE = True
|
|
20
21
|
except ImportError:
|
|
@@ -113,17 +114,23 @@ class PythonElementExtractor(ElementExtractor):
|
|
|
113
114
|
|
|
114
115
|
language = tree.language if hasattr(tree, "language") else None
|
|
115
116
|
if language:
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
117
|
+
# Use new API (tree-sitter 0.25.0+)
|
|
118
|
+
query = Query(language, class_query)
|
|
119
|
+
cursor = QueryCursor(query)
|
|
120
|
+
matches = cursor.matches(tree.root_node)
|
|
121
|
+
|
|
122
|
+
# Process matches to get class bodies
|
|
123
|
+
class_bodies = []
|
|
124
|
+
for pattern_index, captures_dict in matches:
|
|
125
|
+
for capture_name, nodes in captures_dict.items():
|
|
126
|
+
if capture_name == "class.body":
|
|
127
|
+
class_bodies.extend(nodes)
|
|
128
|
+
|
|
129
|
+
# For each class body, extract attribute assignments
|
|
130
|
+
for class_body in class_bodies:
|
|
131
|
+
variables.extend(
|
|
132
|
+
self._extract_class_attributes(class_body, source_code)
|
|
133
|
+
)
|
|
127
134
|
|
|
128
135
|
except Exception as e:
|
|
129
136
|
log_warning(f"Could not extract Python class attributes: {e}")
|
|
@@ -664,20 +671,29 @@ class PythonElementExtractor(ElementExtractor):
|
|
|
664
671
|
language = tree.language if hasattr(tree, "language") else None
|
|
665
672
|
if language:
|
|
666
673
|
for query_string in import_queries:
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
674
|
+
# Use new API (tree-sitter 0.25.0+)
|
|
675
|
+
query = Query(language, query_string)
|
|
676
|
+
cursor = QueryCursor(query)
|
|
677
|
+
matches = cursor.matches(tree.root_node)
|
|
678
|
+
|
|
679
|
+
# Process matches to get statement nodes
|
|
680
|
+
statement_nodes = {}
|
|
681
|
+
for pattern_index, captures_dict in matches:
|
|
682
|
+
for capture_name, nodes in captures_dict.items():
|
|
683
|
+
if capture_name.endswith("statement"):
|
|
684
|
+
import_type = capture_name.split(".")[0]
|
|
685
|
+
if import_type not in statement_nodes:
|
|
686
|
+
statement_nodes[import_type] = []
|
|
687
|
+
statement_nodes[import_type].extend(nodes)
|
|
688
|
+
|
|
689
|
+
# Process different types of imports
|
|
690
|
+
for import_type, nodes in statement_nodes.items():
|
|
691
|
+
for node in nodes:
|
|
692
|
+
imp = self._extract_import_info(
|
|
693
|
+
node, source_code, import_type
|
|
694
|
+
)
|
|
695
|
+
if imp:
|
|
696
|
+
imports.append(imp)
|
|
681
697
|
|
|
682
698
|
except Exception as e:
|
|
683
699
|
log_warning(f"Could not extract Python imports: {e}")
|
|
@@ -1179,8 +1195,16 @@ class PythonPlugin(LanguagePlugin):
|
|
|
1179
1195
|
else:
|
|
1180
1196
|
return {"error": f"Unknown query: {query_name}"}
|
|
1181
1197
|
|
|
1182
|
-
|
|
1183
|
-
|
|
1198
|
+
# Use new API (tree-sitter 0.25.0+)
|
|
1199
|
+
query = Query(language, query_string)
|
|
1200
|
+
cursor = QueryCursor(query)
|
|
1201
|
+
matches = list(cursor.matches(tree.root_node))
|
|
1202
|
+
# Convert matches to legacy format for compatibility
|
|
1203
|
+
captures = []
|
|
1204
|
+
for pattern_index, captures_dict in matches:
|
|
1205
|
+
for capture_name, nodes in captures_dict.items():
|
|
1206
|
+
for node in nodes:
|
|
1207
|
+
captures.append((node, capture_name))
|
|
1184
1208
|
return {"captures": captures, "query": query_string}
|
|
1185
1209
|
|
|
1186
1210
|
except Exception as e:
|