tree-sitter-analyzer 1.6.1.2__py3-none-any.whl → 1.6.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

@@ -11,7 +11,7 @@ Architecture:
11
11
  - Data Models: Generic and language-specific code element representations
12
12
  """
13
13
 
14
- __version__ = "1.6.1.2"
14
+ __version__ = "1.6.1.4"
15
15
  __author__ = "aisheng.yu"
16
16
  __email__ = "aimasteracc@gmail.com"
17
17
 
@@ -10,7 +10,7 @@ import logging
10
10
  import time
11
11
  from typing import Any
12
12
 
13
- from tree_sitter import Language, Node, Tree
13
+ from tree_sitter import Language, Node, Query, QueryCursor, Tree
14
14
 
15
15
  from ..query_loader import get_query_loader
16
16
 
@@ -77,10 +77,11 @@ class QueryExecutor:
77
77
  f"Query '{query_name}' not found", query_name=query_name
78
78
  )
79
79
 
80
- # Create and execute the query
80
+ # Create and execute the query using new API (tree-sitter 0.25.0+)
81
81
  try:
82
- query = language.query(query_string)
83
- captures = query.captures(tree.root_node)
82
+ query = Query(language, query_string)
83
+ cursor = QueryCursor(query)
84
+ captures = list(cursor.captures(tree.root_node))
84
85
 
85
86
  # Process captures
86
87
  try:
@@ -146,10 +147,11 @@ class QueryExecutor:
146
147
  if language is None:
147
148
  return self._create_error_result("Language is None") # type: ignore[unreachable]
148
149
 
149
- # Create and execute the query
150
+ # Create and execute the query using new API (tree-sitter 0.25.0+)
150
151
  try:
151
- query = language.query(query_string)
152
- captures = query.captures(tree.root_node)
152
+ query = Query(language, query_string)
153
+ cursor = QueryCursor(query)
154
+ captures = list(cursor.captures(tree.root_node))
153
155
 
154
156
  # Process captures
155
157
  try:
@@ -373,8 +375,8 @@ class QueryExecutor:
373
375
  if lang_obj is None:
374
376
  return False
375
377
 
376
- # Try to create the query
377
- lang_obj.query(query_string)
378
+ # Try to create the query using new API (tree-sitter 0.25.0+)
379
+ Query(lang_obj, query_string)
378
380
  return True
379
381
 
380
382
  except Exception as e:
@@ -9,6 +9,8 @@ Provides core tree-sitter query functionality including predefined and custom qu
9
9
  import logging
10
10
  from typing import Any
11
11
 
12
+ from tree_sitter import Query, QueryCursor
13
+
12
14
  from ..encoding_utils import read_file_safe
13
15
  from ..query_loader import query_loader
14
16
  from .parser import Parser
@@ -80,23 +82,18 @@ class QueryService:
80
82
  f"Query '{query_key}' not found for language '{language}'"
81
83
  )
82
84
 
83
- # Execute tree-sitter query
84
- ts_query = language_obj.query(query_string)
85
- captures = ts_query.captures(tree.root_node)
85
+ # Execute tree-sitter query using new API (tree-sitter 0.25.0+)
86
+ ts_query = Query(language_obj, query_string)
87
+ cursor = QueryCursor(ts_query)
88
+ matches = cursor.matches(tree.root_node)
86
89
 
87
- # Process capture results
90
+ # Process match results (new API returns list of (pattern_index, captures_dict))
88
91
  results = []
89
- if isinstance(captures, dict):
90
- # New tree-sitter API returns dictionary
91
- for capture_name, nodes in captures.items():
92
+ for pattern_index, captures_dict in matches:
93
+ # captures_dict is {capture_name: [node1, node2, ...]}
94
+ for capture_name, nodes in captures_dict.items():
92
95
  for node in nodes:
93
96
  results.append(self._create_result_dict(node, capture_name))
94
- else:
95
- # Old tree-sitter API returns list of tuples
96
- for capture in captures:
97
- if isinstance(capture, tuple) and len(capture) == 2:
98
- node, name = capture
99
- results.append(self._create_result_dict(node, name))
100
97
 
101
98
  # Apply filters
102
99
  if filter_expression and results:
@@ -456,6 +456,61 @@ def extract_text_slice(
456
456
  )
457
457
 
458
458
 
459
+ def read_file_safe_streaming(file_path: str | Path):
460
+ """
461
+ Context manager for streaming file reading with automatic encoding detection.
462
+
463
+ This function opens a file with the correct encoding detected from the file's
464
+ content and yields a file handle that can be used for line-by-line reading.
465
+ This is memory-efficient for large files as it doesn't load the entire content.
466
+
467
+ Args:
468
+ file_path: Path to the file to read
469
+
470
+ Yields:
471
+ File handle opened with the correct encoding
472
+
473
+ Example:
474
+ with read_file_safe_streaming("large_file.txt") as f:
475
+ for line_num, line in enumerate(f, 1):
476
+ if line_num >= start_line:
477
+ # Process line
478
+ pass
479
+ """
480
+ import contextlib
481
+
482
+ file_path = Path(file_path)
483
+
484
+ # First, detect encoding by reading a small sample
485
+ try:
486
+ with open(file_path, "rb") as f:
487
+ # Read first 8KB to detect encoding
488
+ sample_data = f.read(8192)
489
+
490
+ if not sample_data:
491
+ # Empty file, use default encoding
492
+ detected_encoding = EncodingManager.DEFAULT_ENCODING
493
+ else:
494
+ # Detect encoding from sample with file path for caching
495
+ detected_encoding = EncodingManager.detect_encoding(sample_data, str(file_path))
496
+
497
+ except OSError as e:
498
+ log_warning(f"Failed to read file for encoding detection {file_path}: {e}")
499
+ raise e
500
+
501
+ # Open file with detected encoding for streaming
502
+ @contextlib.contextmanager
503
+ def _file_context():
504
+ try:
505
+ with open(file_path, "r", encoding=detected_encoding, errors="replace") as f:
506
+ yield f
507
+ except OSError as e:
508
+ log_warning(f"Failed to open file for streaming {file_path}: {e}")
509
+ raise e
510
+
511
+ return _file_context()
512
+
513
+
459
514
  def clear_encoding_cache() -> None:
460
515
  """Clear the global encoding cache"""
461
516
  _encoding_cache.clear()
@@ -5,9 +5,10 @@ File Handler Module
5
5
  This module provides file reading functionality with encoding detection and fallback.
6
6
  """
7
7
 
8
+ import itertools
8
9
  from pathlib import Path
9
10
 
10
- from .encoding_utils import read_file_safe
11
+ from .encoding_utils import read_file_safe, read_file_safe_streaming
11
12
  from .utils import log_error, log_info, log_warning
12
13
 
13
14
 
@@ -81,7 +82,10 @@ def read_file_partial(
81
82
  end_column: int | None = None,
82
83
  ) -> str | None:
83
84
  """
84
- Read partial file content by line/column range
85
+ Read partial file content by line/column range using streaming approach.
86
+
87
+ This function uses a memory-efficient streaming approach that reads only
88
+ the required lines from the file, making it suitable for very large files.
85
89
 
86
90
  Args:
87
91
  file_path: Path to file
@@ -109,30 +113,39 @@ def read_file_partial(
109
113
  return None
110
114
 
111
115
  try:
112
- # Read whole file safely
113
- content, detected_encoding = read_file_safe(file_path)
114
-
115
- # Split to lines
116
- lines = content.splitlines(keepends=True)
117
- total_lines = len(lines)
118
-
119
- # Adjust line indexes
120
- start_idx = start_line - 1 # convert to 0-based
121
- end_idx = min(
122
- end_line - 1 if end_line is not None else total_lines - 1, total_lines - 1
123
- )
124
-
125
- # Range check
126
- if start_idx >= total_lines:
127
- log_warning(
128
- f"start_line ({start_line}) exceeds file length ({total_lines})"
129
- )
130
- return ""
131
-
132
- # Select lines
133
- selected_lines = lines[start_idx : end_idx + 1]
116
+ # Use streaming approach for memory efficiency
117
+ with read_file_safe_streaming(file_path) as f:
118
+ # Convert to 0-based indexing
119
+ start_idx = start_line - 1
120
+ end_idx = end_line - 1 if end_line is not None else None
121
+
122
+ # Use itertools.islice for efficient line selection
123
+ if end_idx is not None:
124
+ # Read specific range
125
+ selected_lines_iter = itertools.islice(f, start_idx, end_idx + 1)
126
+ else:
127
+ # Read from start_line to end of file
128
+ selected_lines_iter = itertools.islice(f, start_idx, None)
129
+
130
+ # Convert iterator to list for processing
131
+ selected_lines = list(selected_lines_iter)
132
+
133
+ # Check if we got any lines
134
+ if not selected_lines:
135
+ # Check if start_line is beyond file length by counting lines
136
+ with read_file_safe_streaming(file_path) as f_count:
137
+ total_lines = sum(1 for _ in f_count)
138
+
139
+ if start_idx >= total_lines:
140
+ log_warning(
141
+ f"start_line ({start_line}) exceeds file length ({total_lines})"
142
+ )
143
+ return ""
144
+ else:
145
+ # File might be empty or other issue
146
+ return ""
134
147
 
135
- # Handle column range
148
+ # Handle column range if specified
136
149
  if start_column is not None or end_column is not None:
137
150
  processed_lines = []
138
151
  for i, line in enumerate(selected_lines):
@@ -167,7 +180,7 @@ def read_file_partial(
167
180
  # Preserve original newline (except last line)
168
181
  if i < len(selected_lines) - 1:
169
182
  # Detect original newline char of the line
170
- original_line = lines[start_idx + i]
183
+ original_line = selected_lines[i]
171
184
  if original_line.endswith("\r\n"):
172
185
  line_content += "\r\n"
173
186
  elif original_line.endswith("\n"):
@@ -182,9 +195,12 @@ def read_file_partial(
182
195
  # No column range: join lines directly
183
196
  result = "".join(selected_lines)
184
197
 
198
+ # Calculate end line for logging
199
+ actual_end_line = end_line or (start_line + len(selected_lines) - 1)
200
+
185
201
  log_info(
186
202
  f"Successfully read partial file {file_path}: "
187
- f"lines {start_line}-{end_line or total_lines}"
203
+ f"lines {start_line}-{actual_end_line}"
188
204
  f"{f', columns {start_column}-{end_column}' if start_column is not None or end_column is not None else ''}"
189
205
  )
190
206
 
@@ -15,6 +15,7 @@ if TYPE_CHECKING:
15
15
 
16
16
  try:
17
17
  import tree_sitter
18
+ from tree_sitter import Query, QueryCursor
18
19
 
19
20
  TREE_SITTER_AVAILABLE = True
20
21
  except ImportError:
@@ -113,17 +114,23 @@ class PythonElementExtractor(ElementExtractor):
113
114
 
114
115
  language = tree.language if hasattr(tree, "language") else None
115
116
  if language:
116
- query = language.query(class_query)
117
- captures = query.captures(tree.root_node)
118
-
119
- if isinstance(captures, dict):
120
- class_bodies = captures.get("class.body", [])
121
-
122
- # For each class body, extract attribute assignments
123
- for class_body in class_bodies:
124
- variables.extend(
125
- self._extract_class_attributes(class_body, source_code)
126
- )
117
+ # Use new API (tree-sitter 0.25.0+)
118
+ query = Query(language, class_query)
119
+ cursor = QueryCursor(query)
120
+ matches = cursor.matches(tree.root_node)
121
+
122
+ # Process matches to get class bodies
123
+ class_bodies = []
124
+ for pattern_index, captures_dict in matches:
125
+ for capture_name, nodes in captures_dict.items():
126
+ if capture_name == "class.body":
127
+ class_bodies.extend(nodes)
128
+
129
+ # For each class body, extract attribute assignments
130
+ for class_body in class_bodies:
131
+ variables.extend(
132
+ self._extract_class_attributes(class_body, source_code)
133
+ )
127
134
 
128
135
  except Exception as e:
129
136
  log_warning(f"Could not extract Python class attributes: {e}")
@@ -664,20 +671,29 @@ class PythonElementExtractor(ElementExtractor):
664
671
  language = tree.language if hasattr(tree, "language") else None
665
672
  if language:
666
673
  for query_string in import_queries:
667
- query = language.query(query_string)
668
- captures = query.captures(tree.root_node)
669
-
670
- if isinstance(captures, dict):
671
- # Process different types of imports
672
- for key, nodes in captures.items():
673
- if key.endswith("statement"):
674
- import_type = key.split(".")[0]
675
- for node in nodes:
676
- imp = self._extract_import_info(
677
- node, source_code, import_type
678
- )
679
- if imp:
680
- imports.append(imp)
674
+ # Use new API (tree-sitter 0.25.0+)
675
+ query = Query(language, query_string)
676
+ cursor = QueryCursor(query)
677
+ matches = cursor.matches(tree.root_node)
678
+
679
+ # Process matches to get statement nodes
680
+ statement_nodes = {}
681
+ for pattern_index, captures_dict in matches:
682
+ for capture_name, nodes in captures_dict.items():
683
+ if capture_name.endswith("statement"):
684
+ import_type = capture_name.split(".")[0]
685
+ if import_type not in statement_nodes:
686
+ statement_nodes[import_type] = []
687
+ statement_nodes[import_type].extend(nodes)
688
+
689
+ # Process different types of imports
690
+ for import_type, nodes in statement_nodes.items():
691
+ for node in nodes:
692
+ imp = self._extract_import_info(
693
+ node, source_code, import_type
694
+ )
695
+ if imp:
696
+ imports.append(imp)
681
697
 
682
698
  except Exception as e:
683
699
  log_warning(f"Could not extract Python imports: {e}")
@@ -1179,8 +1195,16 @@ class PythonPlugin(LanguagePlugin):
1179
1195
  else:
1180
1196
  return {"error": f"Unknown query: {query_name}"}
1181
1197
 
1182
- query = language.query(query_string)
1183
- captures = query.captures(tree.root_node)
1198
+ # Use new API (tree-sitter 0.25.0+)
1199
+ query = Query(language, query_string)
1200
+ cursor = QueryCursor(query)
1201
+ matches = list(cursor.matches(tree.root_node))
1202
+ # Convert matches to legacy format for compatibility
1203
+ captures = []
1204
+ for pattern_index, captures_dict in matches:
1205
+ for capture_name, nodes in captures_dict.items():
1206
+ for node in nodes:
1207
+ captures.append((node, capture_name))
1184
1208
  return {"captures": captures, "query": query_string}
1185
1209
 
1186
1210
  except Exception as e: