PyPI - tree-sitter-analyzer - Versions diffs - 1.6.1.2__py3-none-any.whl → 1.6.1.4__py3-none-any.whl - Mend

tree-sitter-analyzer 1.6.1.2__py3-none-any.whl → 1.6.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (17)

tree_sitter_analyzer/__init__.py CHANGED Viewed

@@ -11,7 +11,7 @@ Architecture:
 - Data Models: Generic and language-specific code element representations
 """
-__version__ = "1.6.1.2"
+__version__ = "1.6.1.4"
 __author__ = "aisheng.yu"
 __email__ = "aimasteracc@gmail.com"

tree_sitter_analyzer/core/query.py CHANGED Viewed

@@ -10,7 +10,7 @@ import logging
 import time
 from typing import Any
-from tree_sitter import Language, Node, Tree
+from tree_sitter import Language, Node, Query, QueryCursor, Tree
 from ..query_loader import get_query_loader
@@ -77,10 +77,11 @@ class QueryExecutor:
                     f"Query '{query_name}' not found", query_name=query_name
                 )
-            # Create and execute the query
+            # Create and execute the query using new API (tree-sitter 0.25.0+)
             try:
-                query = language.query(query_string)
-                captures = query.captures(tree.root_node)
+                query = Query(language, query_string)
+                cursor = QueryCursor(query)
+                captures = list(cursor.captures(tree.root_node))
                 # Process captures
                 try:
@@ -146,10 +147,11 @@ class QueryExecutor:
             if language is None:
                 return self._create_error_result("Language is None")  # type: ignore[unreachable]
-            # Create and execute the query
+            # Create and execute the query using new API (tree-sitter 0.25.0+)
             try:
-                query = language.query(query_string)
-                captures = query.captures(tree.root_node)
+                query = Query(language, query_string)
+                cursor = QueryCursor(query)
+                captures = list(cursor.captures(tree.root_node))
                 # Process captures
                 try:
@@ -373,8 +375,8 @@ class QueryExecutor:
             if lang_obj is None:
                 return False
-            # Try to create the query
-            lang_obj.query(query_string)
+            # Try to create the query using new API (tree-sitter 0.25.0+)
+            Query(lang_obj, query_string)
             return True
         except Exception as e:

tree_sitter_analyzer/core/query_service.py CHANGED Viewed

@@ -9,6 +9,8 @@ Provides core tree-sitter query functionality including predefined and custom qu
 import logging
 from typing import Any
+from tree_sitter import Query, QueryCursor
 from ..encoding_utils import read_file_safe
 from ..query_loader import query_loader
 from .parser import Parser
@@ -80,23 +82,18 @@ class QueryService:
                         f"Query '{query_key}' not found for language '{language}'"
                     )
-            # Execute tree-sitter query
-            ts_query = language_obj.query(query_string)
-            captures = ts_query.captures(tree.root_node)
+            # Execute tree-sitter query using new API (tree-sitter 0.25.0+)
+            ts_query = Query(language_obj, query_string)
+            cursor = QueryCursor(ts_query)
+            matches = cursor.matches(tree.root_node)
-            # Process capture results
+            # Process match results (new API returns list of (pattern_index, captures_dict))
             results = []
-            if isinstance(captures, dict):
-                # New tree-sitter API returns dictionary
-                for capture_name, nodes in captures.items():
+            for pattern_index, captures_dict in matches:
+                # captures_dict is {capture_name: [node1, node2, ...]}
+                for capture_name, nodes in captures_dict.items():
                     for node in nodes:
                         results.append(self._create_result_dict(node, capture_name))
-            else:
-                # Old tree-sitter API returns list of tuples
-                for capture in captures:
-                    if isinstance(capture, tuple) and len(capture) == 2:
-                        node, name = capture
-                        results.append(self._create_result_dict(node, name))
             # Apply filters
             if filter_expression and results:

tree_sitter_analyzer/encoding_utils.py CHANGED Viewed

@@ -456,6 +456,61 @@ def extract_text_slice(
     )
+def read_file_safe_streaming(file_path: str | Path):
+    """
+    Context manager for streaming file reading with automatic encoding detection.
+    This function opens a file with the correct encoding detected from the file's
+    content and yields a file handle that can be used for line-by-line reading.
+    This is memory-efficient for large files as it doesn't load the entire content.
+    Args:
+        file_path: Path to the file to read
+    Yields:
+        File handle opened with the correct encoding
+    Example:
+        with read_file_safe_streaming("large_file.txt") as f:
+            for line_num, line in enumerate(f, 1):
+                if line_num >= start_line:
+                    # Process line
+                    pass
+    """
+    import contextlib
+    file_path = Path(file_path)
+    # First, detect encoding by reading a small sample
+    try:
+        with open(file_path, "rb") as f:
+            # Read first 8KB to detect encoding
+            sample_data = f.read(8192)
+        if not sample_data:
+            # Empty file, use default encoding
+            detected_encoding = EncodingManager.DEFAULT_ENCODING
+        else:
+            # Detect encoding from sample with file path for caching
+            detected_encoding = EncodingManager.detect_encoding(sample_data, str(file_path))
+    except OSError as e:
+        log_warning(f"Failed to read file for encoding detection {file_path}: {e}")
+        raise e
+    # Open file with detected encoding for streaming
+    @contextlib.contextmanager
+    def _file_context():
+        try:
+            with open(file_path, "r", encoding=detected_encoding, errors="replace") as f:
+                yield f
+        except OSError as e:
+            log_warning(f"Failed to open file for streaming {file_path}: {e}")
+            raise e
+    return _file_context()
 def clear_encoding_cache() -> None:
     """Clear the global encoding cache"""
     _encoding_cache.clear()

tree_sitter_analyzer/file_handler.py CHANGED Viewed

@@ -5,9 +5,10 @@ File Handler Module
 This module provides file reading functionality with encoding detection and fallback.
 """
+import itertools
 from pathlib import Path
-from .encoding_utils import read_file_safe
+from .encoding_utils import read_file_safe, read_file_safe_streaming
 from .utils import log_error, log_info, log_warning
@@ -81,7 +82,10 @@ def read_file_partial(
     end_column: int | None = None,
 ) -> str | None:
     """
-    Read partial file content by line/column range
+    Read partial file content by line/column range using streaming approach.
+    This function uses a memory-efficient streaming approach that reads only
+    the required lines from the file, making it suitable for very large files.
     Args:
         file_path: Path to file
@@ -109,30 +113,39 @@ def read_file_partial(
         return None
     try:
-        # Read whole file safely
-        content, detected_encoding = read_file_safe(file_path)
-        # Split to lines
-        lines = content.splitlines(keepends=True)
-        total_lines = len(lines)
-        # Adjust line indexes
-        start_idx = start_line - 1  # convert to 0-based
-        end_idx = min(
-            end_line - 1 if end_line is not None else total_lines - 1, total_lines - 1
-        )
-        # Range check
-        if start_idx >= total_lines:
-            log_warning(
-                f"start_line ({start_line}) exceeds file length ({total_lines})"
-            )
-            return ""
-        # Select lines
-        selected_lines = lines[start_idx : end_idx + 1]
+        # Use streaming approach for memory efficiency
+        with read_file_safe_streaming(file_path) as f:
+            # Convert to 0-based indexing
+            start_idx = start_line - 1
+            end_idx = end_line - 1 if end_line is not None else None
+            # Use itertools.islice for efficient line selection
+            if end_idx is not None:
+                # Read specific range
+                selected_lines_iter = itertools.islice(f, start_idx, end_idx + 1)
+            else:
+                # Read from start_line to end of file
+                selected_lines_iter = itertools.islice(f, start_idx, None)
+            # Convert iterator to list for processing
+            selected_lines = list(selected_lines_iter)
+            # Check if we got any lines
+            if not selected_lines:
+                # Check if start_line is beyond file length by counting lines
+                with read_file_safe_streaming(file_path) as f_count:
+                    total_lines = sum(1 for _ in f_count)
+                if start_idx >= total_lines:
+                    log_warning(
+                        f"start_line ({start_line}) exceeds file length ({total_lines})"
+                    )
+                    return ""
+                else:
+                    # File might be empty or other issue
+                    return ""
-        # Handle column range
+        # Handle column range if specified
         if start_column is not None or end_column is not None:
             processed_lines = []
             for i, line in enumerate(selected_lines):
@@ -167,7 +180,7 @@ def read_file_partial(
                 # Preserve original newline (except last line)
                 if i < len(selected_lines) - 1:
                     # Detect original newline char of the line
-                    original_line = lines[start_idx + i]
+                    original_line = selected_lines[i]
                     if original_line.endswith("\r\n"):
                         line_content += "\r\n"
                     elif original_line.endswith("\n"):
@@ -182,9 +195,12 @@ def read_file_partial(
             # No column range: join lines directly
             result = "".join(selected_lines)
+        # Calculate end line for logging
+        actual_end_line = end_line or (start_line + len(selected_lines) - 1)
         log_info(
             f"Successfully read partial file {file_path}: "
-            f"lines {start_line}-{end_line or total_lines}"
+            f"lines {start_line}-{actual_end_line}"
             f"{f', columns {start_column}-{end_column}' if start_column is not None or end_column is not None else ''}"
         )

tree_sitter_analyzer/languages/python_plugin.py CHANGED Viewed

@@ -15,6 +15,7 @@ if TYPE_CHECKING:
 try:
     import tree_sitter
+    from tree_sitter import Query, QueryCursor
     TREE_SITTER_AVAILABLE = True
 except ImportError:
@@ -113,17 +114,23 @@ class PythonElementExtractor(ElementExtractor):
             language = tree.language if hasattr(tree, "language") else None
             if language:
-                query = language.query(class_query)
-                captures = query.captures(tree.root_node)
-                if isinstance(captures, dict):
-                    class_bodies = captures.get("class.body", [])
-                    # For each class body, extract attribute assignments
-                    for class_body in class_bodies:
-                        variables.extend(
-                            self._extract_class_attributes(class_body, source_code)
-                        )
+                # Use new API (tree-sitter 0.25.0+)
+                query = Query(language, class_query)
+                cursor = QueryCursor(query)
+                matches = cursor.matches(tree.root_node)
+                # Process matches to get class bodies
+                class_bodies = []
+                for pattern_index, captures_dict in matches:
+                    for capture_name, nodes in captures_dict.items():
+                        if capture_name == "class.body":
+                            class_bodies.extend(nodes)
+                # For each class body, extract attribute assignments
+                for class_body in class_bodies:
+                    variables.extend(
+                        self._extract_class_attributes(class_body, source_code)
+                    )
         except Exception as e:
             log_warning(f"Could not extract Python class attributes: {e}")
@@ -664,20 +671,29 @@ class PythonElementExtractor(ElementExtractor):
             language = tree.language if hasattr(tree, "language") else None
             if language:
                 for query_string in import_queries:
-                    query = language.query(query_string)
-                    captures = query.captures(tree.root_node)
-                    if isinstance(captures, dict):
-                        # Process different types of imports
-                        for key, nodes in captures.items():
-                            if key.endswith("statement"):
-                                import_type = key.split(".")[0]
-                                for node in nodes:
-                                    imp = self._extract_import_info(
-                                        node, source_code, import_type
-                                    )
-                                    if imp:
-                                        imports.append(imp)
+                    # Use new API (tree-sitter 0.25.0+)
+                    query = Query(language, query_string)
+                    cursor = QueryCursor(query)
+                    matches = cursor.matches(tree.root_node)
+                    # Process matches to get statement nodes
+                    statement_nodes = {}
+                    for pattern_index, captures_dict in matches:
+                        for capture_name, nodes in captures_dict.items():
+                            if capture_name.endswith("statement"):
+                                import_type = capture_name.split(".")[0]
+                                if import_type not in statement_nodes:
+                                    statement_nodes[import_type] = []
+                                statement_nodes[import_type].extend(nodes)
+                    # Process different types of imports
+                    for import_type, nodes in statement_nodes.items():
+                        for node in nodes:
+                            imp = self._extract_import_info(
+                                node, source_code, import_type
+                            )
+                            if imp:
+                                imports.append(imp)
         except Exception as e:
             log_warning(f"Could not extract Python imports: {e}")
@@ -1179,8 +1195,16 @@ class PythonPlugin(LanguagePlugin):
             else:
                 return {"error": f"Unknown query: {query_name}"}
-            query = language.query(query_string)
-            captures = query.captures(tree.root_node)
+            # Use new API (tree-sitter 0.25.0+)
+            query = Query(language, query_string)
+            cursor = QueryCursor(query)
+            matches = list(cursor.matches(tree.root_node))
+            # Convert matches to legacy format for compatibility
+            captures = []
+            for pattern_index, captures_dict in matches:
+                for capture_name, nodes in captures_dict.items():
+                    for node in nodes:
+                        captures.append((node, capture_name))
             return {"captures": captures, "query": query_string}
         except Exception as e:

tree-sitter-analyzer 1.6.1.2__py3-none-any.whl → 1.6.1.4__py3-none-any.whl

Potentially problematic release.

tree-sitter-analyzer 1.6.1.2__py3-none-any.whl → 1.6.1.4__py3-none-any.whl