tree-sitter-analyzer 0.9.1__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic; see the registry advisory for details.

Files changed (64)
  1. tree_sitter_analyzer/__init__.py +132 -132
  2. tree_sitter_analyzer/__main__.py +11 -11
  3. tree_sitter_analyzer/api.py +533 -533
  4. tree_sitter_analyzer/cli/__init__.py +39 -39
  5. tree_sitter_analyzer/cli/__main__.py +12 -12
  6. tree_sitter_analyzer/cli/commands/__init__.py +26 -26
  7. tree_sitter_analyzer/cli/commands/advanced_command.py +88 -88
  8. tree_sitter_analyzer/cli/commands/base_command.py +181 -178
  9. tree_sitter_analyzer/cli/commands/structure_command.py +138 -138
  10. tree_sitter_analyzer/cli/commands/summary_command.py +101 -101
  11. tree_sitter_analyzer/cli_main.py +7 -3
  12. tree_sitter_analyzer/core/__init__.py +15 -15
  13. tree_sitter_analyzer/core/analysis_engine.py +91 -87
  14. tree_sitter_analyzer/core/cache_service.py +320 -320
  15. tree_sitter_analyzer/core/engine.py +566 -566
  16. tree_sitter_analyzer/core/parser.py +293 -293
  17. tree_sitter_analyzer/encoding_utils.py +459 -459
  18. tree_sitter_analyzer/file_handler.py +210 -210
  19. tree_sitter_analyzer/formatters/__init__.py +1 -1
  20. tree_sitter_analyzer/formatters/base_formatter.py +167 -167
  21. tree_sitter_analyzer/formatters/formatter_factory.py +78 -78
  22. tree_sitter_analyzer/formatters/java_formatter.py +18 -18
  23. tree_sitter_analyzer/formatters/python_formatter.py +19 -19
  24. tree_sitter_analyzer/interfaces/__init__.py +9 -9
  25. tree_sitter_analyzer/interfaces/cli.py +528 -528
  26. tree_sitter_analyzer/interfaces/cli_adapter.py +344 -343
  27. tree_sitter_analyzer/interfaces/mcp_adapter.py +206 -206
  28. tree_sitter_analyzer/language_detector.py +53 -53
  29. tree_sitter_analyzer/languages/__init__.py +10 -10
  30. tree_sitter_analyzer/languages/java_plugin.py +1 -1
  31. tree_sitter_analyzer/languages/javascript_plugin.py +446 -446
  32. tree_sitter_analyzer/languages/python_plugin.py +755 -755
  33. tree_sitter_analyzer/mcp/__init__.py +34 -45
  34. tree_sitter_analyzer/mcp/resources/__init__.py +44 -44
  35. tree_sitter_analyzer/mcp/resources/code_file_resource.py +209 -209
  36. tree_sitter_analyzer/mcp/server.py +623 -568
  37. tree_sitter_analyzer/mcp/tools/__init__.py +30 -30
  38. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +681 -673
  39. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +247 -247
  40. tree_sitter_analyzer/mcp/tools/base_tool.py +54 -54
  41. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +310 -308
  42. tree_sitter_analyzer/mcp/tools/table_format_tool.py +386 -379
  43. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +563 -559
  44. tree_sitter_analyzer/mcp/utils/__init__.py +107 -107
  45. tree_sitter_analyzer/models.py +10 -10
  46. tree_sitter_analyzer/output_manager.py +253 -253
  47. tree_sitter_analyzer/plugins/__init__.py +280 -280
  48. tree_sitter_analyzer/plugins/base.py +529 -529
  49. tree_sitter_analyzer/plugins/manager.py +379 -379
  50. tree_sitter_analyzer/project_detector.py +330 -317
  51. tree_sitter_analyzer/queries/__init__.py +26 -26
  52. tree_sitter_analyzer/queries/java.py +391 -391
  53. tree_sitter_analyzer/queries/javascript.py +148 -148
  54. tree_sitter_analyzer/queries/python.py +285 -285
  55. tree_sitter_analyzer/queries/typescript.py +229 -229
  56. tree_sitter_analyzer/query_loader.py +257 -257
  57. tree_sitter_analyzer/security/boundary_manager.py +57 -51
  58. tree_sitter_analyzer/security/validator.py +246 -241
  59. tree_sitter_analyzer/utils.py +294 -277
  60. {tree_sitter_analyzer-0.9.1.dist-info → tree_sitter_analyzer-0.9.3.dist-info}/METADATA +13 -13
  61. tree_sitter_analyzer-0.9.3.dist-info/RECORD +77 -0
  62. {tree_sitter_analyzer-0.9.1.dist-info → tree_sitter_analyzer-0.9.3.dist-info}/entry_points.txt +1 -0
  63. tree_sitter_analyzer-0.9.1.dist-info/RECORD +0 -77
  64. {tree_sitter_analyzer-0.9.1.dist-info → tree_sitter_analyzer-0.9.3.dist-info}/WHEEL +0 -0
@@ -1,459 +1,459 @@
1
- #!/usr/bin/env python3
2
- """
3
- Optimized Encoding Utilities Module
4
-
5
- This module provides unified encoding/decoding functionality with performance
6
- optimizations including file-based encoding caching to reduce redundant
7
- chardet.detect() calls.
8
- """
9
-
10
- import os
11
- import sys
12
- import threading
13
- import time
14
- from pathlib import Path
15
- from typing import Any
16
-
17
-
18
- # Set up encoding environment early
19
- def _setup_encoding_environment() -> None:
20
- """Set up proper encoding environment"""
21
- try:
22
- os.environ["PYTHONIOENCODING"] = "utf-8"
23
- os.environ["PYTHONUTF8"] = "1"
24
-
25
- # Ensure proper stdout/stderr encoding if possible
26
- if hasattr(sys.stdout, "reconfigure"):
27
- sys.stdout.reconfigure(encoding="utf-8", errors="replace")
28
- if hasattr(sys.stderr, "reconfigure"):
29
- sys.stderr.reconfigure(encoding="utf-8", errors="replace")
30
- except Exception:
31
- pass # Ignore setup errors, use defaults
32
-
33
-
34
- # Set up environment when module is imported
35
- _setup_encoding_environment()
36
-
37
- # Try to import chardet with fallback
38
- try:
39
- import chardet
40
-
41
- CHARDET_AVAILABLE = True
42
- except ImportError:
43
- CHARDET_AVAILABLE = False
44
-
45
- # Import utilities with fallback
46
- try:
47
- from .utils import log_debug, log_warning
48
- except ImportError:
49
- # Fallback logging functions with compatible signatures
50
- def log_debug(message: str, *args: Any, **kwargs: Any) -> None:
51
- print(f"DEBUG: {message}")
52
-
53
- def log_warning(message: str, *args: Any, **kwargs: Any) -> None:
54
- print(f"WARNING: {message}")
55
-
56
-
57
- class EncodingCache:
58
- """Thread-safe encoding cache for file-based encoding detection optimization"""
59
-
60
- def __init__(self, max_size: int = 1000, ttl_seconds: int = 3600):
61
- """
62
- Initialize encoding cache
63
-
64
- Args:
65
- max_size: Maximum number of cached entries
66
- ttl_seconds: Time-to-live for cache entries in seconds
67
- """
68
- self._cache: dict[
69
- str, tuple[str, float]
70
- ] = {} # file_path -> (encoding, timestamp)
71
- self._lock = threading.RLock()
72
- self._max_size = max_size
73
- self._ttl_seconds = ttl_seconds
74
-
75
- def get(self, file_path: str) -> str | None:
76
- """
77
- Get cached encoding for file path
78
-
79
- Args:
80
- file_path: Path to the file
81
-
82
- Returns:
83
- Cached encoding or None if not found/expired
84
- """
85
- with self._lock:
86
- if file_path not in self._cache:
87
- return None
88
-
89
- encoding, timestamp = self._cache[file_path]
90
- current_time = time.time()
91
-
92
- # Check if entry has expired
93
- if current_time - timestamp > self._ttl_seconds:
94
- del self._cache[file_path]
95
- return None
96
-
97
- return encoding
98
-
99
- def set(self, file_path: str, encoding: str) -> None:
100
- """
101
- Cache encoding for file path
102
-
103
- Args:
104
- file_path: Path to the file
105
- encoding: Detected encoding
106
- """
107
- with self._lock:
108
- current_time = time.time()
109
-
110
- # Clean up expired entries if cache is getting full
111
- if len(self._cache) >= self._max_size:
112
- self._cleanup_expired()
113
-
114
- # If still full after cleanup, remove oldest entry
115
- if len(self._cache) >= self._max_size:
116
- oldest_key = min(self._cache.keys(), key=lambda k: self._cache[k][1])
117
- del self._cache[oldest_key]
118
-
119
- self._cache[file_path] = (encoding, current_time)
120
-
121
- def _cleanup_expired(self) -> None:
122
- """Remove expired entries from cache"""
123
- current_time = time.time()
124
- expired_keys = [
125
- key
126
- for key, (_, timestamp) in self._cache.items()
127
- if current_time - timestamp > self._ttl_seconds
128
- ]
129
- for key in expired_keys:
130
- del self._cache[key]
131
-
132
- def clear(self) -> None:
133
- """Clear all cached entries"""
134
- with self._lock:
135
- self._cache.clear()
136
-
137
- def size(self) -> int:
138
- """Get current cache size"""
139
- with self._lock:
140
- return len(self._cache)
141
-
142
-
143
- # Global encoding cache instance
144
- _encoding_cache = EncodingCache()
145
-
146
-
147
- class EncodingManager:
148
- """Centralized encoding management for consistent text processing"""
149
-
150
- DEFAULT_ENCODING = "utf-8"
151
- FALLBACK_ENCODINGS = ["utf-8", "cp1252", "iso-8859-1", "shift_jis", "gbk"]
152
-
153
- @classmethod
154
- def safe_encode(cls, text: str | None, encoding: str | None = None) -> bytes:
155
- """
156
- Safely encode text to bytes with fallback handling
157
-
158
- Args:
159
- text: Text to encode (can be None)
160
- encoding: Target encoding (defaults to UTF-8)
161
-
162
- Returns:
163
- Encoded bytes
164
- """
165
- # Handle None input
166
- if text is None:
167
- return b""
168
-
169
- target_encoding = encoding or cls.DEFAULT_ENCODING
170
-
171
- try:
172
- return text.encode(target_encoding)
173
- except UnicodeEncodeError as e:
174
- log_debug(f"Failed to encode with {target_encoding}, trying fallbacks: {e}")
175
-
176
- # Try fallback encodings
177
- for fallback in cls.FALLBACK_ENCODINGS:
178
- if fallback != target_encoding:
179
- try:
180
- return text.encode(fallback, errors="replace")
181
- except UnicodeEncodeError:
182
- continue
183
-
184
- # Last resort: encode with error replacement
185
- log_warning(f"Using error replacement for encoding: {text[:50]}...")
186
- return text.encode(cls.DEFAULT_ENCODING, errors="replace")
187
-
188
- @classmethod
189
- def safe_decode(cls, data: bytes, encoding: str | None = None) -> str:
190
- """
191
- Safely decode bytes to text with fallback handling
192
-
193
- Args:
194
- data: Bytes to decode
195
- encoding: Source encoding (auto-detected if None)
196
-
197
- Returns:
198
- Decoded text
199
- """
200
- if data is None or len(data) == 0:
201
- return ""
202
-
203
- # Use provided encoding or detect
204
- target_encoding = encoding
205
- if not target_encoding:
206
- target_encoding = cls.detect_encoding(data)
207
-
208
- try:
209
- return data.decode(target_encoding)
210
- except UnicodeDecodeError as e:
211
- log_debug(f"Failed to decode with {target_encoding}, trying fallbacks: {e}")
212
-
213
- # Try fallback encodings
214
- for fallback in cls.FALLBACK_ENCODINGS:
215
- if fallback != target_encoding:
216
- try:
217
- return data.decode(fallback, errors="replace")
218
- except UnicodeDecodeError:
219
- continue
220
-
221
- # Last resort: decode with error replacement
222
- log_warning(
223
- f"Using error replacement for decoding data (length: {len(data)})"
224
- )
225
- return data.decode(cls.DEFAULT_ENCODING, errors="replace")
226
-
227
- @classmethod
228
- def detect_encoding(cls, data: bytes, file_path: str | None = None) -> str:
229
- """
230
- Detect encoding of byte data with optional file-based caching
231
-
232
- Args:
233
- data: Bytes to analyze
234
- file_path: Optional file path for caching (improves performance)
235
-
236
- Returns:
237
- Detected encoding name
238
- """
239
- if not data:
240
- return cls.DEFAULT_ENCODING
241
-
242
- # Check cache first if file_path is provided
243
- if file_path:
244
- cached_encoding = _encoding_cache.get(file_path)
245
- if cached_encoding:
246
- log_debug(f"Using cached encoding for {file_path}: {cached_encoding}")
247
- return cached_encoding
248
-
249
- detected_encoding = cls.DEFAULT_ENCODING
250
-
251
- # If chardet is not available, use simple heuristics
252
- if not CHARDET_AVAILABLE:
253
- try:
254
- # Try UTF-8 first
255
- data.decode("utf-8")
256
- detected_encoding = "utf-8"
257
- except UnicodeDecodeError:
258
- # Check for BOM
259
- if data.startswith(b"\xff\xfe"):
260
- detected_encoding = "utf-16-le"
261
- elif data.startswith(b"\xfe\xff"):
262
- detected_encoding = "utf-16-be"
263
- elif data.startswith(b"\xef\xbb\xbf"):
264
- detected_encoding = "utf-8-sig"
265
- else:
266
- detected_encoding = cls.DEFAULT_ENCODING
267
- else:
268
- try:
269
- # Use chardet for detection
270
- detection = chardet.detect(data)
271
- if detection and detection["encoding"]:
272
- confidence = detection.get("confidence", 0)
273
- detected_encoding = detection["encoding"].lower()
274
-
275
- # Only trust high-confidence detections
276
- if confidence > 0.7:
277
- log_debug(
278
- f"Detected encoding: {detected_encoding} (confidence: {confidence:.2f})"
279
- )
280
- else:
281
- log_debug(
282
- f"Low confidence encoding detection: {detected_encoding} (confidence: {confidence:.2f}), using default"
283
- )
284
- detected_encoding = cls.DEFAULT_ENCODING
285
-
286
- except Exception as e:
287
- log_debug(f"Encoding detection failed: {e}")
288
- detected_encoding = cls.DEFAULT_ENCODING
289
-
290
- # Cache the result if file_path is provided
291
- if file_path and detected_encoding:
292
- _encoding_cache.set(file_path, detected_encoding)
293
- log_debug(f"Cached encoding for {file_path}: {detected_encoding}")
294
-
295
- return detected_encoding
296
-
297
- @classmethod
298
- def read_file_safe(cls, file_path: str | Path) -> tuple[str, str]:
299
- """
300
- Safely read a file with automatic encoding detection and caching
301
-
302
- Args:
303
- file_path: Path to the file
304
-
305
- Returns:
306
- Tuple of (content, detected_encoding)
307
- """
308
- file_path = Path(file_path)
309
-
310
- try:
311
- # Read raw bytes first
312
- with open(file_path, "rb") as f:
313
- raw_data = f.read()
314
-
315
- if not raw_data:
316
- return "", cls.DEFAULT_ENCODING
317
-
318
- # Detect and decode with file path for caching
319
- detected_encoding = cls.detect_encoding(raw_data, str(file_path))
320
- content = cls.safe_decode(raw_data, detected_encoding)
321
-
322
- # Normalize line endings for consistency
323
- content = cls.normalize_line_endings(content)
324
-
325
- return content, detected_encoding
326
-
327
- except OSError as e:
328
- log_warning(f"Failed to read file {file_path}: {e}")
329
- raise e
330
-
331
- @classmethod
332
- def write_file_safe(
333
- cls, file_path: str | Path, content: str, encoding: str | None = None
334
- ) -> bool:
335
- """
336
- Safely write content to a file
337
-
338
- Args:
339
- file_path: Path to the file
340
- content: Content to write
341
- encoding: Target encoding (defaults to UTF-8)
342
-
343
- Returns:
344
- True if successful, False otherwise
345
- """
346
- file_path = Path(file_path)
347
- target_encoding = encoding or cls.DEFAULT_ENCODING
348
-
349
- try:
350
- encoded_content = cls.safe_encode(content, target_encoding)
351
-
352
- with open(file_path, "wb") as f:
353
- f.write(encoded_content)
354
-
355
- return True
356
-
357
- except OSError as e:
358
- log_warning(f"Failed to write file {file_path}: {e}")
359
- return False
360
-
361
- @classmethod
362
- def normalize_line_endings(cls, text: str) -> str:
363
- """
364
- Normalize line endings to Unix style (\n)
365
-
366
- Args:
367
- text: Text to normalize
368
-
369
- Returns:
370
- Text with normalized line endings
371
- """
372
- if not text:
373
- return text
374
-
375
- # Replace Windows (\r\n) and Mac (\r) line endings with Unix (\n)
376
- return text.replace("\r\n", "\n").replace("\r", "\n")
377
-
378
- @classmethod
379
- def extract_text_slice(
380
- cls,
381
- content_bytes: bytes,
382
- start_byte: int,
383
- end_byte: int,
384
- encoding: str | None = None,
385
- ) -> str:
386
- """
387
- Extract a slice of text from bytes with proper encoding handling
388
-
389
- Args:
390
- content_bytes: Source bytes
391
- start_byte: Start position
392
- end_byte: End position
393
- encoding: Encoding to use (auto-detected if None)
394
-
395
- Returns:
396
- Extracted text slice
397
- """
398
- if not content_bytes or start_byte >= len(content_bytes):
399
- return ""
400
-
401
- # Ensure bounds are valid
402
- start_byte = max(0, start_byte)
403
- end_byte = min(len(content_bytes), end_byte)
404
-
405
- if start_byte >= end_byte:
406
- return ""
407
-
408
- # Extract byte slice
409
- byte_slice = content_bytes[start_byte:end_byte]
410
-
411
- # Decode the slice
412
- return cls.safe_decode(byte_slice, encoding)
413
-
414
-
415
- # Convenience functions for backward compatibility
416
- def safe_encode(text: str, encoding: str | None = None) -> bytes:
417
- """Convenience function for safe encoding"""
418
- return EncodingManager.safe_encode(text, encoding)
419
-
420
-
421
- def safe_decode(data: bytes, encoding: str | None = None) -> str:
422
- """Convenience function for safe decoding"""
423
- return EncodingManager.safe_decode(data, encoding)
424
-
425
-
426
- def detect_encoding(data: bytes, file_path: str | None = None) -> str:
427
- """Convenience function for encoding detection with optional caching"""
428
- return EncodingManager.detect_encoding(data, file_path)
429
-
430
-
431
- def read_file_safe(file_path: str | Path) -> tuple[str, str]:
432
- """Convenience function for safe file reading"""
433
- return EncodingManager.read_file_safe(file_path)
434
-
435
-
436
- def write_file_safe(
437
- file_path: str | Path, content: str, encoding: str | None = None
438
- ) -> bool:
439
- """Convenience function for safe file writing"""
440
- return EncodingManager.write_file_safe(file_path, content, encoding)
441
-
442
-
443
- def extract_text_slice(
444
- content_bytes: bytes, start_byte: int, end_byte: int, encoding: str | None = None
445
- ) -> str:
446
- """Convenience function for text slice extraction"""
447
- return EncodingManager.extract_text_slice(
448
- content_bytes, start_byte, end_byte, encoding
449
- )
450
-
451
-
452
- def clear_encoding_cache() -> None:
453
- """Clear the global encoding cache"""
454
- _encoding_cache.clear()
455
-
456
-
457
- def get_encoding_cache_size() -> int:
458
- """Get the current size of the encoding cache"""
459
- return _encoding_cache.size()
1
+ #!/usr/bin/env python3
2
+ """
3
+ Optimized Encoding Utilities Module
4
+
5
+ This module provides unified encoding/decoding functionality with performance
6
+ optimizations including file-based encoding caching to reduce redundant
7
+ chardet.detect() calls.
8
+ """
9
+
10
+ import os
11
+ import sys
12
+ import threading
13
+ import time
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+
18
# Set up encoding environment early
def _setup_encoding_environment() -> None:
    """Force UTF-8 for Python I/O, best-effort.

    Sets ``PYTHONIOENCODING``/``PYTHONUTF8`` and, where the streams support
    it, reconfigures stdout/stderr to UTF-8 with replacement errors.  Any
    failure is swallowed so importing this module never breaks on platforms
    or stream replacements that do not cooperate.
    """
    try:
        os.environ["PYTHONIOENCODING"] = "utf-8"
        os.environ["PYTHONUTF8"] = "1"

        # reconfigure() only exists on real text-IO streams; guard so
        # redirected or replaced stdout/stderr objects are tolerated.
        for stream in (sys.stdout, sys.stderr):
            if hasattr(stream, "reconfigure"):
                stream.reconfigure(encoding="utf-8", errors="replace")
    except Exception:
        # Best effort only -- fall back to whatever the platform provides.
        pass


# Set up environment when module is imported
_setup_encoding_environment()
36
+
37
+ # Try to import chardet with fallback
38
+ try:
39
+ import chardet
40
+
41
+ CHARDET_AVAILABLE = True
42
+ except ImportError:
43
+ CHARDET_AVAILABLE = False
44
+
45
+ # Import utilities with fallback
46
+ try:
47
+ from .utils import log_debug, log_warning
48
+ except ImportError:
49
+ # Fallback logging functions with compatible signatures
50
+ def log_debug(message: str, *args: Any, **kwargs: Any) -> None:
51
+ print(f"DEBUG: {message}")
52
+
53
+ def log_warning(message: str, *args: Any, **kwargs: Any) -> None:
54
+ print(f"WARNING: {message}")
55
+
56
+
57
+ class EncodingCache:
58
+ """Thread-safe encoding cache for file-based encoding detection optimization"""
59
+
60
+ def __init__(self, max_size: int = 1000, ttl_seconds: int = 3600):
61
+ """
62
+ Initialize encoding cache
63
+
64
+ Args:
65
+ max_size: Maximum number of cached entries
66
+ ttl_seconds: Time-to-live for cache entries in seconds
67
+ """
68
+ self._cache: dict[
69
+ str, tuple[str, float]
70
+ ] = {} # file_path -> (encoding, timestamp)
71
+ self._lock = threading.RLock()
72
+ self._max_size = max_size
73
+ self._ttl_seconds = ttl_seconds
74
+
75
+ def get(self, file_path: str) -> str | None:
76
+ """
77
+ Get cached encoding for file path
78
+
79
+ Args:
80
+ file_path: Path to the file
81
+
82
+ Returns:
83
+ Cached encoding or None if not found/expired
84
+ """
85
+ with self._lock:
86
+ if file_path not in self._cache:
87
+ return None
88
+
89
+ encoding, timestamp = self._cache[file_path]
90
+ current_time = time.time()
91
+
92
+ # Check if entry has expired
93
+ if current_time - timestamp > self._ttl_seconds:
94
+ del self._cache[file_path]
95
+ return None
96
+
97
+ return encoding
98
+
99
+ def set(self, file_path: str, encoding: str) -> None:
100
+ """
101
+ Cache encoding for file path
102
+
103
+ Args:
104
+ file_path: Path to the file
105
+ encoding: Detected encoding
106
+ """
107
+ with self._lock:
108
+ current_time = time.time()
109
+
110
+ # Clean up expired entries if cache is getting full
111
+ if len(self._cache) >= self._max_size:
112
+ self._cleanup_expired()
113
+
114
+ # If still full after cleanup, remove oldest entry
115
+ if len(self._cache) >= self._max_size:
116
+ oldest_key = min(self._cache.keys(), key=lambda k: self._cache[k][1])
117
+ del self._cache[oldest_key]
118
+
119
+ self._cache[file_path] = (encoding, current_time)
120
+
121
+ def _cleanup_expired(self) -> None:
122
+ """Remove expired entries from cache"""
123
+ current_time = time.time()
124
+ expired_keys = [
125
+ key
126
+ for key, (_, timestamp) in self._cache.items()
127
+ if current_time - timestamp > self._ttl_seconds
128
+ ]
129
+ for key in expired_keys:
130
+ del self._cache[key]
131
+
132
+ def clear(self) -> None:
133
+ """Clear all cached entries"""
134
+ with self._lock:
135
+ self._cache.clear()
136
+
137
+ def size(self) -> int:
138
+ """Get current cache size"""
139
+ with self._lock:
140
+ return len(self._cache)
141
+
142
+
143
+ # Global encoding cache instance
144
+ _encoding_cache = EncodingCache()
145
+
146
+
147
+ class EncodingManager:
148
+ """Centralized encoding management for consistent text processing"""
149
+
150
+ DEFAULT_ENCODING = "utf-8"
151
+ FALLBACK_ENCODINGS = ["utf-8", "cp1252", "iso-8859-1", "shift_jis", "gbk"]
152
+
153
+ @classmethod
154
+ def safe_encode(cls, text: str | None, encoding: str | None = None) -> bytes:
155
+ """
156
+ Safely encode text to bytes with fallback handling
157
+
158
+ Args:
159
+ text: Text to encode (can be None)
160
+ encoding: Target encoding (defaults to UTF-8)
161
+
162
+ Returns:
163
+ Encoded bytes
164
+ """
165
+ # Handle None input
166
+ if text is None:
167
+ return b""
168
+
169
+ target_encoding = encoding or cls.DEFAULT_ENCODING
170
+
171
+ try:
172
+ return text.encode(target_encoding)
173
+ except UnicodeEncodeError as e:
174
+ log_debug(f"Failed to encode with {target_encoding}, trying fallbacks: {e}")
175
+
176
+ # Try fallback encodings
177
+ for fallback in cls.FALLBACK_ENCODINGS:
178
+ if fallback != target_encoding:
179
+ try:
180
+ return text.encode(fallback, errors="replace")
181
+ except UnicodeEncodeError:
182
+ continue
183
+
184
+ # Last resort: encode with error replacement
185
+ log_warning(f"Using error replacement for encoding: {text[:50]}...")
186
+ return text.encode(cls.DEFAULT_ENCODING, errors="replace")
187
+
188
+ @classmethod
189
+ def safe_decode(cls, data: bytes, encoding: str | None = None) -> str:
190
+ """
191
+ Safely decode bytes to text with fallback handling
192
+
193
+ Args:
194
+ data: Bytes to decode
195
+ encoding: Source encoding (auto-detected if None)
196
+
197
+ Returns:
198
+ Decoded text
199
+ """
200
+ if data is None or len(data) == 0:
201
+ return ""
202
+
203
+ # Use provided encoding or detect
204
+ target_encoding = encoding
205
+ if not target_encoding:
206
+ target_encoding = cls.detect_encoding(data)
207
+
208
+ try:
209
+ return data.decode(target_encoding)
210
+ except UnicodeDecodeError as e:
211
+ log_debug(f"Failed to decode with {target_encoding}, trying fallbacks: {e}")
212
+
213
+ # Try fallback encodings
214
+ for fallback in cls.FALLBACK_ENCODINGS:
215
+ if fallback != target_encoding:
216
+ try:
217
+ return data.decode(fallback, errors="replace")
218
+ except UnicodeDecodeError:
219
+ continue
220
+
221
+ # Last resort: decode with error replacement
222
+ log_warning(
223
+ f"Using error replacement for decoding data (length: {len(data)})"
224
+ )
225
+ return data.decode(cls.DEFAULT_ENCODING, errors="replace")
226
+
227
+ @classmethod
228
+ def detect_encoding(cls, data: bytes, file_path: str | None = None) -> str:
229
+ """
230
+ Detect encoding of byte data with optional file-based caching
231
+
232
+ Args:
233
+ data: Bytes to analyze
234
+ file_path: Optional file path for caching (improves performance)
235
+
236
+ Returns:
237
+ Detected encoding name
238
+ """
239
+ if not data:
240
+ return cls.DEFAULT_ENCODING
241
+
242
+ # Check cache first if file_path is provided
243
+ if file_path:
244
+ cached_encoding = _encoding_cache.get(file_path)
245
+ if cached_encoding:
246
+ log_debug(f"Using cached encoding for {file_path}: {cached_encoding}")
247
+ return cached_encoding
248
+
249
+ detected_encoding = cls.DEFAULT_ENCODING
250
+
251
+ # If chardet is not available, use simple heuristics
252
+ if not CHARDET_AVAILABLE:
253
+ try:
254
+ # Try UTF-8 first
255
+ data.decode("utf-8")
256
+ detected_encoding = "utf-8"
257
+ except UnicodeDecodeError:
258
+ # Check for BOM
259
+ if data.startswith(b"\xff\xfe"):
260
+ detected_encoding = "utf-16-le"
261
+ elif data.startswith(b"\xfe\xff"):
262
+ detected_encoding = "utf-16-be"
263
+ elif data.startswith(b"\xef\xbb\xbf"):
264
+ detected_encoding = "utf-8-sig"
265
+ else:
266
+ detected_encoding = cls.DEFAULT_ENCODING
267
+ else:
268
+ try:
269
+ # Use chardet for detection
270
+ detection = chardet.detect(data)
271
+ if detection and detection["encoding"]:
272
+ confidence = detection.get("confidence", 0)
273
+ detected_encoding = detection["encoding"].lower()
274
+
275
+ # Only trust high-confidence detections
276
+ if confidence > 0.7:
277
+ log_debug(
278
+ f"Detected encoding: {detected_encoding} (confidence: {confidence:.2f})"
279
+ )
280
+ else:
281
+ log_debug(
282
+ f"Low confidence encoding detection: {detected_encoding} (confidence: {confidence:.2f}), using default"
283
+ )
284
+ detected_encoding = cls.DEFAULT_ENCODING
285
+
286
+ except Exception as e:
287
+ log_debug(f"Encoding detection failed: {e}")
288
+ detected_encoding = cls.DEFAULT_ENCODING
289
+
290
+ # Cache the result if file_path is provided
291
+ if file_path and detected_encoding:
292
+ _encoding_cache.set(file_path, detected_encoding)
293
+ log_debug(f"Cached encoding for {file_path}: {detected_encoding}")
294
+
295
+ return detected_encoding
296
+
297
+ @classmethod
298
+ def read_file_safe(cls, file_path: str | Path) -> tuple[str, str]:
299
+ """
300
+ Safely read a file with automatic encoding detection and caching
301
+
302
+ Args:
303
+ file_path: Path to the file
304
+
305
+ Returns:
306
+ Tuple of (content, detected_encoding)
307
+ """
308
+ file_path = Path(file_path)
309
+
310
+ try:
311
+ # Read raw bytes first
312
+ with open(file_path, "rb") as f:
313
+ raw_data = f.read()
314
+
315
+ if not raw_data:
316
+ return "", cls.DEFAULT_ENCODING
317
+
318
+ # Detect and decode with file path for caching
319
+ detected_encoding = cls.detect_encoding(raw_data, str(file_path))
320
+ content = cls.safe_decode(raw_data, detected_encoding)
321
+
322
+ # Normalize line endings for consistency
323
+ content = cls.normalize_line_endings(content)
324
+
325
+ return content, detected_encoding
326
+
327
+ except OSError as e:
328
+ log_warning(f"Failed to read file {file_path}: {e}")
329
+ raise e
330
+
331
+ @classmethod
332
+ def write_file_safe(
333
+ cls, file_path: str | Path, content: str, encoding: str | None = None
334
+ ) -> bool:
335
+ """
336
+ Safely write content to a file
337
+
338
+ Args:
339
+ file_path: Path to the file
340
+ content: Content to write
341
+ encoding: Target encoding (defaults to UTF-8)
342
+
343
+ Returns:
344
+ True if successful, False otherwise
345
+ """
346
+ file_path = Path(file_path)
347
+ target_encoding = encoding or cls.DEFAULT_ENCODING
348
+
349
+ try:
350
+ encoded_content = cls.safe_encode(content, target_encoding)
351
+
352
+ with open(file_path, "wb") as f:
353
+ f.write(encoded_content)
354
+
355
+ return True
356
+
357
+ except OSError as e:
358
+ log_warning(f"Failed to write file {file_path}: {e}")
359
+ return False
360
+
361
+ @classmethod
362
+ def normalize_line_endings(cls, text: str) -> str:
363
+ """
364
+ Normalize line endings to Unix style (\n)
365
+
366
+ Args:
367
+ text: Text to normalize
368
+
369
+ Returns:
370
+ Text with normalized line endings
371
+ """
372
+ if not text:
373
+ return text
374
+
375
+ # Replace Windows (\r\n) and Mac (\r) line endings with Unix (\n)
376
+ return text.replace("\r\n", "\n").replace("\r", "\n")
377
+
378
+ @classmethod
379
+ def extract_text_slice(
380
+ cls,
381
+ content_bytes: bytes,
382
+ start_byte: int,
383
+ end_byte: int,
384
+ encoding: str | None = None,
385
+ ) -> str:
386
+ """
387
+ Extract a slice of text from bytes with proper encoding handling
388
+
389
+ Args:
390
+ content_bytes: Source bytes
391
+ start_byte: Start position
392
+ end_byte: End position
393
+ encoding: Encoding to use (auto-detected if None)
394
+
395
+ Returns:
396
+ Extracted text slice
397
+ """
398
+ if not content_bytes or start_byte >= len(content_bytes):
399
+ return ""
400
+
401
+ # Ensure bounds are valid
402
+ start_byte = max(0, start_byte)
403
+ end_byte = min(len(content_bytes), end_byte)
404
+
405
+ if start_byte >= end_byte:
406
+ return ""
407
+
408
+ # Extract byte slice
409
+ byte_slice = content_bytes[start_byte:end_byte]
410
+
411
+ # Decode the slice
412
+ return cls.safe_decode(byte_slice, encoding)
413
+
414
+
415
# Convenience functions for backward compatibility


def safe_encode(text: str | None, encoding: str | None = None) -> bytes:
    """Convenience wrapper for :meth:`EncodingManager.safe_encode`.

    FIX: annotation widened to ``str | None`` -- the underlying method
    explicitly accepts None (returning ``b""``), and the old ``str``
    annotation contradicted that contract.
    """
    return EncodingManager.safe_encode(text, encoding)


def safe_decode(data: bytes, encoding: str | None = None) -> str:
    """Convenience wrapper for :meth:`EncodingManager.safe_decode`."""
    return EncodingManager.safe_decode(data, encoding)


def detect_encoding(data: bytes, file_path: str | None = None) -> str:
    """Convenience wrapper for :meth:`EncodingManager.detect_encoding`.

    Passing *file_path* enables caching of the detection result.
    """
    return EncodingManager.detect_encoding(data, file_path)


def read_file_safe(file_path: str | Path) -> tuple[str, str]:
    """Convenience wrapper for :meth:`EncodingManager.read_file_safe`."""
    return EncodingManager.read_file_safe(file_path)


def write_file_safe(
    file_path: str | Path, content: str, encoding: str | None = None
) -> bool:
    """Convenience wrapper for :meth:`EncodingManager.write_file_safe`."""
    return EncodingManager.write_file_safe(file_path, content, encoding)


def extract_text_slice(
    content_bytes: bytes, start_byte: int, end_byte: int, encoding: str | None = None
) -> str:
    """Convenience wrapper for :meth:`EncodingManager.extract_text_slice`."""
    return EncodingManager.extract_text_slice(
        content_bytes, start_byte, end_byte, encoding
    )


def clear_encoding_cache() -> None:
    """Clear the module-global encoding cache."""
    _encoding_cache.clear()


def get_encoding_cache_size() -> int:
    """Return the number of entries in the module-global encoding cache."""
    return _encoding_cache.size()