PyPI - tree-sitter-analyzer - Versions diffs - 1.7.4__py3-none-any.whl → 1.7.7__py3-none-any.whl - Mend

tree-sitter-analyzer 1.7.4__py3-none-any.whl → 1.7.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (20) hide show

tree_sitter_analyzer/__init__.py CHANGED Viewed

@@ -11,7 +11,7 @@ Architecture:
 - Data Models: Generic and language-specific code element representations
 """
-__version__ = "1.7.4"
+__version__ = "1.7.7"
 __author__ = "aisheng.yu"
 __email__ = "aimasteracc@gmail.com"

tree_sitter_analyzer/api.py CHANGED Viewed

@@ -11,6 +11,7 @@ import logging
 from pathlib import Path
 from typing import Any
+from . import __version__
 from .core.engine import AnalysisEngine
 from .utils import log_error
@@ -500,7 +501,7 @@ def get_framework_info() -> dict[str, Any]:
         return {
             "name": "tree-sitter-analyzer",
-            "version": "2.0.0",  # New architecture version
+            "version": __version__,
             "supported_languages": engine.get_supported_languages(),
             "total_languages": len(engine.get_supported_languages()),
             "plugin_info": {
@@ -521,7 +522,7 @@ def get_framework_info() -> dict[str, Any]:
         }
     except Exception as e:
         log_error(f"Failed to get framework info: {e}")
-        return {"name": "tree-sitter-analyzer", "version": "2.0.0", "error": str(e)}
+        return {"name": "tree-sitter-analyzer", "version": __version__, "error": str(e)}
 def execute_query(

tree_sitter_analyzer/exceptions.py CHANGED Viewed

@@ -398,3 +398,337 @@ class RegexSecurityError(SecurityError):
         )
         self.pattern = pattern
         self.dangerous_construct = dangerous_construct
+# MCP-specific exceptions for enhanced error handling
+class MCPToolError(MCPError):
+    """Raised when MCP tool execution fails."""
+    def __init__(
+        self,
+        message: str,
+        tool_name: str | None = None,
+        input_params: dict[str, Any] | None = None,
+        execution_stage: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        context = kwargs.get("context", {})
+        if input_params:
+            # Sanitize sensitive information from input params
+            sanitized_params = self._sanitize_params(input_params)
+            context["input_params"] = sanitized_params
+        if execution_stage:
+            context["execution_stage"] = execution_stage
+        super().__init__(message, tool_name=tool_name, context=context, **kwargs)
+        self.input_params = input_params
+        self.execution_stage = execution_stage
+    @staticmethod
+    def _sanitize_params(params: dict[str, Any]) -> dict[str, Any]:
+        """Sanitize sensitive information from parameters."""
+        sanitized = {}
+        sensitive_keys = {"password", "token", "key", "secret", "auth", "credential"}
+        for key, value in params.items():
+            if any(sensitive in key.lower() for sensitive in sensitive_keys):
+                sanitized[key] = "***REDACTED***"
+            elif isinstance(value, str) and len(value) > 100:
+                sanitized[key] = value[:100] + "...[TRUNCATED]"
+            else:
+                sanitized[key] = value
+        return sanitized
+class MCPResourceError(MCPError):
+    """Raised when MCP resource access fails."""
+    def __init__(
+        self,
+        message: str,
+        resource_uri: str | None = None,
+        resource_type: str | None = None,
+        access_mode: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        context = kwargs.get("context", {})
+        if resource_type:
+            context["resource_type"] = resource_type
+        if access_mode:
+            context["access_mode"] = access_mode
+        super().__init__(message, resource_uri=resource_uri, context=context, **kwargs)
+        self.resource_type = resource_type
+        self.access_mode = access_mode
+class MCPTimeoutError(MCPError):
+    """Raised when MCP operation times out."""
+    def __init__(
+        self,
+        message: str,
+        timeout_seconds: float | None = None,
+        operation_type: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        context = kwargs.get("context", {})
+        if timeout_seconds:
+            context["timeout_seconds"] = timeout_seconds
+        if operation_type:
+            context["operation_type"] = operation_type
+        super().__init__(message, context=context, **kwargs)
+        self.timeout_seconds = timeout_seconds
+        self.operation_type = operation_type
+class MCPValidationError(ValidationError):
+    """Raised when MCP input validation fails."""
+    def __init__(
+        self,
+        message: str,
+        tool_name: str | None = None,
+        parameter_name: str | None = None,
+        parameter_value: Any | None = None,
+        validation_rule: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        context = kwargs.get("context", {})
+        if tool_name:
+            context["tool_name"] = tool_name
+        if parameter_name:
+            context["parameter_name"] = parameter_name
+        if validation_rule:
+            context["validation_rule"] = validation_rule
+        # Sanitize parameter value for logging
+        if parameter_value is not None:
+            if isinstance(parameter_value, str) and len(parameter_value) > 200:
+                context["parameter_value"] = parameter_value[:200] + "...[TRUNCATED]"
+            else:
+                context["parameter_value"] = parameter_value
+        super().__init__(message, validation_type="mcp_parameter", context=context, **kwargs)
+        self.tool_name = tool_name
+        self.parameter_name = parameter_name
+        self.validation_rule = validation_rule
+class FileRestrictionError(SecurityError):
+    """Raised when file access is restricted by mode or security policy."""
+    def __init__(
+        self,
+        message: str,
+        file_path: str | Path | None = None,
+        current_mode: str | None = None,
+        allowed_patterns: list[str] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        context = kwargs.get("context", {})
+        if current_mode:
+            context["current_mode"] = current_mode
+        if allowed_patterns:
+            context["allowed_patterns"] = allowed_patterns
+        super().__init__(
+            message,
+            security_type="file_restriction",
+            file_path=file_path,
+            context=context,
+            **kwargs
+        )
+        self.current_mode = current_mode
+        self.allowed_patterns = allowed_patterns
+# Enhanced error response utilities for MCP
+def create_mcp_error_response(
+    exception: Exception,
+    tool_name: str | None = None,
+    include_debug_info: bool = False,
+    sanitize_sensitive: bool = True,
+) -> dict[str, Any]:
+    """
+    Create standardized MCP error response dictionary.
+    Args:
+        exception: The exception to convert
+        tool_name: Name of the MCP tool that failed
+        include_debug_info: Whether to include debug information
+        sanitize_sensitive: Whether to sanitize sensitive information
+    Returns:
+        MCP-compliant error response dictionary
+    """
+    import traceback
+    response: dict[str, Any] = {
+        "success": False,
+        "error": {
+            "type": exception.__class__.__name__,
+            "message": str(exception),
+            "timestamp": __import__("datetime").datetime.utcnow().isoformat() + "Z"
+        }
+    }
+    # Add tool name if provided
+    if tool_name:
+        response["error"]["tool"] = tool_name
+    # Add context if available
+    if hasattr(exception, "context") and exception.context:
+        context = exception.context.copy()
+        # Sanitize sensitive information if requested
+        if sanitize_sensitive:
+            context = _sanitize_error_context(context)
+        response["error"]["context"] = context
+    # Add error code if available
+    if hasattr(exception, "error_code"):
+        response["error"]["code"] = exception.error_code
+    # Add debug information if requested
+    if include_debug_info:
+        response["error"]["debug"] = {
+            "traceback": traceback.format_exc(),
+            "exception_args": list(exception.args) if exception.args else []
+        }
+    # Add specific error details for known exception types
+    if isinstance(exception, MCPToolError):
+        response["error"]["execution_stage"] = exception.execution_stage
+    elif isinstance(exception, MCPTimeoutError):
+        response["error"]["timeout_seconds"] = exception.timeout_seconds
+    elif isinstance(exception, FileRestrictionError):
+        response["error"]["current_mode"] = exception.current_mode
+        response["error"]["allowed_patterns"] = exception.allowed_patterns
+    return response
+def _sanitize_error_context(context: dict[str, Any]) -> dict[str, Any]:
+    """Sanitize sensitive information from error context."""
+    sanitized = {}
+    sensitive_keys = {
+        "password", "token", "key", "secret", "auth", "credential",
+        "api_key", "access_token", "private_key", "session_id"
+    }
+    for key, value in context.items():
+        if any(sensitive in key.lower() for sensitive in sensitive_keys):
+            sanitized[key] = "***REDACTED***"
+        elif isinstance(value, str) and len(value) > 500:
+            sanitized[key] = value[:500] + "...[TRUNCATED]"
+        elif isinstance(value, (list, tuple)) and len(value) > 10:
+            sanitized[key] = list(value[:10]) + ["...[TRUNCATED]"]
+        elif isinstance(value, dict) and len(value) > 20:
+            # Recursively sanitize nested dictionaries
+            truncated_dict = dict(list(value.items())[:20])
+            sanitized[key] = _sanitize_error_context(truncated_dict)
+            sanitized[key]["__truncated__"] = True
+        else:
+            sanitized[key] = value
+    return sanitized
+# Async exception handling utilities for MCP tools
+async def safe_execute_async(
+    coro: Any,
+    default_return: Any = None,
+    exception_types: tuple[type[Exception], ...] = (Exception,),
+    log_errors: bool = True,
+    tool_name: str | None = None,
+) -> Any:
+    """
+    Safely execute an async function with exception handling.
+    Args:
+        coro: Coroutine to execute
+        default_return: Value to return on exception
+        exception_types: Exception types to catch
+        log_errors: Whether to log errors
+        tool_name: Name of the tool for error context
+    Returns:
+        Coroutine result or default_return on exception
+    """
+    try:
+        return await coro
+    except exception_types as e:
+        if log_errors:
+            from .utils import log_error
+            error_context = {"tool_name": tool_name} if tool_name else {}
+            log_error(f"Async execution failed: {e}", extra=error_context)
+        return default_return
+def mcp_exception_handler(
+    tool_name: str,
+    include_debug: bool = False,
+    sanitize_sensitive: bool = True,
+) -> Any:
+    """
+    Decorator for MCP tool exception handling.
+    Args:
+        tool_name: Name of the MCP tool
+        include_debug: Whether to include debug information
+        sanitize_sensitive: Whether to sanitize sensitive information
+    """
+    def decorator(func: Any) -> Any:
+        async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
+            try:
+                return await func(*args, **kwargs)
+            except Exception as e:
+                from .utils import log_error
+                # Log the error with tool context
+                log_error(
+                    f"MCP tool '{tool_name}' failed: {e}",
+                    extra={"tool_name": tool_name, "exception_type": type(e).__name__}
+                )
+                # Return standardized error response
+                return create_mcp_error_response(
+                    e,
+                    tool_name=tool_name,
+                    include_debug_info=include_debug,
+                    sanitize_sensitive=sanitize_sensitive
+                )
+        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
+            try:
+                return func(*args, **kwargs)
+            except Exception as e:
+                from .utils import log_error
+                # Log the error with tool context
+                log_error(
+                    f"MCP tool '{tool_name}' failed: {e}",
+                    extra={"tool_name": tool_name, "exception_type": type(e).__name__}
+                )
+                # Return standardized error response
+                return create_mcp_error_response(
+                    e,
+                    tool_name=tool_name,
+                    include_debug_info=include_debug,
+                    sanitize_sensitive=sanitize_sensitive
+                )
+        # Return appropriate wrapper based on function type
+        if __import__("asyncio").iscoroutinefunction(func):
+            return async_wrapper
+        else:
+            return sync_wrapper
+    return decorator

tree_sitter_analyzer/file_handler.py CHANGED Viewed

@@ -8,7 +8,22 @@ This module provides file reading functionality with encoding detection and fall
 from pathlib import Path
 from .encoding_utils import read_file_safe
-from .utils import log_error, log_info, log_warning
+from .utils import setup_logger
+# Set up logger for this module
+logger = setup_logger(__name__)
+def log_error(message: str, *args, **kwargs) -> None:
+    """Log error message"""
+    logger.error(message, *args, **kwargs)
+def log_info(message: str, *args, **kwargs) -> None:
+    """Log info message"""
+    logger.info(message, *args, **kwargs)
+def log_warning(message: str, *args, **kwargs) -> None:
+    """Log warning message"""
+    logger.warning(message, *args, **kwargs)
 def detect_language_from_extension(file_path: str) -> str:

tree_sitter_analyzer/interfaces/mcp_server.py CHANGED Viewed

@@ -12,6 +12,8 @@ import logging
 import sys
 from typing import Any
+from .. import __version__
 try:
     from mcp.server import Server
     from mcp.server.models import InitializationOptions
@@ -68,7 +70,7 @@ class TreeSitterAnalyzerMCPServer:
         self.server: Server | None = None
         self.name = "tree-sitter-analyzer"
-        self.version = "2.0.0"
+        self.version = __version__
         log_info(f"Initializing {self.name} v{self.version}")

tree_sitter_analyzer/language_detector.py CHANGED Viewed

@@ -66,6 +66,10 @@ class LanguageDetector:
         ".mkd": "markdown",
         ".mkdn": "markdown",
         ".mdx": "markdown",
+        # JSON系
+        ".json": "json",
+        ".jsonc": "json",
+        ".json5": "json",
     }
     # Ambiguous extensions (map to multiple languages)
@@ -100,6 +104,7 @@ class LanguageDetector:
         "rust",
         "go",
         "markdown",
+        "json",
     }
     def __init__(self) -> None:
@@ -143,6 +148,10 @@ class LanguageDetector:
             ".mkd": ("markdown", 0.8),
             ".mkdn": ("markdown", 0.8),
             ".mdx": ("markdown", 0.7),  # MDX might be mixed with JSX
+            # JSON extensions
+            ".json": ("json", 0.9),
+            ".jsonc": ("json", 0.8),  # JSON with comments
+            ".json5": ("json", 0.8),  # JSON5 format
         }
         # Content-based detection patterns
@@ -412,7 +421,9 @@ def detect_language_from_file(file_path: str) -> str:
     Returns:
         Detected language name
     """
-    return detector.detect_from_extension(file_path)
+    # Create a fresh instance to ensure latest configuration
+    fresh_detector = LanguageDetector()
+    return fresh_detector.detect_from_extension(file_path)
 def is_language_supported(language: str) -> bool:

tree_sitter_analyzer/languages/markdown_plugin.py CHANGED Viewed

@@ -184,6 +184,17 @@ class MarkdownElementExtractor(ElementExtractor):
             log_debug(f"Error during link extraction: {e}")
             return []
+        # 重複除去: 同じtextとurlを持つ要素を除去
+        seen = set()
+        unique_links = []
+        for link in links:
+            key = (getattr(link, 'text', '') or "", getattr(link, 'url', '') or "")
+            if key not in seen:
+                seen.add(key)
+                unique_links.append(link)
+        links = unique_links
         log_debug(f"Extracted {len(links)} Markdown links")
         return links
@@ -209,6 +220,17 @@ class MarkdownElementExtractor(ElementExtractor):
             log_debug(f"Error during image extraction: {e}")
             return []
+        # 重複除去: 同じalt_textとurlを持つ要素を除去
+        seen = set()
+        unique_images = []
+        for img in images:
+            key = (img.alt_text or "", img.url or "")
+            if key not in seen:
+                seen.add(key)
+                unique_images.append(img)
+        images = unique_images
         log_debug(f"Extracted {len(images)} Markdown images")
         return images

tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py CHANGED Viewed

@@ -379,19 +379,27 @@ class AnalyzeScaleTool(BaseMCPTool):
         include_details = arguments.get("include_details", False)
         include_guidance = arguments.get("include_guidance", True)
+        # Security validation BEFORE path resolution to catch symlinks
+        is_valid, error_msg = self.security_validator.validate_file_path(file_path)
+        if not is_valid:
+            logger.warning(
+                f"Security validation failed for file path: {file_path} - {error_msg}"
+            )
+            raise ValueError(f"Invalid file path: {error_msg}")
         # Resolve file path to absolute path
         resolved_file_path = self.path_resolver.resolve(file_path)
         logger.info(f"Analyzing file: {file_path} (resolved to: {resolved_file_path})")
-        # Security validation using resolved path
+        # Additional security validation on resolved path
         is_valid, error_msg = self.security_validator.validate_file_path(
             resolved_file_path
         )
         if not is_valid:
             logger.warning(
-                f"Security validation failed for file path: {resolved_file_path} - {error_msg}"
+                f"Security validation failed for resolved path: {resolved_file_path} - {error_msg}"
             )
-            raise ValueError(f"Invalid file path: {error_msg}")
+            raise ValueError(f"Invalid resolved path: {error_msg}")
         # Sanitize inputs
         if language:
@@ -423,6 +431,12 @@ class AnalyzeScaleTool(BaseMCPTool):
                 # Calculate basic file metrics
                 file_metrics = self._calculate_file_metrics(resolved_file_path)
+                # Handle JSON files specially - they don't need structural analysis
+                if language == "json":
+                    return self._create_json_file_analysis(
+                        resolved_file_path, file_metrics, include_guidance
+                    )
                 # Use appropriate analyzer based on language
                 if language == "java":
                     # Use AdvancedAnalyzer for comprehensive analysis
@@ -472,6 +486,7 @@ class AnalyzeScaleTool(BaseMCPTool):
                 # Build enhanced result structure
                 result = {
+                    "success": True,
                     "file_path": file_path,
                     "language": language,
                     "file_metrics": file_metrics,
@@ -688,6 +703,56 @@ class AnalyzeScaleTool(BaseMCPTool):
         return True
+    def _create_json_file_analysis(
+        self, file_path: str, file_metrics: dict[str, Any], include_guidance: bool
+    ) -> dict[str, Any]:
+        """
+        Create analysis result for JSON files.
+        Args:
+            file_path: Path to the JSON file
+            file_metrics: Basic file metrics
+            include_guidance: Whether to include guidance
+        Returns:
+            Analysis result for JSON file
+        """
+        result = {
+            "success": True,
+            "file_path": file_path,
+            "language": "json",
+            "file_size_bytes": file_metrics["file_size_bytes"],
+            "total_lines": file_metrics["total_lines"],
+            "non_empty_lines": file_metrics["total_lines"] - file_metrics["blank_lines"],
+            "estimated_tokens": file_metrics["estimated_tokens"],
+            "complexity_metrics": {
+                "total_elements": 0,
+                "max_depth": 0,
+                "avg_complexity": 0.0,
+            },
+            "structural_overview": {
+                "classes": [],
+                "methods": [],
+                "fields": [],
+            },
+            "scale_category": "small" if file_metrics["total_lines"] < 100 else "medium" if file_metrics["total_lines"] < 1000 else "large",
+            "analysis_recommendations": {
+                "suitable_for_full_analysis": file_metrics["total_lines"] < 1000,
+                "recommended_approach": "JSON files are configuration/data files - structural analysis not applicable",
+                "token_efficiency_notes": "JSON files can be read directly without tree-sitter parsing",
+            },
+        }
+        if include_guidance:
+            result["llm_analysis_guidance"] = {
+                "file_characteristics": "JSON configuration/data file",
+                "recommended_workflow": "Direct file reading for content analysis",
+                "token_optimization": "Use simple file reading tools for JSON content",
+                "analysis_focus": "Data structure and configuration values",
+            }
+        return result
     def get_tool_definition(self) -> dict[str, Any]:
         """
         Get the MCP tool definition for check_code_scale.

tree_sitter_analyzer/mcp/tools/fd_rg_utils.py CHANGED Viewed

@@ -11,6 +11,7 @@ from __future__ import annotations
 import asyncio
 import json
 import os
+import shutil
 import tempfile
 from dataclasses import dataclass
 from pathlib import Path
@@ -27,6 +28,21 @@ DEFAULT_RG_TIMEOUT_MS = 4000
 RG_TIMEOUT_HARD_CAP_MS = 30000
+def check_external_command(command: str) -> bool:
+    """Check if an external command is available in the system PATH."""
+    return shutil.which(command) is not None
+def get_missing_commands() -> list[str]:
+    """Get list of missing external commands required by fd/rg tools."""
+    missing = []
+    if not check_external_command("fd"):
+        missing.append("fd")
+    if not check_external_command("rg"):
+        missing.append("rg")
+    return missing
 def clamp_int(value: int | None, default_value: int, hard_cap: int) -> int:
     if value is None:
         return default_value
@@ -64,13 +80,22 @@ async def run_command_capture(
     Returns (returncode, stdout, stderr). On timeout, kills process and returns 124.
     Separated into a util for easy monkeypatching in tests.
     """
-    # Create process
-    proc = await asyncio.create_subprocess_exec(
-        *cmd,
-        stdin=asyncio.subprocess.PIPE if input_data is not None else None,
-        stdout=asyncio.subprocess.PIPE,
-        stderr=asyncio.subprocess.PIPE,
-    )
+    # Check if command exists before attempting to run
+    if cmd and not check_external_command(cmd[0]):
+        error_msg = f"Command '{cmd[0]}' not found in PATH. Please install {cmd[0]} to use this functionality."
+        return 127, b"", error_msg.encode()
+    try:
+        # Create process
+        proc = await asyncio.create_subprocess_exec(
+            *cmd,
+            stdin=asyncio.subprocess.PIPE if input_data is not None else None,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+    except FileNotFoundError as e:
+        error_msg = f"Command '{cmd[0]}' not found: {e}"
+        return 127, b"", error_msg.encode()
     # Compute timeout seconds
     timeout_s: float | None = None

tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py CHANGED Viewed

@@ -239,6 +239,16 @@ class FindAndGrepTool(BaseMCPTool):
     @handle_mcp_errors("find_and_grep")
     async def execute(self, arguments: dict[str, Any]) -> dict[str, Any]:
+        # Check if both fd and rg commands are available
+        missing_commands = fd_rg_utils.get_missing_commands()
+        if missing_commands:
+            return {
+                "success": False,
+                "error": f"Required commands not found: {', '.join(missing_commands)}. Please install fd (https://github.com/sharkdp/fd) and ripgrep (https://github.com/BurntSushi/ripgrep) to use this tool.",
+                "count": 0,
+                "results": []
+            }
         self.validate_arguments(arguments)
         roots = self._validate_roots(arguments["roots"])  # absolute validated

tree-sitter-analyzer 1.7.4__py3-none-any.whl → 1.7.7__py3-none-any.whl

Potentially problematic release.

tree-sitter-analyzer 1.7.4__py3-none-any.whl → 1.7.7__py3-none-any.whl