tree-sitter-analyzer 1.6.1.2__py3-none-any.whl → 1.6.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tree-sitter-analyzer might be problematic; see the registry's advisory page for more details.
- tree_sitter_analyzer/__init__.py +1 -1
- tree_sitter_analyzer/core/query.py +11 -9
- tree_sitter_analyzer/core/query_service.py +10 -13
- tree_sitter_analyzer/encoding_utils.py +55 -0
- tree_sitter_analyzer/file_handler.py +43 -27
- tree_sitter_analyzer/languages/python_plugin.py +51 -27
- tree_sitter_analyzer/logging_manager.py +361 -0
- tree_sitter_analyzer/mcp/server.py +1 -1
- tree_sitter_analyzer/mcp/tools/output_format_validator.py +147 -0
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +41 -8
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +74 -1
- tree_sitter_analyzer/mcp/utils/search_cache.py +9 -0
- tree_sitter_analyzer/utils.py +38 -203
- {tree_sitter_analyzer-1.6.1.2.dist-info → tree_sitter_analyzer-1.6.1.4.dist-info}/METADATA +7 -7
- {tree_sitter_analyzer-1.6.1.2.dist-info → tree_sitter_analyzer-1.6.1.4.dist-info}/RECORD +17 -15
- {tree_sitter_analyzer-1.6.1.2.dist-info → tree_sitter_analyzer-1.6.1.4.dist-info}/WHEEL +0 -0
- {tree_sitter_analyzer-1.6.1.2.dist-info → tree_sitter_analyzer-1.6.1.4.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
統一ログ管理システム
|
|
4
|
+
|
|
5
|
+
ログ出力の重複問題を解決するためのLoggerManagerクラスを提供します。
|
|
6
|
+
シングルトンパターンによりロガーインスタンスを一意に管理し、
|
|
7
|
+
重複ハンドラーを防止します。
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import os
|
|
12
|
+
import sys
|
|
13
|
+
import tempfile
|
|
14
|
+
import threading
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any, Dict, List, Optional
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class LoggerManager:
    """Unified logger management class.

    Manages logger instances via the singleton pattern and prevents
    duplicate handlers from being attached to the same logger.
    """

    # Singleton instance; guarded by _lock for thread-safe creation.
    _instance: Optional['LoggerManager'] = None
    # Lock protecting singleton creation, logger creation, and resets.
    _lock: threading.Lock = threading.Lock()
    # name -> configured logger cache.
    _loggers: Dict[str, logging.Logger] = {}
    # name -> list of handler class names already attached (dedup bookkeeping).
    _handler_registry: Dict[str, List[str]] = {}
    # One-time __init__ guard for the singleton.
    _initialized: bool = False
    # Ensures the "file logging enabled" notice is printed at most once.
    _file_log_message_shown: bool = False

    def __new__(cls) -> 'LoggerManager':
        """Thread-safe singleton construction (double-checked locking)."""
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self) -> None:
        """Initialize once; later constructions of the singleton are no-ops.

        NOTE(review): the assignments below create *instance* attributes that
        shadow the class-level defaults — safe here only because the class is
        a singleton.
        """
        if not self._initialized:
            with self._lock:
                if not self._initialized:
                    self._loggers = {}
                    self._handler_registry = {}
                    self._initialized = True

    def get_logger(
        self,
        name: str = "tree_sitter_analyzer",
        level: int | str = logging.WARNING
    ) -> logging.Logger:
        """Fetch a logger without attaching duplicate handlers.

        Args:
            name: Logger name.
            level: Log level (numeric constant or level name string).

        Returns:
            The configured logger instance.
        """
        with self._lock:
            if name not in self._loggers:
                self._loggers[name] = self._create_logger(name, level)
            else:
                # Existing logger: still refresh its level.
                numeric_level = self._convert_level(level)

                # The LOG_LEVEL environment variable takes precedence.
                env_level = os.environ.get("LOG_LEVEL", "").upper()
                if env_level and env_level in ["DEBUG", "INFO", "WARNING", "ERROR"]:
                    numeric_level = getattr(logging, env_level)

                self._loggers[name].setLevel(numeric_level)

            return self._loggers[name]

    def _create_logger(self, name: str, level: int | str) -> logging.Logger:
        """Create a logger and attach handlers.

        Args:
            name: Logger name.
            level: Log level.

        Returns:
            The configured logger instance.
        """
        # Normalize the level to a numeric value.
        numeric_level = self._convert_level(level)

        # LOG_LEVEL environment variable overrides the requested level.
        env_level = os.environ.get("LOG_LEVEL", "").upper()
        if env_level and env_level in ["DEBUG", "INFO", "WARNING", "ERROR"]:
            numeric_level = getattr(logging, env_level)

        logger = logging.getLogger(name)

        # Only attach handlers if none of ours are present yet.
        if not self._has_required_handlers(logger, name):
            self._setup_handlers(logger, name, numeric_level)

        # Apply the effective level to the logger itself.
        logger.setLevel(numeric_level)

        return logger

    def _convert_level(self, level: int | str) -> int:
        """Convert a level name string to its numeric value.

        Unknown strings fall back to WARNING; ints pass through unchanged.
        """
        if isinstance(level, str):
            level_upper = level.upper()
            level_map = {
                "DEBUG": logging.DEBUG,
                "INFO": logging.INFO,
                "WARNING": logging.WARNING,
                "ERROR": logging.ERROR,
            }
            return level_map.get(level_upper, logging.WARNING)
        return level

    def _has_required_handlers(self, logger: logging.Logger, name: str) -> bool:
        """Check whether the required handlers are already attached.

        Args:
            logger: Logger to inspect.
            name: Logger name.

        Returns:
            True if the required handlers are already configured.
        """
        if name in self._handler_registry:
            # A logger we already manage is considered configured.
            return True

        # Look for an existing console handler (a StreamHandler that is not
        # a FileHandler — FileHandler subclasses StreamHandler).
        has_stream_handler = any(
            isinstance(h, logging.StreamHandler) and not isinstance(h, logging.FileHandler)
            for h in logger.handlers
        )

        if has_stream_handler:
            # Record the handler types so future calls short-circuit above.
            handler_types = [type(h).__name__ for h in logger.handlers]
            self._handler_registry[name] = handler_types
            return True

        return False

    def _setup_handlers(self, logger: logging.Logger, name: str, level: int) -> None:
        """Attach handlers to a logger.

        Args:
            logger: Logger to configure.
            name: Logger name.
            level: Log level.
        """
        # Main handler: stderr (stdout is reserved for MCP stdio traffic).
        if not self._has_stream_handler(logger):
            stream_handler = SafeStreamHandler()
            formatter = logging.Formatter(
                "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
            )
            stream_handler.setFormatter(formatter)
            logger.addHandler(stream_handler)

        # Optional file handler, enabled via environment variable.
        enable_file_log = (
            os.environ.get("TREE_SITTER_ANALYZER_ENABLE_FILE_LOG", "").lower() == "true"
        )

        if enable_file_log and not self._has_file_handler(logger):
            file_handler = self._create_file_handler(level)
            if file_handler:
                logger.addHandler(file_handler)

        # Record the attached handler types for the dedup registry.
        handler_types = [type(h).__name__ for h in logger.handlers]
        self._handler_registry[name] = handler_types

    def _has_stream_handler(self, logger: logging.Logger) -> bool:
        """Return True if a console (non-file) StreamHandler is attached."""
        return any(
            isinstance(h, logging.StreamHandler) and not isinstance(h, logging.FileHandler)
            for h in logger.handlers
        )

    def _has_file_handler(self, logger: logging.Logger) -> bool:
        """Return True if a FileHandler is attached."""
        return any(isinstance(h, logging.FileHandler) for h in logger.handlers)

    def _create_file_handler(self, level: int) -> Optional[logging.FileHandler]:
        """Create a file handler.

        Args:
            level: Default log level for the file handler.

        Returns:
            The created FileHandler, or None if creation failed.
        """
        try:
            # Decide the log directory: explicit env var, else the temp dir.
            log_dir = os.environ.get("TREE_SITTER_ANALYZER_LOG_DIR")
            if log_dir:
                log_path = Path(log_dir) / "tree_sitter_analyzer.log"
                Path(log_dir).mkdir(parents=True, exist_ok=True)
            else:
                temp_dir = tempfile.gettempdir()
                log_path = Path(temp_dir) / "tree_sitter_analyzer.log"

            # Decide the file log level; env var overrides the main level.
            file_log_level_str = os.environ.get(
                "TREE_SITTER_ANALYZER_FILE_LOG_LEVEL", ""
            ).upper()
            file_log_level = level  # default: same as the main level

            if file_log_level_str in ["DEBUG", "INFO", "WARNING", "ERROR"]:
                file_log_level = getattr(logging, file_log_level_str)

            # Build the handler with the same format as the stream handler.
            file_handler = logging.FileHandler(str(log_path), encoding="utf-8")
            formatter = logging.Formatter(
                "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
            )
            file_handler.setFormatter(formatter)
            file_handler.setLevel(file_log_level)

            # Print the log-file path once per process so users can find it.
            if not LoggerManager._file_log_message_shown:
                LoggerManager._file_log_message_shown = True
                if hasattr(sys, "stderr") and hasattr(sys.stderr, "write"):
                    try:
                        sys.stderr.write(
                            f"[LoggerManager] File logging enabled: {log_path}\n"
                        )
                    except Exception:
                        pass

            return file_handler

        except Exception as e:
            # File handler creation failure must not break main operation.
            if hasattr(sys, "stderr") and hasattr(sys.stderr, "write"):
                try:
                    sys.stderr.write(
                        f"[LoggerManager] File handler creation failed: {e}\n"
                    )
                except Exception:
                    pass
            return None

    def reset_for_testing(self) -> None:
        """Reset all managed loggers — for use in tests only.

        Note:
            Do not use in production.
        """
        with self._lock:
            # Close and detach every handler we attached.
            for logger in self._loggers.values():
                for handler in logger.handlers[:]:
                    try:
                        handler.close()
                        logger.removeHandler(handler)
                    except Exception:
                        pass

            self._loggers.clear()
            self._handler_registry.clear()
            LoggerManager._file_log_message_shown = False
|
|
279
|
+
|
|
280
|
+
class SafeStreamHandler(logging.StreamHandler):
    """A StreamHandler that never raises when its target stream misbehaves.

    Intended for MCP stdio communication and test environments (e.g. pytest
    output capture), where the underlying stream may be closed or replaced
    while the handler is still registered.
    """

    def __init__(self, stream=None):
        # Default to stderr — stdout is reserved for the MCP protocol.
        super().__init__(sys.stderr if stream is None else stream)

    def emit(self, record: Any) -> None:
        """Emit the record, silently skipping any stream/I-O problem.

        Args:
            record: The log record to write.
        """
        try:
            target = self.stream

            # Guard clauses: skip closed or write-less streams outright.
            if getattr(target, "closed", False):
                return
            if not hasattr(target, "write"):
                return

            # pytest capture streams (no real name, or a pytest wrapper type):
            # attempt the write and swallow any stream failure.
            if getattr(target, "name", "") is None or "pytest" in str(type(target)).lower():
                try:
                    super().emit(record)
                except (ValueError, OSError, AttributeError, UnicodeError):
                    pass
                return

            # Ordinary streams: verify writability before emitting.
            try:
                if hasattr(target, "writable") and not target.writable():
                    return
            except (ValueError, OSError, AttributeError, UnicodeError):
                return

            super().emit(record)

        except Exception:
            # Swallow everything — I/O errors at shutdown or under capture,
            # and any other unexpected failure — logging must never crash.
            pass
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
# Process-wide shared LoggerManager instance.
_logger_manager = LoggerManager()


def get_logger_manager() -> LoggerManager:
    """Return the global LoggerManager singleton.

    Returns:
        The LoggerManager instance.
    """
    return _logger_manager


def get_unified_logger(
    name: str = "tree_sitter_analyzer",
    level: int | str = logging.WARNING
) -> logging.Logger:
    """Obtain a logger through the unified manager.

    Args:
        name: Logger name.
        level: Log level (numeric constant or level name string).

    Returns:
        The configured logger instance.
    """
    return _logger_manager.get_logger(name, level)
@@ -67,7 +67,7 @@ from .tools.read_partial_tool import ReadPartialTool
|
|
|
67
67
|
from .tools.search_content_tool import SearchContentTool
|
|
68
68
|
from .tools.table_format_tool import TableFormatTool
|
|
69
69
|
|
|
70
|
-
# Set up logging
|
|
70
|
+
# Set up logging using unified LoggerManager
|
|
71
71
|
logger = setup_logger(__name__)
|
|
72
72
|
|
|
73
73
|
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Output format parameter validation for search_content tool.
|
|
4
|
+
|
|
5
|
+
Ensures mutual exclusion of output format parameters to prevent conflicts.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import locale
|
|
9
|
+
import os
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class OutputFormatValidator:
    """Validator for output format parameters mutual exclusion.

    FIX: the original iterated/joined the unordered ``OUTPUT_FORMAT_PARAMS``
    set, so error messages and :meth:`get_active_format` were
    nondeterministic. A canonical ``_PARAM_PRIORITY`` tuple now drives all
    iteration; the set is kept for backward-compatible membership checks.
    """

    # Canonical parameter order: total_only first, because the tool schema
    # documents it as taking priority over all other output formats.
    _PARAM_PRIORITY = (
        "total_only",
        "count_only_matches",
        "summary_only",
        "group_by_file",
        "optimize_paths",
    )

    # Output format parameters that are mutually exclusive
    OUTPUT_FORMAT_PARAMS = set(_PARAM_PRIORITY)

    # Token efficiency guidance for error messages
    FORMAT_EFFICIENCY_GUIDE = {
        "total_only": "~10 tokens (most efficient for count queries)",
        "count_only_matches": "~50-200 tokens (file distribution analysis)",
        "summary_only": "~500-2000 tokens (initial investigation)",
        "group_by_file": "~2000-10000 tokens (context-aware review)",
        "optimize_paths": "10-30% reduction (path compression)",
    }

    def _detect_language(self) -> str:
        """Detect the preferred message language ('ja' or 'en').

        Checks the LANG environment variable first, then the process locale;
        defaults to English.
        """
        lang = os.environ.get('LANG', '')
        if lang.startswith('ja'):
            return 'ja'

        # locale.getlocale() may raise on some platforms — treat as unknown.
        try:
            current_locale = locale.getlocale()[0]
            if current_locale and current_locale.startswith('ja'):
                return 'ja'
        except Exception:
            pass

        return 'en'

    def _get_error_message(self, specified_formats: list[str]) -> str:
        """Generate a localized error message with usage examples.

        Args:
            specified_formats: The conflicting parameter names, in
                canonical priority order.

        Returns:
            A human-readable error message in Japanese or English.
        """
        lang = self._detect_language()
        format_list = ", ".join(specified_formats)
        # Join the ordered tuple (not the set) so messages are deterministic.
        param_list = ", ".join(self._PARAM_PRIORITY)

        if lang == 'ja':
            # Japanese error message
            base_message = (
                f"⚠️ 出力形式パラメータエラー: 複数指定できません: {format_list}\n\n"
                f"📋 排他的パラメータ: {param_list}\n\n"
                f"💡 効率性ガイド:\n"
            )

            for param in self._PARAM_PRIORITY:
                base_message += f"  • {param}: {self.FORMAT_EFFICIENCY_GUIDE[param]}\n"

            base_message += (
                "\n✅ 推奨パターン:\n"
                "  • 件数確認: total_only=true\n"
                "  • ファイル分布: count_only_matches=true\n"
                "  • 初期調査: summary_only=true\n"
                "  • 詳細レビュー: group_by_file=true\n"
                "  • パス最適化: optimize_paths=true\n\n"
                "❌ 間違った例: {\"total_only\": true, \"summary_only\": true}\n"
                "✅ 正しい例: {\"total_only\": true}"
            )
        else:
            # English error message
            base_message = (
                f"⚠️ Output Format Parameter Error: Multiple formats specified: {format_list}\n\n"
                f"📋 Mutually Exclusive Parameters: {param_list}\n\n"
                f"💡 Token Efficiency Guide:\n"
            )

            for param in self._PARAM_PRIORITY:
                base_message += f"  • {param}: {self.FORMAT_EFFICIENCY_GUIDE[param]}\n"

            base_message += (
                "\n✅ Recommended Usage Patterns:\n"
                "  • Count validation: total_only=true\n"
                "  • File distribution: count_only_matches=true\n"
                "  • Initial investigation: summary_only=true\n"
                "  • Detailed review: group_by_file=true\n"
                "  • Path optimization: optimize_paths=true\n\n"
                "❌ Incorrect: {\"total_only\": true, \"summary_only\": true}\n"
                "✅ Correct: {\"total_only\": true}"
            )

        return base_message

    def validate_output_format_exclusion(self, arguments: dict[str, Any]) -> None:
        """
        Validate that only one output format parameter is specified.

        Args:
            arguments: Tool arguments dictionary

        Raises:
            ValueError: If multiple output format parameters are specified
        """
        # Collect truthy format flags in canonical order for stable messages.
        specified_formats = [
            param for param in self._PARAM_PRIORITY if arguments.get(param, False)
        ]

        if len(specified_formats) > 1:
            raise ValueError(self._get_error_message(specified_formats))

    def get_active_format(self, arguments: dict[str, Any]) -> str:
        """
        Get the active output format from arguments.

        Iterates in canonical priority order, so total_only wins when
        several flags are (erroneously) set.

        Args:
            arguments: Tool arguments dictionary

        Returns:
            Active format name or "normal" if none specified
        """
        for param in self._PARAM_PRIORITY:
            if arguments.get(param, False):
                return param
        return "normal"
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# Module-level cache for the shared validator instance.
_default_validator = None


def get_default_validator() -> OutputFormatValidator:
    """Return the shared OutputFormatValidator, constructing it lazily once."""
    global _default_validator
    if _default_validator is not None:
        return _default_validator
    _default_validator = OutputFormatValidator()
    return _default_validator
|
|
@@ -17,6 +17,7 @@ from ..utils.gitignore_detector import get_default_detector
|
|
|
17
17
|
from ..utils.search_cache import get_default_cache
|
|
18
18
|
from . import fd_rg_utils
|
|
19
19
|
from .base_tool import BaseMCPTool
|
|
20
|
+
from .output_format_validator import get_default_validator
|
|
20
21
|
|
|
21
22
|
logger = logging.getLogger(__name__)
|
|
22
23
|
|
|
@@ -40,7 +41,26 @@ class SearchContentTool(BaseMCPTool):
|
|
|
40
41
|
def get_tool_definition(self) -> dict[str, Any]:
|
|
41
42
|
return {
|
|
42
43
|
"name": "search_content",
|
|
43
|
-
"description": "Search text content inside files using ripgrep. Supports regex patterns, case sensitivity, context lines, and various output formats. Can search in directories or specific files.
|
|
44
|
+
"description": """Search text content inside files using ripgrep. Supports regex patterns, case sensitivity, context lines, and various output formats. Can search in directories or specific files.
|
|
45
|
+
|
|
46
|
+
⚠️ IMPORTANT: Token Efficiency Guide
|
|
47
|
+
Choose output format parameters based on your needs to minimize token usage and maximize performance with efficient search strategies:
|
|
48
|
+
|
|
49
|
+
🎯 RECOMMENDED WORKFLOW (Most Efficient Approach):
|
|
50
|
+
1. START with total_only=true parameter for initial count validation (~10 tokens)
|
|
51
|
+
2. IF more detail needed, use count_only_matches=true parameter for file distribution (~50-200 tokens)
|
|
52
|
+
3. IF context needed, use summary_only=true parameter for overview (~500-2000 tokens)
|
|
53
|
+
4. ONLY use full results when specific content review is required (~2000-50000+ tokens)
|
|
54
|
+
|
|
55
|
+
💡 TOKEN EFFICIENCY COMPARISON:
|
|
56
|
+
- total_only: ~10 tokens (single number) - MOST EFFICIENT for count queries
|
|
57
|
+
- count_only_matches: ~50-200 tokens (file counts) - Good for file distribution analysis
|
|
58
|
+
- summary_only: ~500-2000 tokens (condensed overview) - initial investigation
|
|
59
|
+
- group_by_file: ~2000-10000 tokens (organized by file) - Context-aware review
|
|
60
|
+
- optimize_paths: 10-30% reduction (path compression) - Use with deep directory structures
|
|
61
|
+
- Full results: ~2000-50000+ tokens - Use sparingly for detailed analysis
|
|
62
|
+
|
|
63
|
+
⚠️ MUTUALLY EXCLUSIVE: Only one output format parameter can be true at a time. Cannot be combined with other format parameters.""",
|
|
44
64
|
"inputSchema": {
|
|
45
65
|
"type": "object",
|
|
46
66
|
"properties": {
|
|
@@ -131,27 +151,27 @@ class SearchContentTool(BaseMCPTool):
|
|
|
131
151
|
"count_only_matches": {
|
|
132
152
|
"type": "boolean",
|
|
133
153
|
"default": False,
|
|
134
|
-
"description": "Return only match counts per file
|
|
154
|
+
"description": "⚠️ EXCLUSIVE: Return only match counts per file (~50-200 tokens). RECOMMENDED for: File distribution analysis, understanding match spread across files. Cannot be combined with other output formats.",
|
|
135
155
|
},
|
|
136
156
|
"summary_only": {
|
|
137
157
|
"type": "boolean",
|
|
138
158
|
"default": False,
|
|
139
|
-
"description": "Return
|
|
159
|
+
"description": "⚠️ EXCLUSIVE: Return condensed overview with top files and sample matches (~500-2000 tokens). RECOMMENDED for: Initial investigation, scope confirmation, pattern validation. Cannot be combined with other output formats.",
|
|
140
160
|
},
|
|
141
161
|
"optimize_paths": {
|
|
142
162
|
"type": "boolean",
|
|
143
163
|
"default": False,
|
|
144
|
-
"description": "Optimize file paths
|
|
164
|
+
"description": "⚠️ EXCLUSIVE: Optimize file paths by removing common prefixes (10-30% token reduction). RECOMMENDED for: Deep directory structures, large codebases. Cannot be combined with other output formats.",
|
|
145
165
|
},
|
|
146
166
|
"group_by_file": {
|
|
147
167
|
"type": "boolean",
|
|
148
168
|
"default": False,
|
|
149
|
-
"description": "Group results by file
|
|
169
|
+
"description": "⚠️ EXCLUSIVE: Group results by file, eliminating path duplication (~2000-10000 tokens). RECOMMENDED for: Context-aware review, analyzing matches within specific files. Cannot be combined with other output formats.",
|
|
150
170
|
},
|
|
151
171
|
"total_only": {
|
|
152
172
|
"type": "boolean",
|
|
153
173
|
"default": False,
|
|
154
|
-
"description": "Return only
|
|
174
|
+
"description": "⚠️ EXCLUSIVE: Return only total match count as single number (~10 tokens - MOST EFFICIENT). RECOMMENDED for: Count validation, filtering decisions, existence checks. Takes priority over all other formats. Cannot be combined with other output formats.",
|
|
155
175
|
},
|
|
156
176
|
},
|
|
157
177
|
"required": ["query"],
|
|
@@ -214,6 +234,9 @@ class SearchContentTool(BaseMCPTool):
|
|
|
214
234
|
"no_ignore",
|
|
215
235
|
"count_only_matches",
|
|
216
236
|
"summary_only",
|
|
237
|
+
"total_only",
|
|
238
|
+
"group_by_file",
|
|
239
|
+
"optimize_paths",
|
|
217
240
|
]:
|
|
218
241
|
if key in arguments and not isinstance(arguments[key], bool):
|
|
219
242
|
raise ValueError(f"{key} must be a boolean")
|
|
@@ -226,6 +249,10 @@ class SearchContentTool(BaseMCPTool):
|
|
|
226
249
|
if not isinstance(v, list) or not all(isinstance(x, str) for x in v):
|
|
227
250
|
raise ValueError(f"{key} must be an array of strings")
|
|
228
251
|
|
|
252
|
+
# Validate output format parameter exclusion
|
|
253
|
+
validator = get_default_validator()
|
|
254
|
+
validator.validate_output_format_exclusion(arguments)
|
|
255
|
+
|
|
229
256
|
# Validate roots and files if provided
|
|
230
257
|
if "roots" in arguments:
|
|
231
258
|
self._validate_roots(arguments["roots"])
|
|
@@ -310,13 +337,19 @@ class SearchContentTool(BaseMCPTool):
|
|
|
310
337
|
if isinstance(cached_result, dict):
|
|
311
338
|
cached_result = cached_result.copy()
|
|
312
339
|
cached_result["cache_hit"] = True
|
|
313
|
-
|
|
340
|
+
return cached_result
|
|
341
|
+
elif isinstance(cached_result, int):
|
|
342
|
+
# Handle int results (for total_only)
|
|
343
|
+
return cached_result
|
|
344
|
+
else:
|
|
345
|
+
# Convert other types to dict format for type safety
|
|
346
|
+
return {"success": True, "cache_hit": True, "value": cached_result}
|
|
314
347
|
|
|
315
348
|
# Clamp counts to safety limits
|
|
316
349
|
max_count = fd_rg_utils.clamp_int(
|
|
317
350
|
arguments.get("max_count"),
|
|
318
351
|
fd_rg_utils.DEFAULT_RESULTS_LIMIT,
|
|
319
|
-
fd_rg_utils.
|
|
352
|
+
fd_rg_utils.MAX_RESULTS_HARD_CAP,
|
|
320
353
|
)
|
|
321
354
|
timeout_ms = arguments.get("timeout_ms")
|
|
322
355
|
|