tree-sitter-analyzer 1.8.4__py3-none-any.whl → 1.9.0__py3-none-any.whl
This diff shows the changes between publicly released versions of this package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of tree-sitter-analyzer might be problematic.
- tree_sitter_analyzer/__init__.py +1 -1
- tree_sitter_analyzer/api.py +4 -4
- tree_sitter_analyzer/cli/argument_validator.py +29 -17
- tree_sitter_analyzer/cli/commands/advanced_command.py +7 -5
- tree_sitter_analyzer/cli/commands/structure_command.py +7 -5
- tree_sitter_analyzer/cli/commands/summary_command.py +10 -6
- tree_sitter_analyzer/cli/commands/table_command.py +8 -7
- tree_sitter_analyzer/cli/info_commands.py +1 -1
- tree_sitter_analyzer/cli_main.py +3 -2
- tree_sitter_analyzer/core/analysis_engine.py +5 -5
- tree_sitter_analyzer/core/cache_service.py +3 -1
- tree_sitter_analyzer/core/query.py +17 -5
- tree_sitter_analyzer/core/query_service.py +1 -1
- tree_sitter_analyzer/encoding_utils.py +3 -3
- tree_sitter_analyzer/exceptions.py +61 -50
- tree_sitter_analyzer/file_handler.py +3 -0
- tree_sitter_analyzer/formatters/base_formatter.py +10 -5
- tree_sitter_analyzer/formatters/formatter_registry.py +83 -68
- tree_sitter_analyzer/formatters/html_formatter.py +90 -54
- tree_sitter_analyzer/formatters/javascript_formatter.py +21 -16
- tree_sitter_analyzer/formatters/language_formatter_factory.py +7 -6
- tree_sitter_analyzer/formatters/markdown_formatter.py +247 -124
- tree_sitter_analyzer/formatters/python_formatter.py +61 -38
- tree_sitter_analyzer/formatters/typescript_formatter.py +113 -45
- tree_sitter_analyzer/interfaces/mcp_server.py +2 -2
- tree_sitter_analyzer/language_detector.py +6 -6
- tree_sitter_analyzer/language_loader.py +3 -1
- tree_sitter_analyzer/languages/css_plugin.py +120 -61
- tree_sitter_analyzer/languages/html_plugin.py +159 -62
- tree_sitter_analyzer/languages/java_plugin.py +42 -34
- tree_sitter_analyzer/languages/javascript_plugin.py +59 -30
- tree_sitter_analyzer/languages/markdown_plugin.py +402 -368
- tree_sitter_analyzer/languages/python_plugin.py +111 -64
- tree_sitter_analyzer/languages/typescript_plugin.py +241 -132
- tree_sitter_analyzer/mcp/server.py +22 -18
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +13 -8
- tree_sitter_analyzer/mcp/tools/base_tool.py +2 -2
- tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +232 -26
- tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +31 -23
- tree_sitter_analyzer/mcp/tools/list_files_tool.py +21 -19
- tree_sitter_analyzer/mcp/tools/query_tool.py +17 -18
- tree_sitter_analyzer/mcp/tools/read_partial_tool.py +30 -31
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +131 -77
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +29 -16
- tree_sitter_analyzer/mcp/utils/file_output_factory.py +64 -51
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +34 -24
- tree_sitter_analyzer/mcp/utils/gitignore_detector.py +8 -4
- tree_sitter_analyzer/models.py +7 -5
- tree_sitter_analyzer/plugins/base.py +9 -7
- tree_sitter_analyzer/plugins/manager.py +1 -0
- tree_sitter_analyzer/queries/css.py +2 -21
- tree_sitter_analyzer/queries/html.py +2 -15
- tree_sitter_analyzer/queries/markdown.py +30 -41
- tree_sitter_analyzer/queries/python.py +20 -5
- tree_sitter_analyzer/query_loader.py +5 -5
- tree_sitter_analyzer/security/validator.py +114 -86
- tree_sitter_analyzer/utils/__init__.py +58 -28
- tree_sitter_analyzer/utils/tree_sitter_compat.py +72 -65
- tree_sitter_analyzer/utils.py +26 -15
- {tree_sitter_analyzer-1.8.4.dist-info → tree_sitter_analyzer-1.9.0.dist-info}/METADATA +1 -1
- tree_sitter_analyzer-1.9.0.dist-info/RECORD +109 -0
- tree_sitter_analyzer-1.8.4.dist-info/RECORD +0 -109
- {tree_sitter_analyzer-1.8.4.dist-info → tree_sitter_analyzer-1.9.0.dist-info}/WHEEL +0 -0
- {tree_sitter_analyzer-1.8.4.dist-info → tree_sitter_analyzer-1.9.0.dist-info}/entry_points.txt +0 -0
@@ -29,7 +29,7 @@ except ImportError:
         pass
 
     class InitializationOptions:
-        def __init__(self, **kwargs):
+        def __init__(self, **kwargs: Any) -> None:
             pass
 
     class Tool:
@@ -41,7 +41,7 @@ except ImportError:
     class TextContent:
         pass
 
-    def stdio_server():
+    def stdio_server() -> None:
         pass
 
 
@@ -71,7 +71,7 @@ from .tools.table_format_tool import TableFormatTool
 try:
     from .tools.universal_analyze_tool import UniversalAnalyzeTool
 except ImportError:
-    UniversalAnalyzeTool = None
+    UniversalAnalyzeTool: type[Any] | None = None
 
 # Set up logging
 logger = setup_logger(__name__)
@@ -85,7 +85,7 @@ class TreeSitterAnalyzerMCPServer:
     integrating with existing analyzer components.
     """
 
-    def __init__(self, project_root: str = None) -> None:
+    def __init__(self, project_root: str | None = None) -> None:
         """Initialize the MCP server with analyzer components."""
         self.server: Server | None = None
         self._initialization_complete = False
@@ -116,9 +116,9 @@ class TreeSitterAnalyzerMCPServer:
             try:
                 self.universal_analyze_tool = UniversalAnalyzeTool(project_root)
             except Exception:
-                self.universal_analyze_tool = None
+                self.universal_analyze_tool: Any = None
         else:
-            self.universal_analyze_tool = None
+            self.universal_analyze_tool: Any = None
 
         # Initialize MCP resources
         self.code_file_resource = CodeFileResource()
@@ -132,7 +132,9 @@ class TreeSitterAnalyzerMCPServer:
 
         self._initialization_complete = True
         try:
-            logger.info(
+            logger.info(
+                f"MCP server initialization complete: {self.name} v{self.version}"
+            )
         except Exception:
             # Gracefully handle logging failures during initialization
             pass
@@ -215,7 +217,9 @@ class TreeSitterAnalyzerMCPServer:
 
         if analysis_result is None or not analysis_result.success:
             error_msg = (
-                analysis_result.error_message
+                analysis_result.error_message or "Unknown error"
+                if analysis_result
+                else "Unknown error"
             )
             raise RuntimeError(f"Failed to analyze file: {file_path} - {error_msg}")
 
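The hunk above makes the error message None-safe: both a missing result object and a missing error_message fall back to "Unknown error". A minimal standalone sketch of the same pattern (the FakeResult class is invented for illustration):

from dataclasses import dataclass

@dataclass
class FakeResult:  # invented stand-in for the analyzer's result object
    success: bool
    error_message: str | None = None

def describe_failure(result: FakeResult | None) -> str:
    # Both a missing result and a missing message fall back to the default.
    return (result.error_message or "Unknown error") if result else "Unknown error"

assert describe_failure(None) == "Unknown error"
assert describe_failure(FakeResult(False)) == "Unknown error"
assert describe_failure(FakeResult(False, "parse failed")) == "parse failed"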
@@ -293,7 +297,7 @@ class TreeSitterAnalyzerMCPServer:
                 if hasattr(elem, "__dict__"):
                     detailed_elements.append(elem.__dict__)
                 else:
-                    detailed_elements.append(str(elem))
+                    detailed_elements.append({"element": str(elem)})
             result["detailed_elements"] = detailed_elements
 
         return result
@@ -301,24 +305,24 @@ class TreeSitterAnalyzerMCPServer:
     async def _read_resource(self, uri: str) -> dict[str, Any]:
         """
         Read a resource by URI.
-
+
         Args:
             uri: Resource URI to read
-
+
         Returns:
             Resource content
-
+
         Raises:
             ValueError: If URI is invalid or resource not found
         """
         if uri.startswith("code://file/"):
             # Extract file path from URI
-
-            return
+            result = await self.code_file_resource.read_resource(uri)
+            return {"content": result}
         elif uri.startswith("code://stats/"):
             # Extract stats type from URI
-
-            return
+            result = await self.project_stats_resource.read_resource(uri)
+            return {"content": result}
         else:
             raise ValueError(f"Unknown resource URI: {uri}")
 
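Both resource branches above now return a wrapped payload instead of falling through with a bare return. A minimal stand-in for the same dispatch-and-wrap pattern; the URI prefixes come from the hunk, while the payloads are invented here (the real code delegates to CodeFileResource and the project stats resource):

import asyncio

async def read_resource(uri: str) -> dict[str, object]:
    if uri.startswith("code://file/"):
        return {"content": f"<file body for {uri.removeprefix('code://file/')}>"}
    elif uri.startswith("code://stats/"):
        return {"content": {"stats_type": uri.removeprefix("code://stats/")}}
    raise ValueError(f"Unknown resource URI: {uri}")

print(asyncio.run(read_resource("code://file/src/example.py")))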
@@ -727,7 +731,7 @@ class TreeSitterAnalyzerMCPServer:
             pass  # Silently ignore logging errors during shutdown
 
 
-def parse_mcp_args(args=None) -> argparse.Namespace:
+def parse_mcp_args(args: list[str] | None = None) -> argparse.Namespace:
     """Parse command line arguments for MCP server."""
     parser = argparse.ArgumentParser(
         description="Tree-sitter Analyzer MCP Server",
@@ -798,7 +802,7 @@ async def main() -> None:
 
         server = TreeSitterAnalyzerMCPServer(project_root)
         await server.run()
-
+
         # Exit successfully after server run completes
         sys.exit(0)
     except KeyboardInterrupt:
@@ -36,7 +36,7 @@ class AnalyzeScaleTool(BaseMCPTool):
     for LLM workflow efficiency.
     """
 
-    def __init__(self, project_root: str = None) -> None:
+    def __init__(self, project_root: str | None = None) -> None:
         """Initialize the analyze scale tool."""
         # Use unified analysis engine instead of deprecated AdvancedAnalyzer
         super().__init__(project_root)
@@ -464,7 +464,7 @@ class AnalyzeScaleTool(BaseMCPTool):
         universal_result = await self.analysis_engine.analyze(request)
         if not universal_result or not universal_result.success:
             error_msg = (
-                universal_result.error_message
+                universal_result.error_message or "Unknown error"
                 if universal_result
                 else "Unknown error"
             )
@@ -708,12 +708,12 @@ class AnalyzeScaleTool(BaseMCPTool):
     ) -> dict[str, Any]:
         """
         Create analysis result for JSON files.
-
+
         Args:
             file_path: Path to the JSON file
             file_metrics: Basic file metrics
             include_guidance: Whether to include guidance
-
+
         Returns:
             Analysis result for JSON file
         """
@@ -723,7 +723,8 @@ class AnalyzeScaleTool(BaseMCPTool):
             "language": "json",
             "file_size_bytes": file_metrics["file_size_bytes"],
             "total_lines": file_metrics["total_lines"],
-            "non_empty_lines": file_metrics["total_lines"]
+            "non_empty_lines": file_metrics["total_lines"]
+            - file_metrics["blank_lines"],
             "estimated_tokens": file_metrics["estimated_tokens"],
             "complexity_metrics": {
                 "total_elements": 0,
@@ -735,14 +736,18 @@ class AnalyzeScaleTool(BaseMCPTool):
                 "methods": [],
                 "fields": [],
             },
-            "scale_category": "small"
+            "scale_category": "small"
+            if file_metrics["total_lines"] < 100
+            else "medium"
+            if file_metrics["total_lines"] < 1000
+            else "large",
             "analysis_recommendations": {
                 "suitable_for_full_analysis": file_metrics["total_lines"] < 1000,
                 "recommended_approach": "JSON files are configuration/data files - structural analysis not applicable",
                 "token_efficiency_notes": "JSON files can be read directly without tree-sitter parsing",
             },
         }
-
+
         if include_guidance:
             result["llm_analysis_guidance"] = {
                 "file_characteristics": "JSON configuration/data file",
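The scale_category field is now derived from the line count instead of being hard-coded to "small". The thresholds from the hunk, restated as a standalone function:

def scale_category(total_lines: int) -> str:
    # <100 lines -> small, <1000 -> medium, otherwise large (thresholds from the diff above)
    return "small" if total_lines < 100 else "medium" if total_lines < 1000 else "large"

assert scale_category(50) == "small"
assert scale_category(500) == "medium"
assert scale_category(5000) == "large"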
@@ -750,7 +755,7 @@ class AnalyzeScaleTool(BaseMCPTool):
                 "token_optimization": "Use simple file reading tools for JSON content",
                 "analysis_focus": "Data structure and configuration values",
             }
-
+
         return result
 
     def get_tool_definition(self) -> dict[str, Any]:
@@ -121,7 +121,7 @@ class MCPTool(BaseMCPTool):
         Returns:
             Dictionary containing execution results
         """
-
+        raise NotImplementedError("Subclasses must implement execute method")
 
     def validate_arguments(self, arguments: dict[str, Any]) -> bool:
         """
@@ -136,4 +136,4 @@ class MCPTool(BaseMCPTool):
         Raises:
             ValueError: If arguments are invalid
         """
-
+        raise NotImplementedError("Subclasses must implement validate_arguments method")
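Because the stubs now raise NotImplementedError, a subclass that forgets an override fails loudly at call time instead of silently returning None. A sketch of a conforming subclass; the import path follows the file list above, the class name and behavior are invented, and the signature of execute is assumed since its def line is not visible in this hunk:

from typing import Any

from tree_sitter_analyzer.mcp.tools.base_tool import MCPTool  # path per the file list

class EchoTool(MCPTool):  # hypothetical subclass, for illustration only
    def execute(self, arguments: dict[str, Any]) -> dict[str, Any]:
        # Assumed synchronous signature; only the docstring and stub appear in the hunk.
        return {"echo": arguments}

    def validate_arguments(self, arguments: dict[str, Any]) -> bool:
        # Signature as shown in the hunk; raises ValueError on invalid input.
        if not arguments:
            raise ValueError("arguments must not be empty")
        return True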
@@ -84,7 +84,7 @@ async def run_command_capture(
     if cmd and not check_external_command(cmd[0]):
         error_msg = f"Command '{cmd[0]}' not found in PATH. Please install {cmd[0]} to use this functionality."
         return 127, b"", error_msg.encode()
-
+
     try:
         # Create process
         proc = await asyncio.create_subprocess_exec(
@@ -106,7 +106,7 @@ async def run_command_capture(
         stdout, stderr = await asyncio.wait_for(
             proc.communicate(input=input_data), timeout=timeout_s
         )
-        return proc.returncode, stdout, stderr
+        return proc.returncode or 0, stdout, stderr
    except asyncio.TimeoutError:
         try:
             proc.kill()
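asyncio's Process.returncode is typed int | None, so the added "or 0" keeps the declared tuple[int, bytes, bytes] return type honest. A small standalone illustration:

import asyncio
import sys

async def echo_returncode() -> int:
    proc = await asyncio.create_subprocess_exec(
        sys.executable, "-c", "print('hi')",
        stdout=asyncio.subprocess.PIPE,
    )
    await proc.communicate()
    # returncode is typed int | None, hence the "or 0" coercion used in the diff above.
    return proc.returncode or 0

print(asyncio.run(echo_returncode()))  # prints 0 on success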
@@ -222,7 +222,7 @@ def build_rg_command(
     """Build ripgrep command with JSON output and options."""
     if count_only_matches:
         # Use --count-matches for count-only mode (no JSON output)
-        cmd
+        cmd = [
             "rg",
             "--count-matches",
             "--no-heading",
@@ -231,7 +231,7 @@ def build_rg_command(
         ]
     else:
         # Use --json for full match details
-        cmd
+        cmd = [
             "rg",
             "--json",
             "--no-heading",
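For reference, the two command heads built in these branches differ only in the output mode: --count-matches emits per-file counts, while --json emits one JSON event per line. Restated as plain lists (flag names taken from the hunks; the remaining options are appended later by build_rg_command):

# Heads of the two ripgrep invocations, as listed in the hunks above.
count_head = ["rg", "--count-matches", "--no-heading"]  # per-file match counts
json_head = ["rg", "--json", "--no-heading"]            # one JSON event per line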
@@ -286,11 +286,15 @@ def build_rg_command(
     # Normalize filesize
     cmd += ["--max-filesize", normalize_max_filesize(max_filesize)]
 
-    #
-
-
-
-
+    # Add timeout if provided and > 0 (enable timeout for performance optimization)
+    if timeout_ms is not None and timeout_ms > 0:
+        # effective_timeout = clamp_int(
+        #     timeout_ms, DEFAULT_RG_TIMEOUT_MS, RG_TIMEOUT_HARD_CAP_MS
+        # )  # Commented out as not used yet
+        # Use timeout in milliseconds for better control
+        # Note: We'll handle timeout at the process level instead of ripgrep flag
+        # to ensure compatibility across ripgrep versions
+        pass
 
     # Query must be last before roots/files
     cmd.append(query)
@@ -307,39 +311,63 @@
 def parse_rg_json_lines_to_matches(stdout_bytes: bytes) -> list[dict[str, Any]]:
     """Parse ripgrep JSON event stream and keep only match events."""
     results: list[dict[str, Any]] = []
-
+    lines = stdout_bytes.splitlines()
+
+    # Batch process lines for better performance
+    for raw_line in lines:
         if not raw_line.strip():
             continue
         try:
-
+            # Decode once and parse JSON
+            line_str = raw_line.decode("utf-8", errors="replace")
+            evt = json.loads(line_str)
         except (json.JSONDecodeError, UnicodeDecodeError):  # nosec B112
             continue
+
+        # Quick type check to skip non-match events
         if evt.get("type") != "match":
             continue
+
         data = evt.get("data", {})
-
+        if not data:
+            continue
+
+        # Extract data with safe defaults
+        path_data = data.get("path", {})
+        path_text = path_data.get("text") if path_data else None
+        if not path_text:
+            continue
+
         line_number = data.get("line_number")
-
-
-
+        lines_data = data.get("lines", {})
+        line_text = lines_data.get("text") if lines_data else ""
+
+        # Normalize line content to reduce token usage (optimized)
         normalized_line = " ".join(line_text.split()) if line_text else ""
 
-        # Simplify submatches -
+        # Simplify submatches - keep only essential position data
+        submatches_raw = data.get("submatches", [])
         simplified_matches = []
-
-
-
-
-
+        if submatches_raw:
+            for sm in submatches_raw:
+                start = sm.get("start")
+                end = sm.get("end")
+                if start is not None and end is not None:
+                    simplified_matches.append([start, end])
 
         results.append(
             {
                 "file": path_text,
-                "line": line_number,
-                "text": normalized_line,
-                "matches": simplified_matches,
+                "line": line_number,
+                "text": normalized_line,
+                "matches": simplified_matches,
             }
         )
+
+        # Early exit if we have too many results to prevent memory issues
+        if len(results) >= MAX_RESULTS_HARD_CAP:
+            break
+
     return results
 
 
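The rewritten parser keeps only "match" events from ripgrep's --json stream and reduces each one to file, line, normalized text, and submatch offsets (MAX_RESULTS_HARD_CAP is a module constant defined outside this hunk). A self-contained sketch of one input event and the record the function would produce for it; the sample event follows ripgrep's documented JSON output format:

import json

# One ripgrep --json "match" event.
event = {
    "type": "match",
    "data": {
        "path": {"text": "src/app.py"},
        "lines": {"text": "    def   handler(request):\n"},
        "line_number": 42,
        "submatches": [{"match": {"text": "handler"}, "start": 10, "end": 17}],
    },
}
stdout_bytes = (json.dumps(event) + "\n").encode("utf-8")

# parse_rg_json_lines_to_matches(stdout_bytes) would yield:
# [{"file": "src/app.py",
#   "line": 42,
#   "text": "def handler(request):",   # whitespace collapsed to single spaces
#   "matches": [[10, 17]]}]            # submatch [start, end] offsets only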
@@ -572,7 +600,9 @@ class TempFileList:
     def __enter__(self) -> TempFileList:
         return self
 
-    def __exit__(
+    def __exit__(
+        self, exc_type: type[BaseException] | None, exc: BaseException | None, tb: Any
+    ) -> None:
         with contextlib.suppress(Exception):
             Path(self.path).unlink(missing_ok=True)
 
@@ -585,7 +615,12 @@ class contextlib:  # minimal shim for suppress without importing globally
         def __enter__(self) -> None:  # noqa: D401
             return None
 
-        def __exit__(
+        def __exit__(
+            self,
+            exc_type: type[BaseException] | None,
+            exc: BaseException | None,
+            tb: Any,
+        ) -> bool:
             return exc_type is not None and issubclass(exc_type, self.exceptions)
 
 
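Returning a bool from __exit__ is what makes the shim behave like contextlib.suppress: a truthy return tells Python to swallow the active exception. A standalone illustration of that protocol, independent of the shim itself:

class Suppress:
    """Minimal stand-in showing the __exit__ protocol used by the shim above."""

    def __init__(self, *exceptions: type[BaseException]) -> None:
        self.exceptions = exceptions

    def __enter__(self) -> None:
        return None

    def __exit__(self, exc_type, exc, tb) -> bool:
        # True -> suppress the active exception; False/None -> let it propagate.
        return exc_type is not None and issubclass(exc_type, self.exceptions)

with Suppress(FileNotFoundError):
    open("definitely-missing.txt")  # the FileNotFoundError raised here is swallowed
print("still running")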
@@ -595,3 +630,174 @@ def write_files_to_temp(files: list[str]) -> TempFileList:
     content = "\n".join(files)
     Path(temp_path).write_text(content, encoding="utf-8")
     return TempFileList(path=temp_path)
+
+
+async def run_parallel_rg_searches(
+    commands: list[list[str]],
+    timeout_ms: int | None = None,
+    max_concurrent: int = 4,
+) -> list[tuple[int, bytes, bytes]]:
+    """
+    Run multiple ripgrep commands in parallel with concurrency control.
+
+    Args:
+        commands: List of ripgrep command lists to execute
+        timeout_ms: Timeout in milliseconds for each command
+        max_concurrent: Maximum number of concurrent processes (default: 4)
+
+    Returns:
+        List of (returncode, stdout, stderr) tuples in the same order as commands
+    """
+    if not commands:
+        return []
+
+    # Create semaphore to limit concurrent processes
+    semaphore = asyncio.Semaphore(max_concurrent)
+
+    async def run_single_command(cmd: list[str]) -> tuple[int, bytes, bytes]:
+        async with semaphore:
+            return await run_command_capture(cmd, timeout_ms=timeout_ms)
+
+    # Execute all commands concurrently
+    tasks = [run_single_command(cmd) for cmd in commands]
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+
+    # Handle exceptions and convert to proper format
+    processed_results: list[tuple[int, bytes, bytes]] = []
+    for _i, result in enumerate(results):
+        if isinstance(result, Exception):
+            # Convert exception to error result
+            error_msg = f"Command failed: {str(result)}"
+            processed_results.append((1, b"", error_msg.encode()))
+        elif isinstance(result, tuple) and len(result) == 3:
+            processed_results.append(result)
+        else:
+            # Fallback for unexpected result types
+            processed_results.append((1, b"", b"Unexpected result type"))
+
+    return processed_results
+
+
+def merge_rg_results(
+    results: list[tuple[int, bytes, bytes]],
+    count_only_mode: bool = False,
+) -> tuple[int, bytes, bytes]:
+    """
+    Merge results from multiple ripgrep executions.
+
+    Args:
+        results: List of (returncode, stdout, stderr) tuples
+        count_only_mode: Whether the results are from count-only mode
+
+    Returns:
+        Merged (returncode, stdout, stderr) tuple
+    """
+    if not results:
+        return (1, b"", b"No results to merge")
+
+    # Check if any command failed critically (not just "no matches found")
+    critical_failures = []
+    successful_results = []
+
+    for rc, stdout, stderr in results:
+        if rc not in (0, 1):  # 0=matches found, 1=no matches, others=errors
+            critical_failures.append((rc, stdout, stderr))
+        else:
+            successful_results.append((rc, stdout, stderr))
+
+    # If all commands failed critically, return the first failure
+    if not successful_results:
+        return critical_failures[0] if critical_failures else (1, b"", b"")
+
+    # Merge successful results
+    if count_only_mode:
+        return _merge_count_results(successful_results)
+    else:
+        return _merge_json_results(successful_results)
+
+
+def _merge_count_results(
+    results: list[tuple[int, bytes, bytes]],
+) -> tuple[int, bytes, bytes]:
+    """Merge count-only results from multiple ripgrep executions."""
+    merged_counts: dict[str, int] = {}
+    total_matches = 0
+
+    for rc, stdout, _stderr in results:
+        if rc in (0, 1):  # Success or no matches
+            file_counts = parse_rg_count_output(stdout)
+            # Remove the __total__ key and merge file counts
+            for file_path, count in file_counts.items():
+                if file_path != "__total__":
+                    merged_counts[file_path] = merged_counts.get(file_path, 0) + count
+                    total_matches += count
+
+    # Format as ripgrep count output
+    output_lines = []
+    for file_path, count in merged_counts.items():
+        output_lines.append(f"{file_path}:{count}")
+
+    merged_stdout = "\n".join(output_lines).encode("utf-8")
+
+    # Return code 0 if we have matches, 1 if no matches
+    return_code = 0 if total_matches > 0 else 1
+    return (return_code, merged_stdout, b"")
+
+
+def _merge_json_results(
+    results: list[tuple[int, bytes, bytes]],
+) -> tuple[int, bytes, bytes]:
+    """Merge JSON results from multiple ripgrep executions."""
+    merged_lines = []
+    has_matches = False
+
+    for rc, stdout, _stderr in results:
+        if rc in (0, 1):  # Success or no matches
+            if stdout.strip():
+                merged_lines.extend(stdout.splitlines())
+                if rc == 0:  # Has matches
+                    has_matches = True
+
+    merged_stdout = b"\n".join(merged_lines)
+    return_code = 0 if has_matches else 1
+    return (return_code, merged_stdout, b"")
+
+
+def split_roots_for_parallel_processing(
+    roots: list[str], max_chunks: int = 4
+) -> list[list[str]]:
+    """
+    Split roots into chunks for parallel processing.
+
+    Args:
+        roots: List of root directories
+        max_chunks: Maximum number of chunks to create
+
+    Returns:
+        List of root chunks for parallel processing
+    """
+    if not roots:
+        return []
+
+    if len(roots) <= max_chunks:
+        # Each root gets its own chunk
+        return [[root] for root in roots]
+
+    # Distribute roots across chunks
+    chunk_size = len(roots) // max_chunks
+    remainder = len(roots) % max_chunks
+
+    chunks = []
+    start = 0
+
+    for i in range(max_chunks):
+        # Add one extra item to first 'remainder' chunks
+        current_chunk_size = chunk_size + (1 if i < remainder else 0)
+        end = start + current_chunk_size
+
+        if start < len(roots):
+            chunks.append(roots[start:end])
+
+        start = end
+
+    return [chunk for chunk in chunks if chunk]  # Remove empty chunks