tree-sitter-analyzer 1.8.3__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic; see the registry's advisory page for more details.

Files changed (64)
  1. tree_sitter_analyzer/__init__.py +1 -1
  2. tree_sitter_analyzer/api.py +4 -4
  3. tree_sitter_analyzer/cli/argument_validator.py +29 -17
  4. tree_sitter_analyzer/cli/commands/advanced_command.py +7 -5
  5. tree_sitter_analyzer/cli/commands/structure_command.py +7 -5
  6. tree_sitter_analyzer/cli/commands/summary_command.py +10 -6
  7. tree_sitter_analyzer/cli/commands/table_command.py +8 -7
  8. tree_sitter_analyzer/cli/info_commands.py +1 -1
  9. tree_sitter_analyzer/cli_main.py +3 -2
  10. tree_sitter_analyzer/core/analysis_engine.py +5 -5
  11. tree_sitter_analyzer/core/cache_service.py +3 -1
  12. tree_sitter_analyzer/core/query.py +17 -5
  13. tree_sitter_analyzer/core/query_service.py +1 -1
  14. tree_sitter_analyzer/encoding_utils.py +3 -3
  15. tree_sitter_analyzer/exceptions.py +61 -50
  16. tree_sitter_analyzer/file_handler.py +3 -0
  17. tree_sitter_analyzer/formatters/base_formatter.py +10 -5
  18. tree_sitter_analyzer/formatters/formatter_registry.py +83 -68
  19. tree_sitter_analyzer/formatters/html_formatter.py +90 -54
  20. tree_sitter_analyzer/formatters/javascript_formatter.py +21 -16
  21. tree_sitter_analyzer/formatters/language_formatter_factory.py +7 -6
  22. tree_sitter_analyzer/formatters/markdown_formatter.py +247 -124
  23. tree_sitter_analyzer/formatters/python_formatter.py +61 -38
  24. tree_sitter_analyzer/formatters/typescript_formatter.py +113 -45
  25. tree_sitter_analyzer/interfaces/mcp_server.py +2 -2
  26. tree_sitter_analyzer/language_detector.py +6 -6
  27. tree_sitter_analyzer/language_loader.py +3 -1
  28. tree_sitter_analyzer/languages/css_plugin.py +120 -61
  29. tree_sitter_analyzer/languages/html_plugin.py +159 -62
  30. tree_sitter_analyzer/languages/java_plugin.py +42 -34
  31. tree_sitter_analyzer/languages/javascript_plugin.py +59 -30
  32. tree_sitter_analyzer/languages/markdown_plugin.py +402 -368
  33. tree_sitter_analyzer/languages/python_plugin.py +111 -64
  34. tree_sitter_analyzer/languages/typescript_plugin.py +241 -132
  35. tree_sitter_analyzer/mcp/server.py +22 -18
  36. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +13 -8
  37. tree_sitter_analyzer/mcp/tools/base_tool.py +2 -2
  38. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +232 -26
  39. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +31 -23
  40. tree_sitter_analyzer/mcp/tools/list_files_tool.py +21 -19
  41. tree_sitter_analyzer/mcp/tools/query_tool.py +17 -18
  42. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +30 -31
  43. tree_sitter_analyzer/mcp/tools/search_content_tool.py +131 -77
  44. tree_sitter_analyzer/mcp/tools/table_format_tool.py +29 -16
  45. tree_sitter_analyzer/mcp/utils/file_output_factory.py +64 -51
  46. tree_sitter_analyzer/mcp/utils/file_output_manager.py +34 -24
  47. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +8 -4
  48. tree_sitter_analyzer/models.py +7 -5
  49. tree_sitter_analyzer/plugins/base.py +9 -7
  50. tree_sitter_analyzer/plugins/manager.py +1 -0
  51. tree_sitter_analyzer/queries/css.py +2 -21
  52. tree_sitter_analyzer/queries/html.py +2 -15
  53. tree_sitter_analyzer/queries/markdown.py +30 -41
  54. tree_sitter_analyzer/queries/python.py +20 -5
  55. tree_sitter_analyzer/query_loader.py +5 -5
  56. tree_sitter_analyzer/security/validator.py +114 -86
  57. tree_sitter_analyzer/utils/__init__.py +58 -28
  58. tree_sitter_analyzer/utils/tree_sitter_compat.py +72 -65
  59. tree_sitter_analyzer/utils.py +83 -25
  60. {tree_sitter_analyzer-1.8.3.dist-info → tree_sitter_analyzer-1.9.0.dist-info}/METADATA +19 -5
  61. tree_sitter_analyzer-1.9.0.dist-info/RECORD +109 -0
  62. tree_sitter_analyzer-1.8.3.dist-info/RECORD +0 -109
  63. {tree_sitter_analyzer-1.8.3.dist-info → tree_sitter_analyzer-1.9.0.dist-info}/WHEEL +0 -0
  64. {tree_sitter_analyzer-1.8.3.dist-info → tree_sitter_analyzer-1.9.0.dist-info}/entry_points.txt +0 -0
@@ -175,6 +175,11 @@ class SearchContentTool(BaseMCPTool):
175
175
  "description": "When true and output_file is specified, suppress detailed output in response to save tokens",
176
176
  "default": False,
177
177
  },
178
+ "enable_parallel": {
179
+ "type": "boolean",
180
+ "description": "Enable parallel processing for multiple root directories to improve performance. Default: True",
181
+ "default": True,
182
+ },
178
183
  },
179
184
  "required": ["query"],
180
185
  "anyOf": [
@@ -236,6 +241,7 @@ class SearchContentTool(BaseMCPTool):
236
241
  "no_ignore",
237
242
  "count_only_matches",
238
243
  "summary_only",
244
+ "enable_parallel",
239
245
  ]:
240
246
  if key in arguments and not isinstance(arguments[key], bool):
241
247
  raise ValueError(f"{key} must be a boolean")
@@ -306,9 +312,9 @@ class SearchContentTool(BaseMCPTool):
306
312
  "success": False,
307
313
  "error": "rg (ripgrep) command not found. Please install ripgrep (https://github.com/BurntSushi/ripgrep) to use this tool.",
308
314
  "count": 0,
309
- "results": []
315
+ "results": [],
310
316
  }
311
-
317
+
312
318
  self.validate_arguments(arguments)
313
319
 
314
320
  roots = arguments.get("roots")
@@ -318,30 +324,38 @@ class SearchContentTool(BaseMCPTool):
318
324
  if files:
319
325
  files = self._validate_files(files)
320
326
 
321
- # Check cache if enabled
327
+ # Check cache if enabled (simplified for performance)
322
328
  cache_key = None
323
329
  if self.cache:
324
- # Create cache key with relevant parameters (excluding 'query' and 'roots' from kwargs)
330
+ # Create simplified cache key for better performance
325
331
  cache_params = {
326
332
  k: v
327
333
  for k, v in arguments.items()
328
- if k not in ["query", "roots", "files"]
334
+ if k
335
+ not in ["query", "roots", "files", "output_file", "suppress_output"]
329
336
  }
330
337
  cache_key = self.cache.create_cache_key(
331
338
  query=arguments["query"], roots=roots or [], **cache_params
332
339
  )
333
340
 
334
- # Try smart cross-format caching first
335
- requested_format = self._determine_requested_format(arguments)
336
- cached_result = self.cache.get_compatible_result(
337
- cache_key, requested_format
338
- )
341
+ # Simple cache lookup without complex cross-format logic for performance
342
+ cached_result = self.cache.get(cache_key)
339
343
  if cached_result is not None:
340
344
  # Add cache hit indicator to result
341
345
  if isinstance(cached_result, dict):
342
346
  cached_result = cached_result.copy()
343
347
  cached_result["cache_hit"] = True
344
- return cached_result
348
+ return cached_result
349
+ elif isinstance(cached_result, int):
350
+ # For integer results (like total_only mode), return as-is
351
+ return cached_result
352
+ else:
353
+ # For other types, convert to dict format
354
+ return {
355
+ "success": True,
356
+ "cached_result": cached_result,
357
+ "cache_hit": True,
358
+ }
345
359
 
346
360
  # Handle max_count parameter properly
347
361
  # If user specifies max_count, use it directly (with reasonable upper limit)
@@ -411,30 +425,82 @@ class SearchContentTool(BaseMCPTool):
411
425
  )
412
426
 
413
427
  # Roots mode
414
- cmd = fd_rg_utils.build_rg_command(
415
- query=arguments["query"],
416
- case=arguments.get("case", "smart"),
417
- fixed_strings=bool(arguments.get("fixed_strings", False)),
418
- word=bool(arguments.get("word", False)),
419
- multiline=bool(arguments.get("multiline", False)),
420
- include_globs=arguments.get("include_globs"),
421
- exclude_globs=arguments.get("exclude_globs"),
422
- follow_symlinks=bool(arguments.get("follow_symlinks", False)),
423
- hidden=bool(arguments.get("hidden", False)),
424
- no_ignore=no_ignore, # Use the potentially auto-detected value
425
- max_filesize=arguments.get("max_filesize"),
426
- context_before=arguments.get("context_before"),
427
- context_after=arguments.get("context_after"),
428
- encoding=arguments.get("encoding"),
429
- max_count=max_count,
430
- timeout_ms=timeout_ms,
431
- roots=roots,
432
- files_from=None,
433
- count_only_matches=count_only_matches,
428
+ # Determine if we should use parallel processing
429
+ use_parallel = (
430
+ roots is not None
431
+ and len(roots) > 1
432
+ and arguments.get("enable_parallel", True)
434
433
  )
435
434
 
436
435
  started = time.time()
437
- rc, out, err = await fd_rg_utils.run_command_capture(cmd, timeout_ms=timeout_ms)
436
+
437
+ if use_parallel and roots is not None:
438
+ # Split roots for parallel processing
439
+ root_chunks = fd_rg_utils.split_roots_for_parallel_processing(
440
+ roots, max_chunks=4
441
+ )
442
+
443
+ # Build commands for each chunk
444
+ commands = []
445
+ for chunk in root_chunks:
446
+ cmd = fd_rg_utils.build_rg_command(
447
+ query=arguments["query"],
448
+ case=arguments.get("case", "smart"),
449
+ fixed_strings=bool(arguments.get("fixed_strings", False)),
450
+ word=bool(arguments.get("word", False)),
451
+ multiline=bool(arguments.get("multiline", False)),
452
+ include_globs=arguments.get("include_globs"),
453
+ exclude_globs=arguments.get("exclude_globs"),
454
+ follow_symlinks=bool(arguments.get("follow_symlinks", False)),
455
+ hidden=bool(arguments.get("hidden", False)),
456
+ no_ignore=no_ignore,
457
+ max_filesize=arguments.get("max_filesize"),
458
+ context_before=arguments.get("context_before"),
459
+ context_after=arguments.get("context_after"),
460
+ encoding=arguments.get("encoding"),
461
+ max_count=max_count,
462
+ timeout_ms=timeout_ms,
463
+ roots=chunk,
464
+ files_from=None,
465
+ count_only_matches=count_only_matches,
466
+ )
467
+ commands.append(cmd)
468
+
469
+ # Execute commands in parallel
470
+ results = await fd_rg_utils.run_parallel_rg_searches(
471
+ commands, timeout_ms=timeout_ms, max_concurrent=4
472
+ )
473
+
474
+ # Merge results
475
+ rc, out, err = fd_rg_utils.merge_rg_results(results, count_only_matches)
476
+ else:
477
+ # Single command execution (original behavior)
478
+ cmd = fd_rg_utils.build_rg_command(
479
+ query=arguments["query"],
480
+ case=arguments.get("case", "smart"),
481
+ fixed_strings=bool(arguments.get("fixed_strings", False)),
482
+ word=bool(arguments.get("word", False)),
483
+ multiline=bool(arguments.get("multiline", False)),
484
+ include_globs=arguments.get("include_globs"),
485
+ exclude_globs=arguments.get("exclude_globs"),
486
+ follow_symlinks=bool(arguments.get("follow_symlinks", False)),
487
+ hidden=bool(arguments.get("hidden", False)),
488
+ no_ignore=no_ignore,
489
+ max_filesize=arguments.get("max_filesize"),
490
+ context_before=arguments.get("context_before"),
491
+ context_after=arguments.get("context_after"),
492
+ encoding=arguments.get("encoding"),
493
+ max_count=max_count,
494
+ timeout_ms=timeout_ms,
495
+ roots=roots,
496
+ files_from=None,
497
+ count_only_matches=count_only_matches,
498
+ )
499
+
500
+ rc, out, err = await fd_rg_utils.run_command_capture(
501
+ cmd, timeout_ms=timeout_ms
502
+ )
503
+
438
504
  elapsed_ms = int((time.time() - started) * 1000)
439
505
 
440
506
  if rc not in (0, 1):
@@ -498,7 +564,7 @@ class SearchContentTool(BaseMCPTool):
498
564
 
499
565
  # Handle normal mode
500
566
  matches = fd_rg_utils.parse_rg_json_lines_to_matches(out)
501
-
567
+
502
568
  # Apply user-specified max_count limit if provided
503
569
  # Note: ripgrep's -m option limits matches per file, not total matches
504
570
  # So we need to apply the total limit here in post-processing
@@ -530,19 +596,19 @@ class SearchContentTool(BaseMCPTool):
530
596
  try:
531
597
  # Save full result to file
532
598
  import json
599
+
533
600
  json_content = json.dumps(result, indent=2, ensure_ascii=False)
534
601
  file_path = self.file_output_manager.save_to_file(
535
- content=json_content,
536
- base_name=output_file
602
+ content=json_content, base_name=output_file
537
603
  )
538
-
604
+
539
605
  # If suppress_output is True, return minimal response
540
606
  if suppress_output:
541
607
  minimal_result = {
542
608
  "success": result.get("success", True),
543
609
  "count": result.get("count", 0),
544
610
  "output_file": output_file,
545
- "file_saved": f"Results saved to {file_path}"
611
+ "file_saved": f"Results saved to {file_path}",
546
612
  }
547
613
  # Cache the full result, not the minimal one
548
614
  if self.cache and cache_key:
@@ -562,7 +628,7 @@ class SearchContentTool(BaseMCPTool):
562
628
  "success": result.get("success", True),
563
629
  "count": result.get("count", 0),
564
630
  "summary": result.get("summary", {}),
565
- "meta": result.get("meta", {})
631
+ "meta": result.get("meta", {}),
566
632
  }
567
633
  # Cache the full result, not the minimal one
568
634
  if self.cache and cache_key:
@@ -595,19 +661,19 @@ class SearchContentTool(BaseMCPTool):
595
661
  try:
596
662
  # Save full result to file
597
663
  import json
664
+
598
665
  json_content = json.dumps(result, indent=2, ensure_ascii=False)
599
666
  file_path = self.file_output_manager.save_to_file(
600
- content=json_content,
601
- base_name=output_file
667
+ content=json_content, base_name=output_file
602
668
  )
603
-
669
+
604
670
  # If suppress_output is True, return minimal response
605
671
  if suppress_output:
606
672
  minimal_result = {
607
673
  "success": result.get("success", True),
608
674
  "count": result.get("count", 0),
609
675
  "output_file": output_file,
610
- "file_saved": f"Results saved to {file_path}"
676
+ "file_saved": f"Results saved to {file_path}",
611
677
  }
612
678
  # Cache the full result, not the minimal one
613
679
  if self.cache and cache_key:
@@ -627,7 +693,7 @@ class SearchContentTool(BaseMCPTool):
627
693
  "success": result.get("success", True),
628
694
  "count": result.get("count", 0),
629
695
  "summary": result.get("summary", {}),
630
- "elapsed_ms": result.get("elapsed_ms", 0)
696
+ "elapsed_ms": result.get("elapsed_ms", 0),
631
697
  }
632
698
  # Cache the full result, not the minimal one
633
699
  if self.cache and cache_key:
@@ -651,7 +717,7 @@ class SearchContentTool(BaseMCPTool):
651
717
  output_file = arguments.get("output_file")
652
718
  suppress_output = arguments.get("suppress_output", False)
653
719
 
654
- # Always add results to the base result for file saving
720
+ # Always add results to the base result for caching
655
721
  result["results"] = matches
656
722
 
657
723
  # Handle file output if requested
@@ -665,58 +731,46 @@ class SearchContentTool(BaseMCPTool):
665
731
  "elapsed_ms": elapsed_ms,
666
732
  "results": matches,
667
733
  "summary": fd_rg_utils.summarize_search_results(matches),
668
- "grouped_by_file": fd_rg_utils.group_matches_by_file(matches)["files"] if matches else []
734
+ "grouped_by_file": fd_rg_utils.group_matches_by_file(matches)[
735
+ "files"
736
+ ]
737
+ if matches
738
+ else [],
669
739
  }
670
740
 
671
741
  # Convert to JSON for file output
672
742
  import json
743
+
673
744
  json_content = json.dumps(file_content, indent=2, ensure_ascii=False)
674
745
 
675
746
  # Save to file
676
747
  saved_file_path = self.file_output_manager.save_to_file(
677
- content=json_content,
678
- base_name=output_file
748
+ content=json_content, base_name=output_file
679
749
  )
680
750
 
751
+ result["output_file"] = output_file
681
752
  result["output_file_path"] = saved_file_path
682
753
  result["file_saved"] = True
683
754
 
684
755
  logger.info(f"Search results saved to: {saved_file_path}")
685
756
 
757
+ # If suppress_output is True, return minimal response
758
+ if suppress_output:
759
+ minimal_result = {
760
+ "success": result.get("success", True),
761
+ "count": result.get("count", 0),
762
+ "output_file": output_file,
763
+ "file_saved": f"Results saved to {saved_file_path}",
764
+ }
765
+ # Cache the full result, not the minimal one
766
+ if self.cache and cache_key:
767
+ self.cache.set(cache_key, result)
768
+ return minimal_result
769
+
686
770
  except Exception as e:
687
771
  logger.error(f"Failed to save output to file: {e}")
688
772
  result["file_save_error"] = str(e)
689
773
  result["file_saved"] = False
690
-
691
- # Handle file output and suppression
692
- output_file = arguments.get("output_file")
693
- suppress_output = arguments.get("suppress_output", False)
694
-
695
- if output_file:
696
- # Save full result to file
697
- import json
698
- json_content = json.dumps(result, indent=2, ensure_ascii=False)
699
- file_path = self.file_output_manager.save_to_file(
700
- content=json_content,
701
- base_name=output_file
702
- )
703
-
704
- # If suppress_output is True, return minimal response
705
- if suppress_output:
706
- minimal_result = {
707
- "success": result.get("success", True),
708
- "count": result.get("count", 0),
709
- "output_file": output_file,
710
- "file_saved": f"Results saved to {file_path}"
711
- }
712
- # Cache the full result, not the minimal one
713
- if self.cache and cache_key:
714
- self.cache.set(cache_key, result)
715
- return minimal_result
716
- else:
717
- # Include file info in full response
718
- result["output_file"] = output_file
719
- result["file_saved"] = f"Results saved to {file_path}"
720
774
  elif suppress_output:
721
775
  # If suppress_output is True but no output_file, remove results from response
722
776
  result_copy = result.copy()
@@ -38,7 +38,7 @@ class TableFormatTool(BaseMCPTool):
38
38
  structured table output through the MCP protocol.
39
39
  """
40
40
 
41
- def __init__(self, project_root: str = None) -> None:
41
+ def __init__(self, project_root: str | None = None) -> None:
42
42
  """Initialize the table format tool."""
43
43
  super().__init__(project_root)
44
44
  self.analysis_engine = get_analysis_engine(project_root)
@@ -74,7 +74,12 @@ class TableFormatTool(BaseMCPTool):
74
74
  "format_type": {
75
75
  "type": "string",
76
76
  "description": "Table format type",
77
- "enum": list(set(FormatterRegistry.get_available_formats() + ["full", "compact", "csv", "json"])),
77
+ "enum": list(
78
+ set(
79
+ FormatterRegistry.get_available_formats()
80
+ + ["full", "compact", "csv", "json"]
81
+ )
82
+ ),
78
83
  "default": "full",
79
84
  },
80
85
  "language": {
@@ -124,11 +129,18 @@ class TableFormatTool(BaseMCPTool):
124
129
  format_type = arguments["format_type"]
125
130
  if not isinstance(format_type, str):
126
131
  raise ValueError("format_type must be a string")
127
-
132
+
128
133
  # Check both new FormatterRegistry formats and legacy formats
129
- available_formats = list(set(FormatterRegistry.get_available_formats() + ["full", "compact", "csv", "json"]))
134
+ available_formats = list(
135
+ set(
136
+ FormatterRegistry.get_available_formats()
137
+ + ["full", "compact", "csv", "json"]
138
+ )
139
+ )
130
140
  if format_type not in available_formats:
131
- raise ValueError(f"format_type must be one of: {', '.join(sorted(available_formats))}")
141
+ raise ValueError(
142
+ f"format_type must be one of: {', '.join(sorted(available_formats))}"
143
+ )
132
144
 
133
145
  # Validate language if provided
134
146
  if "language" in arguments:
@@ -454,7 +466,7 @@ class TableFormatTool(BaseMCPTool):
454
466
 
455
467
  # Always convert analysis result to dict for metadata extraction
456
468
  structure_dict = self._convert_analysis_result_to_dict(structure_result)
457
-
469
+
458
470
  # Try to use new FormatterRegistry first, fallback to legacy TableFormatter
459
471
  try:
460
472
  if FormatterRegistry.is_format_supported(format_type):
@@ -463,13 +475,15 @@ class TableFormatTool(BaseMCPTool):
463
475
  table_output = formatter.format(structure_result.elements)
464
476
  else:
465
477
  # Fallback to legacy TableFormatter for backward compatibility
466
- formatter = TableFormatter(format_type)
467
- table_output = formatter.format_structure(structure_dict)
478
+ formatter: Any = TableFormatter(format_type)
479
+ table_output = formatter.format_structure(structure_dict) # type: ignore[attr-defined]
468
480
  except Exception as e:
469
481
  # If FormatterRegistry fails, fallback to legacy TableFormatter
470
- logger.warning(f"FormatterRegistry failed, using legacy formatter: {e}")
471
- formatter = TableFormatter(format_type)
472
- table_output = formatter.format_structure(structure_dict)
482
+ logger.warning(
483
+ f"FormatterRegistry failed, using legacy formatter: {e}"
484
+ )
485
+ formatter: Any = TableFormatter(format_type)
486
+ table_output = formatter.format_structure(structure_dict) # type: ignore[attr-defined]
473
487
 
474
488
  # Ensure output format matches CLI exactly
475
489
  # Fix line ending differences: normalize to Unix-style LF (\n)
@@ -515,15 +529,14 @@ class TableFormatTool(BaseMCPTool):
515
529
 
516
530
  # Save to file with automatic extension detection
517
531
  saved_file_path = self.file_output_manager.save_to_file(
518
- content=table_output,
519
- base_name=base_name
532
+ content=table_output, base_name=base_name
520
533
  )
521
-
534
+
522
535
  result["output_file_path"] = saved_file_path
523
536
  result["file_saved"] = True
524
-
537
+
525
538
  self.logger.info(f"Analysis output saved to: {saved_file_path}")
526
-
539
+
527
540
  except Exception as e:
528
541
  self.logger.error(f"Failed to save output to file: {e}")
529
542
  result["file_save_error"] = str(e)