PyPI - tree-sitter-analyzer - Versions diffs - 1.7.7__py3-none-any.whl → 1.8.3__py3-none-any.whl - Mend

tree-sitter-analyzer 1.7.7__py3-none-any.whl → 1.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (42) hide show

tree_sitter_analyzer/__init__.py +1 -1
tree_sitter_analyzer/api.py +23 -30
tree_sitter_analyzer/cli/argument_validator.py +77 -0
tree_sitter_analyzer/cli/commands/table_command.py +7 -2
tree_sitter_analyzer/cli_main.py +17 -3
tree_sitter_analyzer/core/cache_service.py +15 -5
tree_sitter_analyzer/core/query.py +33 -22
tree_sitter_analyzer/core/query_service.py +179 -154
tree_sitter_analyzer/formatters/formatter_registry.py +355 -0
tree_sitter_analyzer/formatters/html_formatter.py +462 -0
tree_sitter_analyzer/formatters/language_formatter_factory.py +3 -0
tree_sitter_analyzer/formatters/markdown_formatter.py +1 -1
tree_sitter_analyzer/language_detector.py +80 -7
tree_sitter_analyzer/languages/css_plugin.py +390 -0
tree_sitter_analyzer/languages/html_plugin.py +395 -0
tree_sitter_analyzer/languages/java_plugin.py +116 -0
tree_sitter_analyzer/languages/javascript_plugin.py +113 -0
tree_sitter_analyzer/languages/markdown_plugin.py +266 -46
tree_sitter_analyzer/languages/python_plugin.py +176 -33
tree_sitter_analyzer/languages/typescript_plugin.py +130 -1
tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +12 -1
tree_sitter_analyzer/mcp/tools/query_tool.py +101 -60
tree_sitter_analyzer/mcp/tools/search_content_tool.py +12 -1
tree_sitter_analyzer/mcp/tools/table_format_tool.py +26 -12
tree_sitter_analyzer/mcp/utils/file_output_factory.py +204 -0
tree_sitter_analyzer/mcp/utils/file_output_manager.py +52 -2
tree_sitter_analyzer/models.py +53 -0
tree_sitter_analyzer/output_manager.py +1 -1
tree_sitter_analyzer/plugins/base.py +50 -0
tree_sitter_analyzer/plugins/manager.py +5 -1
tree_sitter_analyzer/queries/css.py +634 -0
tree_sitter_analyzer/queries/html.py +556 -0
tree_sitter_analyzer/queries/markdown.py +54 -164
tree_sitter_analyzer/query_loader.py +16 -3
tree_sitter_analyzer/security/validator.py +182 -44
tree_sitter_analyzer/utils/__init__.py +113 -0
tree_sitter_analyzer/utils/tree_sitter_compat.py +282 -0
tree_sitter_analyzer/utils.py +62 -24
{tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.3.dist-info}/METADATA +135 -31
{tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.3.dist-info}/RECORD +42 -32
{tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.3.dist-info}/entry_points.txt +2 -0
{tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.3.dist-info}/WHEEL +0 -0

tree_sitter_analyzer/core/query_service.py CHANGED Viewed

@@ -6,11 +6,14 @@ Unified query service for both CLI and MCP interfaces to avoid code duplication.
 Provides core tree-sitter query functionality including predefined and custom queries.
 """
+import asyncio
 import logging
 from typing import Any
 from ..encoding_utils import read_file_safe
+from ..plugins.manager import PluginManager
 from ..query_loader import query_loader
+from ..utils.tree_sitter_compat import TreeSitterQueryCompat, get_node_text_safe
 from .parser import Parser
 from .query_filter import QueryFilter
@@ -25,6 +28,8 @@ class QueryService:
         self.project_root = project_root
         self.parser = Parser()
         self.filter = QueryFilter()
+        self.plugin_manager = PluginManager()
+        self.plugin_manager.load_plugins()
     async def execute_query(
         self,
@@ -60,7 +65,7 @@ class QueryService:
         try:
             # Read file content
-            content, encoding = read_file_safe(file_path)
+            content, encoding = await self._read_file_async(file_path)
             # Parse file
             parse_result = self.parser.parse_code(content, language, file_path)
@@ -80,45 +85,43 @@ class QueryService:
                         f"Query '{query_key}' not found for language '{language}'"
                     )
-            # Execute tree-sitter query using new API with fallback
-            import tree_sitter
-            captures = []
-            # Try to create and execute the query
+            # Execute tree-sitter query using modern API
             try:
-                ts_query = tree_sitter.Query(language_obj, query_string)
-                # Try to execute the query
-                captures = ts_query.captures(tree.root_node)
-                # If captures is empty or not in expected format, try manual fallback
-                if not captures or (isinstance(captures, list) and len(captures) == 0):
-                    captures = self._manual_query_execution(tree.root_node, query_key, language)
-            except (AttributeError, Exception) as e:
-                # If query creation or execution fails, use manual fallback
-                captures = self._manual_query_execution(tree.root_node, query_key, language)
+                captures = TreeSitterQueryCompat.safe_execute_query(
+                    language_obj, query_string or "", tree.root_node, fallback_result=[]
+                )
+                # If captures is empty, use plugin fallback
+                if not captures:
+                    captures = self._execute_plugin_query(
+                        tree.root_node, query_key, language, content
+                    )
+            except Exception as e:
+                logger.debug(
+                    f"Tree-sitter query execution failed, using plugin fallback: {e}"
+                )
+                # If query creation or execution fails, use plugin fallback
+                captures = self._execute_plugin_query(
+                    tree.root_node, query_key, language, content
+                )
             # Process capture results
             results = []
-            if isinstance(captures, dict):
-                # New tree-sitter API returns dictionary
-                for capture_name, nodes in captures.items():
-                    for node in nodes:
-                        results.append(self._create_result_dict(node, capture_name))
-            elif isinstance(captures, list):
-                # Handle both old API (list of tuples) and manual execution (list of tuples)
+            if isinstance(captures, list):
+                # Handle list of tuples from modern API and plugin execution
                 for capture in captures:
                     if isinstance(capture, tuple) and len(capture) == 2:
                         node, name = capture
-                        results.append(self._create_result_dict(node, name))
-            else:
-                # If captures is not in expected format, try manual fallback
-                manual_captures = self._manual_query_execution(tree.root_node, query_key, language)
-                for capture in manual_captures:
-                    if isinstance(capture, tuple) and len(capture) == 2:
-                        node, name = capture
-                        results.append(self._create_result_dict(node, name))
+                        results.append(self._create_result_dict(node, name, content))
+            # Note: This else block is unreachable due to the logic above, but kept for safety
+            # else:
+            #     # If captures is not in expected format, use plugin fallback
+            #     plugin_captures = self._execute_plugin_query(tree.root_node, query_key, language, content)
+            #     for capture in plugin_captures:
+            #         if isinstance(capture, tuple) and len(capture) == 2:
+            #             node, name = capture
+            #             results.append(self._create_result_dict(node, name, content))
             # Apply filters
             if filter_expression and results:
@@ -130,17 +133,23 @@ class QueryService:
             logger.error(f"Query execution failed: {e}")
             raise
-    def _create_result_dict(self, node: Any, capture_name: str) -> dict[str, Any]:
+    def _create_result_dict(
+        self, node: Any, capture_name: str, source_code: str = ""
+    ) -> dict[str, Any]:
         """
         Create result dictionary from tree-sitter node
         Args:
             node: tree-sitter node
             capture_name: capture name
+            source_code: source code content for text extraction
         Returns:
             Result dictionary
         """
+        # Use safe text extraction with source code
+        content = get_node_text_safe(node, source_code)
         return {
             "capture_name": capture_name,
             "node_type": node.type if hasattr(node, "type") else "unknown",
@@ -148,11 +157,7 @@ class QueryService:
                 node.start_point[0] + 1 if hasattr(node, "start_point") else 0
             ),
             "end_line": node.end_point[0] + 1 if hasattr(node, "end_point") else 0,
-            "content": (
-                node.text.decode("utf-8", errors="replace")
-                if hasattr(node, "text") and node.text
-                else ""
-            ),
+            "content": content,
         }
     def get_available_queries(self, language: str) -> list[str]:
@@ -183,130 +188,150 @@ class QueryService:
         except Exception:
             return None
-    def _manual_query_execution(self, root_node: Any, query_key: str | None, language: str) -> list[tuple[Any, str]]:
+    def _execute_plugin_query(
+        self, root_node: Any, query_key: str | None, language: str, source_code: str
+    ) -> list[tuple[Any, str]]:
         """
-        Manual query execution fallback for tree-sitter 0.25.x compatibility
+        Execute query using plugin-based dynamic dispatch
         Args:
             root_node: Root node of the parsed tree
             query_key: Query key to execute (can be None for custom queries)
             language: Programming language
+            source_code: Source code content
         Returns:
             List of (node, capture_name) tuples
         """
         captures = []
-        def walk_tree(node):
-            """Walk the tree and find matching nodes"""
-            # If query_key is None, this is a custom query - try to match common patterns
-            if query_key is None:
-                # For custom queries, try to match common node types
-                if language == "java":
-                    if node.type == "method_declaration":
-                        captures.append((node, "method"))
-                    elif node.type == "class_declaration":
-                        captures.append((node, "class"))
-                    elif node.type == "field_declaration":
-                        captures.append((node, "field"))
-                elif language == "python":
-                    if node.type == "function_definition":
-                        captures.append((node, "function"))
-                    elif node.type == "class_definition":
-                        captures.append((node, "class"))
-                    elif node.type in ["import_statement", "import_from_statement"]:
-                        captures.append((node, "import"))
-                elif language in ["javascript", "typescript"]:
-                    if node.type in ["function_declaration", "method_definition"]:
-                        captures.append((node, "function"))
-                    elif node.type == "class_declaration":
-                        captures.append((node, "class"))
-            # Markdown-specific queries
-            elif language == "markdown":
-                if query_key == "headers" and node.type in ["atx_heading", "setext_heading"]:
-                    captures.append((node, "headers"))
-                elif query_key == "code_blocks" and node.type in ["fenced_code_block", "indented_code_block"]:
-                    captures.append((node, "code_blocks"))
-                elif query_key == "links" and node.type == "inline":
-                    # リンクは inline ノード内のパターンとして検出
-                    node_text = node.text.decode('utf-8', errors='replace') if hasattr(node, 'text') and node.text else ""
-                    if '[' in node_text and '](' in node_text:
-                        captures.append((node, "links"))
-                elif query_key == "images" and node.type == "inline":
-                    # 画像は inline ノード内のパターンとして検出
-                    node_text = node.text.decode('utf-8', errors='replace') if hasattr(node, 'text') and node.text else ""
-                    if '![' in node_text and '](' in node_text:
-                        captures.append((node, "images"))
-                elif query_key == "lists" and node.type in ["list", "list_item"]:
-                    captures.append((node, "lists"))
-                elif query_key == "emphasis" and node.type == "inline":
-                    # 強調は inline ノード内の * や ** パターンとして検出
-                    node_text = node.text.decode('utf-8', errors='replace') if hasattr(node, 'text') and node.text else ""
-                    if '*' in node_text or '_' in node_text:
-                        captures.append((node, "emphasis"))
-                elif query_key == "blockquotes" and node.type == "block_quote":
-                    captures.append((node, "blockquotes"))
-                elif query_key == "tables" and node.type == "pipe_table":
-                    captures.append((node, "tables"))
-                elif query_key == "horizontal_rules" and node.type == "thematic_break":
-                    captures.append((node, "horizontal_rules"))
-                elif query_key == "html_blocks" and node.type == "html_block":
-                    captures.append((node, "html_blocks"))
-                elif query_key == "inline_html" and node.type == "html_tag":
-                    captures.append((node, "inline_html"))
-                elif query_key == "inline_code" and node.type == "code_span":
-                    captures.append((node, "inline_code"))
-                elif query_key == "text_content" and node.type in ["paragraph", "inline"]:
-                    captures.append((node, "text_content"))
-                elif query_key == "all_elements" and node.type in [
-                    "atx_heading", "setext_heading", "fenced_code_block", "indented_code_block",
-                    "inline", "list", "list_item", "block_quote", "pipe_table",
-                    "paragraph", "section"
-                ]:
-                    captures.append((node, "all_elements"))
-            # Python-specific queries
-            elif language == "python":
-                if query_key in ["function", "functions"] and node.type == "function_definition":
-                    captures.append((node, "function"))
-                elif query_key in ["class", "classes"] and node.type == "class_definition":
-                    captures.append((node, "class"))
-                elif query_key in ["import", "imports"] and node.type in ["import_statement", "import_from_statement"]:
-                    captures.append((node, "import"))
-            # JavaScript/TypeScript-specific queries
-            elif language in ["javascript", "typescript"]:
-                if query_key in ["function", "functions"] and node.type in ["function_declaration", "function_expression", "arrow_function", "method_definition"]:
-                    captures.append((node, "function"))
-                elif query_key in ["class", "classes"] and node.type in ["class_declaration", "class_expression"]:
-                    captures.append((node, "class"))
-                elif query_key in ["method", "methods"] and node.type == "method_definition":
-                    captures.append((node, "method"))
-                elif query_key in ["interface", "interfaces"] and node.type == "interface_declaration" and language == "typescript":
-                    captures.append((node, "interface"))
-                elif query_key in ["type", "types"] and node.type == "type_alias_declaration" and language == "typescript":
-                    captures.append((node, "type"))
-                elif query_key in ["variable", "variables"] and node.type in ["variable_declaration", "lexical_declaration"]:
-                    captures.append((node, "variable"))
-                elif query_key in ["import", "imports"] and node.type == "import_statement":
-                    captures.append((node, "import"))
-                elif query_key in ["export", "exports"] and node.type == "export_statement":
-                    captures.append((node, "export"))
-            # Java-specific queries
-            elif language == "java":
-                if query_key in ["method", "methods"] and node.type == "method_declaration":
-                    # Always use "method" as capture name for consistency
-                    captures.append((node, "method"))
-                elif query_key in ["class", "classes"] and node.type == "class_declaration":
-                    captures.append((node, "class"))
-                elif query_key == "field" and node.type == "field_declaration":
-                    captures.append((node, "field"))
+        # Try to get plugin for the language
+        plugin = self.plugin_manager.get_plugin(language)
+        if not plugin:
+            logger.warning(f"No plugin found for language: {language}")
+            return self._fallback_query_execution(root_node, query_key)
+        # Use plugin's execute_query_strategy method
+        try:
+            # Create a mock tree object for plugin compatibility
+            class MockTree:
+                def __init__(self, root_node: Any) -> None:
+                    self.root_node = root_node
+            # Execute plugin query strategy
+            elements = plugin.execute_query_strategy(
+                source_code, query_key or "function"
+            )
+            # Convert elements to captures format
+            if elements:
+                for element in elements:
+                    if hasattr(element, "start_line") and hasattr(element, "end_line"):
+                        # Create a mock node for compatibility
+                        class MockNode:
+                            def __init__(self, element: Any) -> None:
+                                self.type = getattr(
+                                    element, "element_type", query_key or "unknown"
+                                )
+                                self.start_point = (
+                                    getattr(element, "start_line", 1) - 1,
+                                    0,
+                                )
+                                self.end_point = (
+                                    getattr(element, "end_line", 1) - 1,
+                                    0,
+                                )
+                                self.text = getattr(element, "raw_text", "").encode(
+                                    "utf-8"
+                                )
+                        mock_node = MockNode(element)
+                        captures.append((mock_node, query_key or "element"))
+            return captures
+        except Exception as e:
+            logger.debug(f"Plugin query strategy failed: {e}")
+        # Fallback: Use plugin's element categories for tree traversal
+        try:
+            element_categories = plugin.get_element_categories()
+            if element_categories and query_key and query_key in element_categories:
+                node_types = element_categories[query_key]
+                def walk_tree(node: Any) -> None:
+                    """Walk the tree and find matching nodes using plugin categories"""
+                    if node.type in node_types:
+                        captures.append((node, query_key))
+                    # Recursively process children
+                    for child in node.children:
+                        walk_tree(child)
+                walk_tree(root_node)
+                return captures
+        except Exception as e:
+            logger.debug(f"Plugin element categories failed: {e}")
+        # Final fallback
+        return self._fallback_query_execution(root_node, query_key)
+    def _fallback_query_execution(
+        self, root_node: Any, query_key: str | None
+    ) -> list[tuple[Any, str]]:
+        """
+        Basic fallback query execution for unsupported languages
+        Args:
+            root_node: Root node of the parsed tree
+            query_key: Query key to execute
+        Returns:
+            List of (node, capture_name) tuples
+        """
+        captures = []
+        def walk_tree_basic(node: Any) -> None:
+            """Basic tree walking for unsupported languages"""
+            # Get node type safely
+            node_type = getattr(node, "type", "")
+            if not isinstance(node_type, str):
+                node_type = str(node_type)
+            # Generic node type matching (support both singular and plural forms)
+            if query_key in ("function", "functions") and "function" in node_type:
+                captures.append((node, query_key))
+            elif query_key in ("class", "classes") and "class" in node_type:
+                captures.append((node, query_key))
+            elif query_key in ("method", "methods") and "method" in node_type:
+                captures.append((node, query_key))
+            elif query_key in ("variable", "variables") and "variable" in node_type:
+                captures.append((node, query_key))
+            elif query_key in ("import", "imports") and "import" in node_type:
+                captures.append((node, query_key))
+            elif query_key in ("header", "headers") and "heading" in node_type:
+                captures.append((node, query_key))
             # Recursively process children
-            for child in node.children:
-                walk_tree(child)
-        walk_tree(root_node)
+            children = getattr(node, "children", [])
+            for child in children:
+                walk_tree_basic(child)
+        walk_tree_basic(root_node)
         return captures
+    async def _read_file_async(self, file_path: str) -> tuple[str, str]:
+        """
+        非同期ファイル読み込み
+        Args:
+            file_path: ファイルパス
+        Returns:
+            tuple[str, str]: (content, encoding)
+        """
+        # CPU集約的でない単純なファイル読み込みなので、
+        # run_in_executorを使用して非同期化
+        loop = asyncio.get_event_loop()
+        return await loop.run_in_executor(None, read_file_safe, file_path)

tree-sitter-analyzer 1.7.7__py3-none-any.whl → 1.8.3__py3-none-any.whl

Potentially problematic release.

tree-sitter-analyzer 1.7.7__py3-none-any.whl → 1.8.3__py3-none-any.whl