PyPI - tree-sitter-analyzer - Versions diffs - 1.7.7__py3-none-any.whl → 1.8.2__py3-none-any.whl - Mend

tree-sitter-analyzer 1.7.7__py3-none-any.whl → 1.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (38)

tree_sitter_analyzer/__init__.py +1 -1
tree_sitter_analyzer/api.py +23 -30
tree_sitter_analyzer/cli/argument_validator.py +77 -0
tree_sitter_analyzer/cli/commands/table_command.py +7 -2
tree_sitter_analyzer/cli_main.py +17 -3
tree_sitter_analyzer/core/cache_service.py +15 -5
tree_sitter_analyzer/core/query.py +33 -22
tree_sitter_analyzer/core/query_service.py +179 -154
tree_sitter_analyzer/formatters/formatter_registry.py +355 -0
tree_sitter_analyzer/formatters/html_formatter.py +462 -0
tree_sitter_analyzer/formatters/language_formatter_factory.py +3 -0
tree_sitter_analyzer/formatters/markdown_formatter.py +1 -1
tree_sitter_analyzer/language_detector.py +80 -7
tree_sitter_analyzer/languages/css_plugin.py +390 -0
tree_sitter_analyzer/languages/html_plugin.py +395 -0
tree_sitter_analyzer/languages/java_plugin.py +116 -0
tree_sitter_analyzer/languages/javascript_plugin.py +113 -0
tree_sitter_analyzer/languages/markdown_plugin.py +266 -46
tree_sitter_analyzer/languages/python_plugin.py +176 -33
tree_sitter_analyzer/languages/typescript_plugin.py +130 -1
tree_sitter_analyzer/mcp/tools/query_tool.py +99 -58
tree_sitter_analyzer/mcp/tools/table_format_tool.py +24 -10
tree_sitter_analyzer/models.py +53 -0
tree_sitter_analyzer/output_manager.py +1 -1
tree_sitter_analyzer/plugins/base.py +50 -0
tree_sitter_analyzer/plugins/manager.py +5 -1
tree_sitter_analyzer/queries/css.py +634 -0
tree_sitter_analyzer/queries/html.py +556 -0
tree_sitter_analyzer/queries/markdown.py +54 -164
tree_sitter_analyzer/query_loader.py +16 -3
tree_sitter_analyzer/security/validator.py +182 -44
tree_sitter_analyzer/utils/__init__.py +113 -0
tree_sitter_analyzer/utils/tree_sitter_compat.py +282 -0
tree_sitter_analyzer/utils.py +62 -24
{tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.2.dist-info}/METADATA +120 -14
{tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.2.dist-info}/RECORD +38 -29
{tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.2.dist-info}/entry_points.txt +2 -0
{tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.2.dist-info}/WHEEL +0 -0

tree_sitter_analyzer/languages/markdown_plugin.py CHANGED Viewed

@@ -24,6 +24,7 @@ from ..encoding_utils import extract_text_slice, safe_encode
 from ..models import AnalysisResult, CodeElement
 from ..plugins.base import ElementExtractor, LanguagePlugin
 from ..utils import log_debug, log_error, log_warning
+from ..utils.tree_sitter_compat import TreeSitterQueryCompat, get_node_text_safe
 class MarkdownElement(CodeElement):
@@ -1447,22 +1448,9 @@ class MarkdownPlugin(LanguagePlugin):
                 import tree_sitter
                 import tree_sitter_markdown as tsmarkdown
-                # Support for newer versions of tree-sitter-markdown
-                try:
-                    # New API (0.3.1+)
-                    language_capsule = tsmarkdown.language()
-                    self._language_cache = tree_sitter.Language(language_capsule)
-                except (AttributeError, TypeError):
-                    # For older API or different format
-                    try:
-                        # Get Language object directly
-                        self._language_cache = tsmarkdown.language()
-                    except Exception:
-                        # Last resort: get directly from module
-                        if hasattr(tsmarkdown, 'LANGUAGE'):
-                            self._language_cache = tree_sitter.Language(tsmarkdown.LANGUAGE)
-                        else:
-                            raise ImportError("Cannot access markdown language")
+                # Use modern tree-sitter-markdown API
+                language_capsule = tsmarkdown.language()
+                self._language_cache = tree_sitter.Language(language_capsule)
             except ImportError:
                 log_error("tree-sitter-markdown not available")
                 return None
@@ -1637,35 +1625,11 @@ class MarkdownPlugin(LanguagePlugin):
             except KeyError:
                 return {"error": f"Unknown query: {query_name}"}
-            # Use new tree-sitter 0.25.x API
-            query = tree_sitter.Query(language, query_string)
-            # Execute query using the new API
-            # In tree-sitter 0.25.x, we need to use a different approach
-            matches = []
-            captures = []
-            # Walk through the tree and find matches manually
-            def walk_tree(node):
-                # This is a simplified approach - in practice, you'd want to use
-                # the proper query execution method when it becomes available
-                if query_name == "headers" and node.type in ["atx_heading", "setext_heading"]:
-                    matches.append(node)
-                elif query_name == "code_blocks" and node.type in ["fenced_code_block", "indented_code_block"]:
-                    matches.append(node)
-                elif query_name == "links" and node.type in ["link", "autolink", "reference_link"]:
-                    matches.append(node)
-                for child in node.children:
-                    walk_tree(child)
-            walk_tree(tree.root_node)
-            # Convert matches to capture format
-            for match in matches:
-                captures.append((match, query_name))
-            return {"captures": captures, "query": query_string, "matches": len(matches)}
+            # Use tree-sitter API with modern handling
+            captures = TreeSitterQueryCompat.safe_execute_query(
+                language, query_string, tree.root_node, fallback_result=[]
+            )
+            return {"captures": captures, "query": query_string, "matches": len(captures)}
         except Exception as e:
             log_error(f"Query execution failed: {e}")
@@ -1692,4 +1656,260 @@ class MarkdownPlugin(LanguagePlugin):
         except Exception as e:
             log_error(f"Failed to extract elements: {e}")
-        return elements
+        return elements
+    def execute_query_strategy(self, tree: "tree_sitter.Tree", source_code: str, query_key: str) -> list[CodeElement]:
+        """Execute Markdown-specific query strategy based on query_key"""
+        if not tree or not source_code:
+            return []
+        # Initialize extractor with source code
+        self._extractor.source_code = source_code
+        self._extractor.content_lines = source_code.split("\n")
+        self._extractor._reset_caches()
+        # Map query_key to appropriate extraction method
+        query_mapping = {
+            # Header-related queries (mapped to functions)
+            "function": lambda: self._extractor.extract_headers(tree, source_code),
+            "headers": lambda: self._extractor.extract_headers(tree, source_code),
+            "heading": lambda: self._extractor.extract_headers(tree, source_code),
+            # Code block-related queries (mapped to classes)
+            "class": lambda: self._extractor.extract_code_blocks(tree, source_code),
+            "code_blocks": lambda: self._extractor.extract_code_blocks(tree, source_code),
+            "code_block": lambda: self._extractor.extract_code_blocks(tree, source_code),
+            # Link and image queries (mapped to variables)
+            "variable": lambda: self._extractor.extract_links(tree, source_code) + self._extractor.extract_images(tree, source_code),
+            "links": lambda: self._extractor.extract_links(tree, source_code),
+            "link": lambda: self._extractor.extract_links(tree, source_code),
+            "images": lambda: self._extractor.extract_images(tree, source_code),
+            "image": lambda: self._extractor.extract_images(tree, source_code),
+            # Reference queries (mapped to imports)
+            "import": lambda: self._extractor.extract_references(tree, source_code),
+            "references": lambda: self._extractor.extract_references(tree, source_code),
+            "reference": lambda: self._extractor.extract_references(tree, source_code),
+            # List and table queries
+            "lists": lambda: self._extractor.extract_lists(tree, source_code),
+            "list": lambda: self._extractor.extract_lists(tree, source_code),
+            "task_lists": lambda: [l for l in self._extractor.extract_lists(tree, source_code) if getattr(l, 'element_type', '') == 'task_list'],
+            "tables": lambda: self._extractor.extract_tables(tree, source_code),
+            "table": lambda: self._extractor.extract_tables(tree, source_code),
+            # Content structure queries
+            "blockquotes": lambda: self._extractor.extract_blockquotes(tree, source_code),
+            "blockquote": lambda: self._extractor.extract_blockquotes(tree, source_code),
+            "horizontal_rules": lambda: self._extractor.extract_horizontal_rules(tree, source_code),
+            "horizontal_rule": lambda: self._extractor.extract_horizontal_rules(tree, source_code),
+            # HTML and formatting queries
+            "html_blocks": lambda: self._extractor.extract_html_elements(tree, source_code),
+            "html_block": lambda: self._extractor.extract_html_elements(tree, source_code),
+            "html": lambda: self._extractor.extract_html_elements(tree, source_code),
+            "emphasis": lambda: self._extractor.extract_text_formatting(tree, source_code),
+            "formatting": lambda: self._extractor.extract_text_formatting(tree, source_code),
+            "text_formatting": lambda: self._extractor.extract_text_formatting(tree, source_code),
+            "inline_code": lambda: [f for f in self._extractor.extract_text_formatting(tree, source_code) if getattr(f, 'element_type', '') == 'inline_code'],
+            "strikethrough": lambda: [f for f in self._extractor.extract_text_formatting(tree, source_code) if getattr(f, 'element_type', '') == 'strikethrough'],
+            # Footnote queries
+            "footnotes": lambda: self._extractor.extract_footnotes(tree, source_code),
+            "footnote": lambda: self._extractor.extract_footnotes(tree, source_code),
+            # Comprehensive queries
+            "all_elements": lambda: self.extract_elements(tree, source_code),
+            "text_content": lambda: self._extractor.extract_headers(tree, source_code) + self._extractor.extract_text_formatting(tree, source_code),
+        }
+        # Execute the appropriate extraction method
+        if query_key in query_mapping:
+            try:
+                return query_mapping[query_key]()
+            except Exception as e:
+                log_error(f"Error executing Markdown query '{query_key}': {e}")
+                return []
+        else:
+            log_warning(f"Unsupported Markdown query key: {query_key}")
+            return []
+    def get_element_categories(self) -> dict[str, list[str]]:
+        """Get Markdown element categories mapping query_key to node_types"""
+        return {
+            # Header categories (function-like)
+            "function": [
+                "atx_heading",
+                "setext_heading"
+            ],
+            "headers": [
+                "atx_heading",
+                "setext_heading"
+            ],
+            "heading": [
+                "atx_heading",
+                "setext_heading"
+            ],
+            # Code block categories (class-like)
+            "class": [
+                "fenced_code_block",
+                "indented_code_block"
+            ],
+            "code_blocks": [
+                "fenced_code_block",
+                "indented_code_block"
+            ],
+            "code_block": [
+                "fenced_code_block",
+                "indented_code_block"
+            ],
+            # Link and image categories (variable-like)
+            "variable": [
+                "inline",  # Contains links and images
+                "link",
+                "autolink",
+                "reference_link",
+                "image"
+            ],
+            "links": [
+                "inline",  # Contains inline links
+                "link",
+                "autolink",
+                "reference_link"
+            ],
+            "link": [
+                "inline",
+                "link",
+                "autolink",
+                "reference_link"
+            ],
+            "images": [
+                "inline",  # Contains inline images
+                "image"
+            ],
+            "image": [
+                "inline",
+                "image"
+            ],
+            # Reference categories (import-like)
+            "import": [
+                "link_reference_definition"
+            ],
+            "references": [
+                "link_reference_definition"
+            ],
+            "reference": [
+                "link_reference_definition"
+            ],
+            # List categories
+            "lists": [
+                "list",
+                "list_item"
+            ],
+            "list": [
+                "list",
+                "list_item"
+            ],
+            "task_lists": [
+                "list",
+                "list_item"
+            ],
+            # Table categories
+            "tables": [
+                "pipe_table",
+                "table"
+            ],
+            "table": [
+                "pipe_table",
+                "table"
+            ],
+            # Content structure categories
+            "blockquotes": [
+                "block_quote"
+            ],
+            "blockquote": [
+                "block_quote"
+            ],
+            "horizontal_rules": [
+                "thematic_break"
+            ],
+            "horizontal_rule": [
+                "thematic_break"
+            ],
+            # HTML categories
+            "html_blocks": [
+                "html_block",
+                "inline"  # Contains inline HTML
+            ],
+            "html_block": [
+                "html_block",
+                "inline"
+            ],
+            "html": [
+                "html_block",
+                "inline"
+            ],
+            # Text formatting categories
+            "emphasis": [
+                "inline"  # Contains emphasis elements
+            ],
+            "formatting": [
+                "inline"
+            ],
+            "text_formatting": [
+                "inline"
+            ],
+            "inline_code": [
+                "inline"
+            ],
+            "strikethrough": [
+                "inline"
+            ],
+            # Footnote categories
+            "footnotes": [
+                "inline",  # Contains footnote references
+                "paragraph"  # Contains footnote definitions
+            ],
+            "footnote": [
+                "inline",
+                "paragraph"
+            ],
+            # Comprehensive categories
+            "all_elements": [
+                "atx_heading",
+                "setext_heading",
+                "fenced_code_block",
+                "indented_code_block",
+                "inline",
+                "link",
+                "autolink",
+                "reference_link",
+                "image",
+                "link_reference_definition",
+                "list",
+                "list_item",
+                "pipe_table",
+                "table",
+                "block_quote",
+                "thematic_break",
+                "html_block",
+                "paragraph"
+            ],
+            "text_content": [
+                "atx_heading",
+                "setext_heading",
+                "inline",
+                "paragraph"
+            ]
+        }

tree_sitter_analyzer/languages/python_plugin.py CHANGED Viewed

@@ -25,6 +25,7 @@ from ..encoding_utils import extract_text_slice, safe_encode
 from ..models import AnalysisResult, Class, CodeElement, Function, Import, Variable
 from ..plugins.base import ElementExtractor, LanguagePlugin
 from ..utils import log_debug, log_error, log_warning
+from ..utils.tree_sitter_compat import TreeSitterQueryCompat, get_node_text_safe
 class PythonElementExtractor(ElementExtractor):
@@ -118,7 +119,7 @@ class PythonElementExtractor(ElementExtractor):
         # Only extract class-level attributes, not function-level variables
         try:
-            # Find class declarations
+            # Find class declarations using compatible API
             class_query = """
             (class_definition
                 body: (block) @class.body) @class.definition
@@ -126,18 +127,23 @@ class PythonElementExtractor(ElementExtractor):
             language = tree.language if hasattr(tree, "language") else None
             if language:
-                import tree_sitter
-                query = tree_sitter.Query(language, class_query)
-                captures = query.captures(tree.root_node)
-                if isinstance(captures, dict):
-                    class_bodies = captures.get("class.body", [])
-                    # For each class body, extract attribute assignments
-                    for class_body in class_bodies:
-                        variables.extend(
-                            self._extract_class_attributes(class_body, source_code)
-                        )
+                try:
+                    captures = TreeSitterQueryCompat.safe_execute_query(
+                        language, class_query, tree.root_node, fallback_result=[]
+                    )
+                    class_bodies = []
+                    for node, capture_name in captures:
+                        if capture_name == "class.body":
+                            class_bodies.append(node)
+                except Exception as e:
+                    log_debug(f"Could not extract Python class attributes using query: {e}")
+                    class_bodies = []
+                # For each class body, extract attribute assignments
+                for class_body in class_bodies:
+                    variables.extend(
+                        self._extract_class_attributes(class_body, source_code)
+                    )
         except Exception as e:
             log_warning(f"Could not extract Python class attributes: {e}")
@@ -731,24 +737,30 @@ class PythonElementExtractor(ElementExtractor):
             if language:
                 for query_string in import_queries:
                     try:
-                        import tree_sitter
-                        query = tree_sitter.Query(language, query_string)
-                        captures = query.captures(tree.root_node)
-                        if isinstance(captures, dict):
-                            # Process different types of imports
-                            for key, nodes in captures.items():
-                                if key.endswith("statement"):
-                                    import_type = key.split(".")[0]
-                                    for node in nodes:
-                                        imp = self._extract_import_info(
-                                            node, source_code, import_type
-                                        )
-                                        if imp:
-                                            imports.append(imp)
+                        captures = TreeSitterQueryCompat.safe_execute_query(
+                            language, query_string, tree.root_node, fallback_result=[]
+                        )
+                        # Group captures by name
+                        captures_dict = {}
+                        for node, capture_name in captures:
+                            if capture_name not in captures_dict:
+                                captures_dict[capture_name] = []
+                            captures_dict[capture_name].append(node)
+                        # Process different types of imports
+                        for key, nodes in captures_dict.items():
+                            if key.endswith("statement"):
+                                import_type = key.split(".")[0]
+                                for node in nodes:
+                                    imp = self._extract_import_info(
+                                        node, source_code, import_type
+                                    )
+                                    if imp:
+                                        imports.append(imp)
                     except Exception as query_error:
-                        # Fallback to manual extraction for tree-sitter 0.25.x compatibility
-                        log_warning(f"Query execution failed, using manual extraction: {query_error}")
+                        # Fallback to manual extraction for tree-sitter compatibility
+                        log_debug(f"Query execution failed, using manual extraction: {query_error}")
                         imports.extend(self._extract_imports_manual(tree.root_node, source_code))
                         break
@@ -1247,6 +1259,137 @@ class PythonPlugin(LanguagePlugin):
             ],
         }
+    def execute_query_strategy(self, tree: "tree_sitter.Tree", source_code: str, query_key: str) -> list[dict]:
+        """
+        Execute query strategy for Python language
+        Args:
+            tree: Tree-sitter tree object
+            source_code: Source code string
+            query_key: Query key to execute
+        Returns:
+            List of query results
+        """
+        # Use the extractor to get elements based on query_key
+        extractor = self.get_extractor()
+        # Map query keys to extraction methods
+        if query_key in ["function", "functions", "method", "methods"]:
+            elements = extractor.extract_functions(tree, source_code)
+        elif query_key in ["class", "classes"]:
+            elements = extractor.extract_classes(tree, source_code)
+        elif query_key in ["variable", "variables"]:
+            elements = extractor.extract_variables(tree, source_code)
+        elif query_key in ["import", "imports", "from_import", "from_imports"]:
+            elements = extractor.extract_imports(tree, source_code)
+        else:
+            # For unknown query keys, return empty list
+            return []
+        # Convert elements to query result format
+        results = []
+        for element in elements:
+            result = {
+                "capture_name": query_key,
+                "node_type": self._get_node_type_for_element(element),
+                "start_line": element.start_line,
+                "end_line": element.end_line,
+                "text": element.raw_text,
+                "name": element.name,
+            }
+            results.append(result)
+        return results
+    def _get_node_type_for_element(self, element) -> str:
+        """Get appropriate node type for element"""
+        from ..models import Function, Class, Variable, Import
+        if isinstance(element, Function):
+            return "function_definition"
+        elif isinstance(element, Class):
+            return "class_definition"
+        elif isinstance(element, Variable):
+            return "assignment"
+        elif isinstance(element, Import):
+            return "import_statement"
+        else:
+            return "unknown"
+    def get_element_categories(self) -> dict[str, list[str]]:
+        """
+        Get element categories mapping query keys to node types
+        Returns:
+            Dictionary mapping query keys to lists of node types
+        """
+        return {
+            # Function-related queries
+            "function": ["function_definition"],
+            "functions": ["function_definition"],
+            "async_function": ["function_definition"],
+            "async_functions": ["function_definition"],
+            "method": ["function_definition"],
+            "methods": ["function_definition"],
+            "lambda": ["lambda"],
+            "lambdas": ["lambda"],
+            # Class-related queries
+            "class": ["class_definition"],
+            "classes": ["class_definition"],
+            # Import-related queries
+            "import": ["import_statement", "import_from_statement"],
+            "imports": ["import_statement", "import_from_statement"],
+            "from_import": ["import_from_statement"],
+            "from_imports": ["import_from_statement"],
+            # Variable-related queries
+            "variable": ["assignment"],
+            "variables": ["assignment"],
+            # Decorator-related queries
+            "decorator": ["decorator"],
+            "decorators": ["decorator"],
+            # Exception-related queries
+            "exception": ["raise_statement", "except_clause"],
+            "exceptions": ["raise_statement", "except_clause"],
+            # Comprehension-related queries
+            "comprehension": ["list_comprehension", "set_comprehension", "dictionary_comprehension", "generator_expression"],
+            "comprehensions": ["list_comprehension", "set_comprehension", "dictionary_comprehension", "generator_expression"],
+            # Context manager queries
+            "context_manager": ["with_statement"],
+            "context_managers": ["with_statement"],
+            # Type hint queries
+            "type_hint": ["type"],
+            "type_hints": ["type"],
+            # Docstring queries
+            "docstring": ["string"],
+            "docstrings": ["string"],
+            # Framework-specific queries
+            "django_model": ["class_definition"],
+            "django_models": ["class_definition"],
+            "flask_route": ["decorator"],
+            "flask_routes": ["decorator"],
+            "fastapi_endpoint": ["function_definition"],
+            "fastapi_endpoints": ["function_definition"],
+            # Generic queries
+            "all_elements": [
+                "function_definition", "class_definition", "import_statement", "import_from_statement",
+                "assignment", "decorator", "raise_statement", "except_clause",
+                "list_comprehension", "set_comprehension", "dictionary_comprehension", "generator_expression",
+                "with_statement", "type", "string", "lambda"
+            ],
+        }
     async def analyze_file(
         self, file_path: str, request: AnalysisRequest
     ) -> AnalysisResult:
@@ -1330,9 +1473,9 @@ class PythonPlugin(LanguagePlugin):
             else:
                 return {"error": f"Unknown query: {query_name}"}
-            import tree_sitter
-            query = tree_sitter.Query(language, query_string)
-            captures = query.captures(tree.root_node)
+            captures = TreeSitterQueryCompat.safe_execute_query(
+                language, query_string, tree.root_node, fallback_result=[]
+            )
             return {"captures": captures, "query": query_string}
         except Exception as e:

tree-sitter-analyzer 1.7.7__py3-none-any.whl → 1.8.2__py3-none-any.whl

Potentially problematic release.

tree-sitter-analyzer 1.7.7__py3-none-any.whl → 1.8.2__py3-none-any.whl