tree-sitter-analyzer 1.7.7__py3-none-any.whl → 1.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic.

Files changed (42)
  1. tree_sitter_analyzer/__init__.py +1 -1
  2. tree_sitter_analyzer/api.py +23 -30
  3. tree_sitter_analyzer/cli/argument_validator.py +77 -0
  4. tree_sitter_analyzer/cli/commands/table_command.py +7 -2
  5. tree_sitter_analyzer/cli_main.py +17 -3
  6. tree_sitter_analyzer/core/cache_service.py +15 -5
  7. tree_sitter_analyzer/core/query.py +33 -22
  8. tree_sitter_analyzer/core/query_service.py +179 -154
  9. tree_sitter_analyzer/formatters/formatter_registry.py +355 -0
  10. tree_sitter_analyzer/formatters/html_formatter.py +462 -0
  11. tree_sitter_analyzer/formatters/language_formatter_factory.py +3 -0
  12. tree_sitter_analyzer/formatters/markdown_formatter.py +1 -1
  13. tree_sitter_analyzer/language_detector.py +80 -7
  14. tree_sitter_analyzer/languages/css_plugin.py +390 -0
  15. tree_sitter_analyzer/languages/html_plugin.py +395 -0
  16. tree_sitter_analyzer/languages/java_plugin.py +116 -0
  17. tree_sitter_analyzer/languages/javascript_plugin.py +113 -0
  18. tree_sitter_analyzer/languages/markdown_plugin.py +266 -46
  19. tree_sitter_analyzer/languages/python_plugin.py +176 -33
  20. tree_sitter_analyzer/languages/typescript_plugin.py +130 -1
  21. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +12 -1
  22. tree_sitter_analyzer/mcp/tools/query_tool.py +101 -60
  23. tree_sitter_analyzer/mcp/tools/search_content_tool.py +12 -1
  24. tree_sitter_analyzer/mcp/tools/table_format_tool.py +26 -12
  25. tree_sitter_analyzer/mcp/utils/file_output_factory.py +204 -0
  26. tree_sitter_analyzer/mcp/utils/file_output_manager.py +52 -2
  27. tree_sitter_analyzer/models.py +53 -0
  28. tree_sitter_analyzer/output_manager.py +1 -1
  29. tree_sitter_analyzer/plugins/base.py +50 -0
  30. tree_sitter_analyzer/plugins/manager.py +5 -1
  31. tree_sitter_analyzer/queries/css.py +634 -0
  32. tree_sitter_analyzer/queries/html.py +556 -0
  33. tree_sitter_analyzer/queries/markdown.py +54 -164
  34. tree_sitter_analyzer/query_loader.py +16 -3
  35. tree_sitter_analyzer/security/validator.py +182 -44
  36. tree_sitter_analyzer/utils/__init__.py +113 -0
  37. tree_sitter_analyzer/utils/tree_sitter_compat.py +282 -0
  38. tree_sitter_analyzer/utils.py +62 -24
  39. {tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.3.dist-info}/METADATA +135 -31
  40. {tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.3.dist-info}/RECORD +42 -32
  41. {tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.3.dist-info}/entry_points.txt +2 -0
  42. {tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.3.dist-info}/WHEEL +0 -0
@@ -6,11 +6,14 @@ Unified query service for both CLI and MCP interfaces to avoid code duplication.
6
6
  Provides core tree-sitter query functionality including predefined and custom queries.
7
7
  """
8
8
 
9
+ import asyncio
9
10
  import logging
10
11
  from typing import Any
11
12
 
12
13
  from ..encoding_utils import read_file_safe
14
+ from ..plugins.manager import PluginManager
13
15
  from ..query_loader import query_loader
16
+ from ..utils.tree_sitter_compat import TreeSitterQueryCompat, get_node_text_safe
14
17
  from .parser import Parser
15
18
  from .query_filter import QueryFilter
16
19
 
@@ -25,6 +28,8 @@ class QueryService:
25
28
  self.project_root = project_root
26
29
  self.parser = Parser()
27
30
  self.filter = QueryFilter()
31
+ self.plugin_manager = PluginManager()
32
+ self.plugin_manager.load_plugins()
28
33
 
29
34
  async def execute_query(
30
35
  self,
@@ -60,7 +65,7 @@ class QueryService:
60
65
 
61
66
  try:
62
67
  # Read file content
63
- content, encoding = read_file_safe(file_path)
68
+ content, encoding = await self._read_file_async(file_path)
64
69
 
65
70
  # Parse file
66
71
  parse_result = self.parser.parse_code(content, language, file_path)
@@ -80,45 +85,43 @@ class QueryService:
80
85
  f"Query '{query_key}' not found for language '{language}'"
81
86
  )
82
87
 
83
- # Execute tree-sitter query using new API with fallback
84
- import tree_sitter
85
- captures = []
86
-
87
- # Try to create and execute the query
88
+ # Execute tree-sitter query using modern API
88
89
  try:
89
- ts_query = tree_sitter.Query(language_obj, query_string)
90
-
91
- # Try to execute the query
92
- captures = ts_query.captures(tree.root_node)
93
-
94
- # If captures is empty or not in expected format, try manual fallback
95
- if not captures or (isinstance(captures, list) and len(captures) == 0):
96
- captures = self._manual_query_execution(tree.root_node, query_key, language)
97
-
98
- except (AttributeError, Exception) as e:
99
- # If query creation or execution fails, use manual fallback
100
- captures = self._manual_query_execution(tree.root_node, query_key, language)
90
+ captures = TreeSitterQueryCompat.safe_execute_query(
91
+ language_obj, query_string or "", tree.root_node, fallback_result=[]
92
+ )
93
+
94
+ # If captures is empty, use plugin fallback
95
+ if not captures:
96
+ captures = self._execute_plugin_query(
97
+ tree.root_node, query_key, language, content
98
+ )
99
+
100
+ except Exception as e:
101
+ logger.debug(
102
+ f"Tree-sitter query execution failed, using plugin fallback: {e}"
103
+ )
104
+ # If query creation or execution fails, use plugin fallback
105
+ captures = self._execute_plugin_query(
106
+ tree.root_node, query_key, language, content
107
+ )
101
108
 
102
109
  # Process capture results
103
110
  results = []
104
- if isinstance(captures, dict):
105
- # New tree-sitter API returns dictionary
106
- for capture_name, nodes in captures.items():
107
- for node in nodes:
108
- results.append(self._create_result_dict(node, capture_name))
109
- elif isinstance(captures, list):
110
- # Handle both old API (list of tuples) and manual execution (list of tuples)
111
+ if isinstance(captures, list):
112
+ # Handle list of tuples from modern API and plugin execution
111
113
  for capture in captures:
112
114
  if isinstance(capture, tuple) and len(capture) == 2:
113
115
  node, name = capture
114
- results.append(self._create_result_dict(node, name))
115
- else:
116
- # If captures is not in expected format, try manual fallback
117
- manual_captures = self._manual_query_execution(tree.root_node, query_key, language)
118
- for capture in manual_captures:
119
- if isinstance(capture, tuple) and len(capture) == 2:
120
- node, name = capture
121
- results.append(self._create_result_dict(node, name))
116
+ results.append(self._create_result_dict(node, name, content))
117
+ # Note: This else block is unreachable due to the logic above, but kept for safety
118
+ # else:
119
+ # # If captures is not in expected format, use plugin fallback
120
+ # plugin_captures = self._execute_plugin_query(tree.root_node, query_key, language, content)
121
+ # for capture in plugin_captures:
122
+ # if isinstance(capture, tuple) and len(capture) == 2:
123
+ # node, name = capture
124
+ # results.append(self._create_result_dict(node, name, content))
122
125
 
123
126
  # Apply filters
124
127
  if filter_expression and results:
@@ -130,17 +133,23 @@ class QueryService:
130
133
  logger.error(f"Query execution failed: {e}")
131
134
  raise
132
135
 
133
- def _create_result_dict(self, node: Any, capture_name: str) -> dict[str, Any]:
136
+ def _create_result_dict(
137
+ self, node: Any, capture_name: str, source_code: str = ""
138
+ ) -> dict[str, Any]:
134
139
  """
135
140
  Create result dictionary from tree-sitter node
136
141
 
137
142
  Args:
138
143
  node: tree-sitter node
139
144
  capture_name: capture name
145
+ source_code: source code content for text extraction
140
146
 
141
147
  Returns:
142
148
  Result dictionary
143
149
  """
150
+ # Use safe text extraction with source code
151
+ content = get_node_text_safe(node, source_code)
152
+
144
153
  return {
145
154
  "capture_name": capture_name,
146
155
  "node_type": node.type if hasattr(node, "type") else "unknown",
@@ -148,11 +157,7 @@ class QueryService:
148
157
  node.start_point[0] + 1 if hasattr(node, "start_point") else 0
149
158
  ),
150
159
  "end_line": node.end_point[0] + 1 if hasattr(node, "end_point") else 0,
151
- "content": (
152
- node.text.decode("utf-8", errors="replace")
153
- if hasattr(node, "text") and node.text
154
- else ""
155
- ),
160
+ "content": content,
156
161
  }
157
162
 
158
163
  def get_available_queries(self, language: str) -> list[str]:
@@ -183,130 +188,150 @@ class QueryService:
183
188
  except Exception:
184
189
  return None
185
190
 
186
- def _manual_query_execution(self, root_node: Any, query_key: str | None, language: str) -> list[tuple[Any, str]]:
191
+ def _execute_plugin_query(
192
+ self, root_node: Any, query_key: str | None, language: str, source_code: str
193
+ ) -> list[tuple[Any, str]]:
187
194
  """
188
- Manual query execution fallback for tree-sitter 0.25.x compatibility
189
-
195
+ Execute query using plugin-based dynamic dispatch
196
+
190
197
  Args:
191
198
  root_node: Root node of the parsed tree
192
199
  query_key: Query key to execute (can be None for custom queries)
193
200
  language: Programming language
194
-
201
+ source_code: Source code content
202
+
195
203
  Returns:
196
204
  List of (node, capture_name) tuples
197
205
  """
198
206
  captures = []
199
-
200
- def walk_tree(node):
201
- """Walk the tree and find matching nodes"""
202
- # If query_key is None, this is a custom query - try to match common patterns
203
- if query_key is None:
204
- # For custom queries, try to match common node types
205
- if language == "java":
206
- if node.type == "method_declaration":
207
- captures.append((node, "method"))
208
- elif node.type == "class_declaration":
209
- captures.append((node, "class"))
210
- elif node.type == "field_declaration":
211
- captures.append((node, "field"))
212
- elif language == "python":
213
- if node.type == "function_definition":
214
- captures.append((node, "function"))
215
- elif node.type == "class_definition":
216
- captures.append((node, "class"))
217
- elif node.type in ["import_statement", "import_from_statement"]:
218
- captures.append((node, "import"))
219
- elif language in ["javascript", "typescript"]:
220
- if node.type in ["function_declaration", "method_definition"]:
221
- captures.append((node, "function"))
222
- elif node.type == "class_declaration":
223
- captures.append((node, "class"))
224
-
225
- # Markdown-specific queries
226
- elif language == "markdown":
227
- if query_key == "headers" and node.type in ["atx_heading", "setext_heading"]:
228
- captures.append((node, "headers"))
229
- elif query_key == "code_blocks" and node.type in ["fenced_code_block", "indented_code_block"]:
230
- captures.append((node, "code_blocks"))
231
- elif query_key == "links" and node.type == "inline":
232
- # リンクは inline ノード内のパターンとして検出
233
- node_text = node.text.decode('utf-8', errors='replace') if hasattr(node, 'text') and node.text else ""
234
- if '[' in node_text and '](' in node_text:
235
- captures.append((node, "links"))
236
- elif query_key == "images" and node.type == "inline":
237
- # 画像は inline ノード内のパターンとして検出
238
- node_text = node.text.decode('utf-8', errors='replace') if hasattr(node, 'text') and node.text else ""
239
- if '![' in node_text and '](' in node_text:
240
- captures.append((node, "images"))
241
- elif query_key == "lists" and node.type in ["list", "list_item"]:
242
- captures.append((node, "lists"))
243
- elif query_key == "emphasis" and node.type == "inline":
244
- # 強調は inline ノード内の * や ** パターンとして検出
245
- node_text = node.text.decode('utf-8', errors='replace') if hasattr(node, 'text') and node.text else ""
246
- if '*' in node_text or '_' in node_text:
247
- captures.append((node, "emphasis"))
248
- elif query_key == "blockquotes" and node.type == "block_quote":
249
- captures.append((node, "blockquotes"))
250
- elif query_key == "tables" and node.type == "pipe_table":
251
- captures.append((node, "tables"))
252
- elif query_key == "horizontal_rules" and node.type == "thematic_break":
253
- captures.append((node, "horizontal_rules"))
254
- elif query_key == "html_blocks" and node.type == "html_block":
255
- captures.append((node, "html_blocks"))
256
- elif query_key == "inline_html" and node.type == "html_tag":
257
- captures.append((node, "inline_html"))
258
- elif query_key == "inline_code" and node.type == "code_span":
259
- captures.append((node, "inline_code"))
260
- elif query_key == "text_content" and node.type in ["paragraph", "inline"]:
261
- captures.append((node, "text_content"))
262
- elif query_key == "all_elements" and node.type in [
263
- "atx_heading", "setext_heading", "fenced_code_block", "indented_code_block",
264
- "inline", "list", "list_item", "block_quote", "pipe_table",
265
- "paragraph", "section"
266
- ]:
267
- captures.append((node, "all_elements"))
268
-
269
- # Python-specific queries
270
- elif language == "python":
271
- if query_key in ["function", "functions"] and node.type == "function_definition":
272
- captures.append((node, "function"))
273
- elif query_key in ["class", "classes"] and node.type == "class_definition":
274
- captures.append((node, "class"))
275
- elif query_key in ["import", "imports"] and node.type in ["import_statement", "import_from_statement"]:
276
- captures.append((node, "import"))
277
-
278
- # JavaScript/TypeScript-specific queries
279
- elif language in ["javascript", "typescript"]:
280
- if query_key in ["function", "functions"] and node.type in ["function_declaration", "function_expression", "arrow_function", "method_definition"]:
281
- captures.append((node, "function"))
282
- elif query_key in ["class", "classes"] and node.type in ["class_declaration", "class_expression"]:
283
- captures.append((node, "class"))
284
- elif query_key in ["method", "methods"] and node.type == "method_definition":
285
- captures.append((node, "method"))
286
- elif query_key in ["interface", "interfaces"] and node.type == "interface_declaration" and language == "typescript":
287
- captures.append((node, "interface"))
288
- elif query_key in ["type", "types"] and node.type == "type_alias_declaration" and language == "typescript":
289
- captures.append((node, "type"))
290
- elif query_key in ["variable", "variables"] and node.type in ["variable_declaration", "lexical_declaration"]:
291
- captures.append((node, "variable"))
292
- elif query_key in ["import", "imports"] and node.type == "import_statement":
293
- captures.append((node, "import"))
294
- elif query_key in ["export", "exports"] and node.type == "export_statement":
295
- captures.append((node, "export"))
296
-
297
- # Java-specific queries
298
- elif language == "java":
299
- if query_key in ["method", "methods"] and node.type == "method_declaration":
300
- # Always use "method" as capture name for consistency
301
- captures.append((node, "method"))
302
- elif query_key in ["class", "classes"] and node.type == "class_declaration":
303
- captures.append((node, "class"))
304
- elif query_key == "field" and node.type == "field_declaration":
305
- captures.append((node, "field"))
207
+
208
+ # Try to get plugin for the language
209
+ plugin = self.plugin_manager.get_plugin(language)
210
+ if not plugin:
211
+ logger.warning(f"No plugin found for language: {language}")
212
+ return self._fallback_query_execution(root_node, query_key)
213
+
214
+ # Use plugin's execute_query_strategy method
215
+ try:
216
+ # Create a mock tree object for plugin compatibility
217
+ class MockTree:
218
+ def __init__(self, root_node: Any) -> None:
219
+ self.root_node = root_node
220
+
221
+ # Execute plugin query strategy
222
+ elements = plugin.execute_query_strategy(
223
+ source_code, query_key or "function"
224
+ )
225
+
226
+ # Convert elements to captures format
227
+ if elements:
228
+ for element in elements:
229
+ if hasattr(element, "start_line") and hasattr(element, "end_line"):
230
+ # Create a mock node for compatibility
231
+ class MockNode:
232
+ def __init__(self, element: Any) -> None:
233
+ self.type = getattr(
234
+ element, "element_type", query_key or "unknown"
235
+ )
236
+ self.start_point = (
237
+ getattr(element, "start_line", 1) - 1,
238
+ 0,
239
+ )
240
+ self.end_point = (
241
+ getattr(element, "end_line", 1) - 1,
242
+ 0,
243
+ )
244
+ self.text = getattr(element, "raw_text", "").encode(
245
+ "utf-8"
246
+ )
247
+
248
+ mock_node = MockNode(element)
249
+ captures.append((mock_node, query_key or "element"))
250
+
251
+ return captures
252
+
253
+ except Exception as e:
254
+ logger.debug(f"Plugin query strategy failed: {e}")
255
+
256
+ # Fallback: Use plugin's element categories for tree traversal
257
+ try:
258
+ element_categories = plugin.get_element_categories()
259
+ if element_categories and query_key and query_key in element_categories:
260
+ node_types = element_categories[query_key]
261
+
262
+ def walk_tree(node: Any) -> None:
263
+ """Walk the tree and find matching nodes using plugin categories"""
264
+ if node.type in node_types:
265
+ captures.append((node, query_key))
266
+
267
+ # Recursively process children
268
+ for child in node.children:
269
+ walk_tree(child)
270
+
271
+ walk_tree(root_node)
272
+ return captures
273
+
274
+ except Exception as e:
275
+ logger.debug(f"Plugin element categories failed: {e}")
276
+
277
+ # Final fallback
278
+ return self._fallback_query_execution(root_node, query_key)
279
+
280
+ def _fallback_query_execution(
281
+ self, root_node: Any, query_key: str | None
282
+ ) -> list[tuple[Any, str]]:
283
+ """
284
+ Basic fallback query execution for unsupported languages
285
+
286
+ Args:
287
+ root_node: Root node of the parsed tree
288
+ query_key: Query key to execute
289
+
290
+ Returns:
291
+ List of (node, capture_name) tuples
292
+ """
293
+ captures = []
294
+
295
+ def walk_tree_basic(node: Any) -> None:
296
+ """Basic tree walking for unsupported languages"""
297
+ # Get node type safely
298
+ node_type = getattr(node, "type", "")
299
+ if not isinstance(node_type, str):
300
+ node_type = str(node_type)
306
301
 
302
+ # Generic node type matching (support both singular and plural forms)
303
+ if query_key in ("function", "functions") and "function" in node_type:
304
+ captures.append((node, query_key))
305
+ elif query_key in ("class", "classes") and "class" in node_type:
306
+ captures.append((node, query_key))
307
+ elif query_key in ("method", "methods") and "method" in node_type:
308
+ captures.append((node, query_key))
309
+ elif query_key in ("variable", "variables") and "variable" in node_type:
310
+ captures.append((node, query_key))
311
+ elif query_key in ("import", "imports") and "import" in node_type:
312
+ captures.append((node, query_key))
313
+ elif query_key in ("header", "headers") and "heading" in node_type:
314
+ captures.append((node, query_key))
315
+
307
316
  # Recursively process children
308
- for child in node.children:
309
- walk_tree(child)
310
-
311
- walk_tree(root_node)
317
+ children = getattr(node, "children", [])
318
+ for child in children:
319
+ walk_tree_basic(child)
320
+
321
+ walk_tree_basic(root_node)
312
322
  return captures
323
+
324
+ async def _read_file_async(self, file_path: str) -> tuple[str, str]:
325
+ """
326
+ 非同期ファイル読み込み
327
+
328
+ Args:
329
+ file_path: ファイルパス
330
+
331
+ Returns:
332
+ tuple[str, str]: (content, encoding)
333
+ """
334
+ # CPU集約的でない単純なファイル読み込みなので、
335
+ # run_in_executorを使用して非同期化
336
+ loop = asyncio.get_event_loop()
337
+ return await loop.run_in_executor(None, read_file_safe, file_path)