tree-sitter-analyzer 1.7.5__py3-none-any.whl → 1.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (47)
  1. tree_sitter_analyzer/__init__.py +1 -1
  2. tree_sitter_analyzer/api.py +26 -32
  3. tree_sitter_analyzer/cli/argument_validator.py +77 -0
  4. tree_sitter_analyzer/cli/commands/table_command.py +7 -2
  5. tree_sitter_analyzer/cli_main.py +17 -3
  6. tree_sitter_analyzer/core/cache_service.py +15 -5
  7. tree_sitter_analyzer/core/query.py +33 -22
  8. tree_sitter_analyzer/core/query_service.py +179 -154
  9. tree_sitter_analyzer/exceptions.py +334 -0
  10. tree_sitter_analyzer/file_handler.py +16 -1
  11. tree_sitter_analyzer/formatters/formatter_registry.py +355 -0
  12. tree_sitter_analyzer/formatters/html_formatter.py +462 -0
  13. tree_sitter_analyzer/formatters/language_formatter_factory.py +3 -0
  14. tree_sitter_analyzer/formatters/markdown_formatter.py +1 -1
  15. tree_sitter_analyzer/interfaces/mcp_server.py +3 -1
  16. tree_sitter_analyzer/language_detector.py +91 -7
  17. tree_sitter_analyzer/languages/css_plugin.py +390 -0
  18. tree_sitter_analyzer/languages/html_plugin.py +395 -0
  19. tree_sitter_analyzer/languages/java_plugin.py +116 -0
  20. tree_sitter_analyzer/languages/javascript_plugin.py +113 -0
  21. tree_sitter_analyzer/languages/markdown_plugin.py +266 -46
  22. tree_sitter_analyzer/languages/python_plugin.py +176 -33
  23. tree_sitter_analyzer/languages/typescript_plugin.py +130 -1
  24. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +68 -3
  25. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +32 -7
  26. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +10 -0
  27. tree_sitter_analyzer/mcp/tools/list_files_tool.py +9 -0
  28. tree_sitter_analyzer/mcp/tools/query_tool.py +100 -52
  29. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +98 -14
  30. tree_sitter_analyzer/mcp/tools/search_content_tool.py +9 -0
  31. tree_sitter_analyzer/mcp/tools/table_format_tool.py +37 -13
  32. tree_sitter_analyzer/models.py +53 -0
  33. tree_sitter_analyzer/output_manager.py +1 -1
  34. tree_sitter_analyzer/plugins/base.py +50 -0
  35. tree_sitter_analyzer/plugins/manager.py +5 -1
  36. tree_sitter_analyzer/queries/css.py +634 -0
  37. tree_sitter_analyzer/queries/html.py +556 -0
  38. tree_sitter_analyzer/queries/markdown.py +54 -164
  39. tree_sitter_analyzer/query_loader.py +16 -3
  40. tree_sitter_analyzer/security/validator.py +343 -46
  41. tree_sitter_analyzer/utils/__init__.py +113 -0
  42. tree_sitter_analyzer/utils/tree_sitter_compat.py +282 -0
  43. tree_sitter_analyzer/utils.py +62 -24
  44. {tree_sitter_analyzer-1.7.5.dist-info → tree_sitter_analyzer-1.8.2.dist-info}/METADATA +136 -14
  45. {tree_sitter_analyzer-1.7.5.dist-info → tree_sitter_analyzer-1.8.2.dist-info}/RECORD +47 -38
  46. {tree_sitter_analyzer-1.7.5.dist-info → tree_sitter_analyzer-1.8.2.dist-info}/entry_points.txt +2 -0
  47. {tree_sitter_analyzer-1.7.5.dist-info → tree_sitter_analyzer-1.8.2.dist-info}/WHEEL +0 -0
@@ -116,34 +116,70 @@ class ReadPartialTool(BaseMCPTool):
116
116
  suppress_output = arguments.get("suppress_output", False)
117
117
  output_format = arguments.get("format", "text")
118
118
 
119
+ # Security validation BEFORE path resolution to catch symlinks
120
+ is_valid, error_msg = self.security_validator.validate_file_path(file_path, self.project_root)
121
+ if not is_valid:
122
+ logger.warning(
123
+ f"Security validation failed for file path: {file_path} - {error_msg}"
124
+ )
125
+ return {
126
+ "success": False,
127
+ "error": f"Security validation failed: {error_msg}",
128
+ "file_path": file_path
129
+ }
130
+
119
131
  # Resolve file path using common path resolver
120
132
  resolved_path = self.path_resolver.resolve(file_path)
121
133
 
122
- # Security validation (validate resolved absolute path when possible)
123
- is_valid, error_msg = self.security_validator.validate_file_path(resolved_path)
134
+ # Additional security validation on resolved path
135
+ is_valid, error_msg = self.security_validator.validate_file_path(resolved_path, self.project_root)
124
136
  if not is_valid:
125
137
  logger.warning(
126
- f"Security validation failed for file path: {file_path} - {error_msg}"
138
+ f"Security validation failed for resolved path: {resolved_path} - {error_msg}"
127
139
  )
128
- raise ValueError(f"Invalid file path: {error_msg}")
140
+ return {
141
+ "success": False,
142
+ "error": f"Security validation failed for resolved path: {error_msg}",
143
+ "file_path": file_path
144
+ }
129
145
 
130
146
  # Validate file exists
131
147
  if not Path(resolved_path).exists():
132
- raise ValueError("Invalid file path: file does not exist")
148
+ return {
149
+ "success": False,
150
+ "error": "Invalid file path: file does not exist",
151
+ "file_path": file_path
152
+ }
133
153
 
134
154
  # Validate line numbers
135
155
  if start_line < 1:
136
- raise ValueError("start_line must be >= 1")
156
+ return {
157
+ "success": False,
158
+ "error": "start_line must be >= 1",
159
+ "file_path": file_path
160
+ }
137
161
 
138
162
  if end_line is not None and end_line < start_line:
139
- raise ValueError("end_line must be >= start_line")
163
+ return {
164
+ "success": False,
165
+ "error": "end_line must be >= start_line",
166
+ "file_path": file_path
167
+ }
140
168
 
141
169
  # Validate column numbers
142
170
  if start_column is not None and start_column < 0:
143
- raise ValueError("start_column must be >= 0")
171
+ return {
172
+ "success": False,
173
+ "error": "start_column must be >= 0",
174
+ "file_path": file_path
175
+ }
144
176
 
145
177
  if end_column is not None and end_column < 0:
146
- raise ValueError("end_column must be >= 0")
178
+ return {
179
+ "success": False,
180
+ "error": "end_column must be >= 0",
181
+ "file_path": file_path
182
+ }
147
183
 
148
184
  logger.info(
149
185
  f"Reading partial content from {file_path}: lines {start_line}-{end_line or 'end'}"
@@ -160,9 +196,19 @@ class ReadPartialTool(BaseMCPTool):
160
196
  )
161
197
 
162
198
  if content is None:
163
- raise RuntimeError(
164
- f"Failed to read partial content from file: {file_path}"
165
- )
199
+ return {
200
+ "success": False,
201
+ "error": f"Failed to read partial content from file: {file_path}",
202
+ "file_path": file_path
203
+ }
204
+
205
+ # Check if content is empty or invalid range
206
+ if not content or content.strip() == "":
207
+ return {
208
+ "success": False,
209
+ "error": f"Invalid line range or empty content: start_line={start_line}, end_line={end_line}",
210
+ "file_path": file_path
211
+ }
166
212
 
167
213
  # Build result structure compatible with CLI --partial-read format
168
214
  result_data = {
@@ -198,8 +244,14 @@ class ReadPartialTool(BaseMCPTool):
198
244
  f"Successfully read {len(content)} characters from {file_path}"
199
245
  )
200
246
 
247
+ # Calculate lines extracted
248
+ lines_extracted = len(content.split('\n')) if content else 0
249
+ if end_line:
250
+ lines_extracted = end_line - start_line + 1
251
+
201
252
  # Build result - conditionally include partial_content_result based on suppress_output
202
253
  result = {
254
+ "success": True,
203
255
  "file_path": file_path,
204
256
  "range": {
205
257
  "start_line": start_line,
@@ -208,11 +260,39 @@ class ReadPartialTool(BaseMCPTool):
208
260
  "end_column": end_column,
209
261
  },
210
262
  "content_length": len(content),
263
+ "lines_extracted": lines_extracted,
211
264
  }
212
265
 
213
266
  # Only include partial_content_result if not suppressed or no output file specified
214
267
  if not suppress_output or not output_file:
215
- result["partial_content_result"] = cli_output
268
+ if output_format == "json":
269
+ # For JSON format, return structured data with exact line count
270
+ lines = content.split('\n') if content else []
271
+
272
+ # If end_line is specified, ensure we return exactly the requested number of lines
273
+ if end_line and len(lines) > lines_extracted:
274
+ lines = lines[:lines_extracted]
275
+ elif end_line and len(lines) < lines_extracted:
276
+ # Pad with empty lines if needed (shouldn't normally happen)
277
+ lines.extend([''] * (lines_extracted - len(lines)))
278
+
279
+ result["partial_content_result"] = {
280
+ "lines": lines,
281
+ "metadata": {
282
+ "file_path": file_path,
283
+ "range": {
284
+ "start_line": start_line,
285
+ "end_line": end_line,
286
+ "start_column": start_column,
287
+ "end_column": end_column,
288
+ },
289
+ "content_length": len(content),
290
+ "lines_count": len(lines)
291
+ }
292
+ }
293
+ else:
294
+ # For text/raw format, return CLI-compatible string
295
+ result["partial_content_result"] = cli_output
216
296
 
217
297
  # Handle file output if requested
218
298
  if output_file:
@@ -254,7 +334,11 @@ class ReadPartialTool(BaseMCPTool):
254
334
 
255
335
  except Exception as e:
256
336
  logger.error(f"Error reading partial content from {file_path}: {e}")
257
- raise
337
+ return {
338
+ "success": False,
339
+ "error": str(e),
340
+ "file_path": file_path
341
+ }
258
342
 
259
343
  def _read_file_partial(
260
344
  self,
@@ -289,6 +289,15 @@ class SearchContentTool(BaseMCPTool):
289
289
 
290
290
  @handle_mcp_errors("search_content")
291
291
  async def execute(self, arguments: dict[str, Any]) -> dict[str, Any] | int:
292
+ # Check if rg command is available
293
+ if not fd_rg_utils.check_external_command("rg"):
294
+ return {
295
+ "success": False,
296
+ "error": "rg (ripgrep) command not found. Please install ripgrep (https://github.com/BurntSushi/ripgrep) to use this tool.",
297
+ "count": 0,
298
+ "results": []
299
+ }
300
+
292
301
  self.validate_arguments(arguments)
293
302
 
294
303
  roots = arguments.get("roots")
@@ -18,6 +18,7 @@ from ...constants import (
18
18
  is_element_of_type,
19
19
  )
20
20
  from ...core.analysis_engine import AnalysisRequest, get_analysis_engine
21
+ from ...formatters.formatter_registry import FormatterRegistry
21
22
  from ...language_detector import detect_language_from_file
22
23
  from ...table_formatter import TableFormatter
23
24
  from ...utils import setup_logger
@@ -73,7 +74,7 @@ class TableFormatTool(BaseMCPTool):
73
74
  "format_type": {
74
75
  "type": "string",
75
76
  "description": "Table format type",
76
- "enum": ["full", "compact", "csv", "json"],
77
+ "enum": list(set(FormatterRegistry.get_available_formats() + ["full", "compact", "csv", "json"])),
77
78
  "default": "full",
78
79
  },
79
80
  "language": {
@@ -123,8 +124,11 @@ class TableFormatTool(BaseMCPTool):
123
124
  format_type = arguments["format_type"]
124
125
  if not isinstance(format_type, str):
125
126
  raise ValueError("format_type must be a string")
126
- if format_type not in ["full", "compact", "csv", "json"]:
127
- raise ValueError("format_type must be one of: full, compact, csv, json")
127
+
128
+ # Check both new FormatterRegistry formats and legacy formats
129
+ available_formats = list(set(FormatterRegistry.get_available_formats() + ["full", "compact", "csv", "json"]))
130
+ if format_type not in available_formats:
131
+ raise ValueError(f"format_type must be one of: {', '.join(sorted(available_formats))}")
128
132
 
129
133
  # Validate language if provided
130
134
  if "language" in arguments:
@@ -242,6 +246,7 @@ class TableFormatTool(BaseMCPTool):
242
246
  package_info = {"name": packages[0].name}
243
247
 
244
248
  return {
249
+ "success": True,
245
250
  "file_path": result.file_path,
246
251
  "language": result.language,
247
252
  "package": package_info,
@@ -378,18 +383,26 @@ class TableFormatTool(BaseMCPTool):
378
383
  output_file = args.get("output_file")
379
384
  suppress_output = args.get("suppress_output", False)
380
385
 
386
+ # Security validation BEFORE path resolution to catch symlinks
387
+ is_valid, error_msg = self.security_validator.validate_file_path(file_path)
388
+ if not is_valid:
389
+ self.logger.warning(
390
+ f"Security validation failed for file path: {file_path} - {error_msg}"
391
+ )
392
+ raise ValueError(f"Invalid file path: {error_msg}")
393
+
381
394
  # Resolve file path using common path resolver
382
395
  resolved_path = self.path_resolver.resolve(file_path)
383
396
 
384
- # Security validation
397
+ # Additional security validation on resolved path
385
398
  is_valid, error_msg = self.security_validator.validate_file_path(
386
399
  resolved_path
387
400
  )
388
401
  if not is_valid:
389
402
  self.logger.warning(
390
- f"Security validation failed for file path: {file_path} - {error_msg}"
403
+ f"Security validation failed for resolved path: {resolved_path} - {error_msg}"
391
404
  )
392
- raise ValueError(f"Invalid file path: {error_msg}")
405
+ raise ValueError(f"Invalid resolved path: {error_msg}")
393
406
 
394
407
  # Sanitize format_type input
395
408
  if format_type:
@@ -439,14 +452,24 @@ class TableFormatTool(BaseMCPTool):
439
452
  f"Failed to analyze structure for file: {file_path}"
440
453
  )
441
454
 
442
- # Create table formatter
443
- formatter = TableFormatter(format_type)
444
-
445
- # Convert AnalysisResult to dict format for TableFormatter
455
+ # Always convert analysis result to dict for metadata extraction
446
456
  structure_dict = self._convert_analysis_result_to_dict(structure_result)
447
-
448
- # Format table
449
- table_output = formatter.format_structure(structure_dict)
457
+
458
+ # Try to use new FormatterRegistry first, fallback to legacy TableFormatter
459
+ try:
460
+ if FormatterRegistry.is_format_supported(format_type):
461
+ # Use new FormatterRegistry
462
+ formatter = FormatterRegistry.get_formatter(format_type)
463
+ table_output = formatter.format(structure_result.elements)
464
+ else:
465
+ # Fallback to legacy TableFormatter for backward compatibility
466
+ formatter = TableFormatter(format_type)
467
+ table_output = formatter.format_structure(structure_dict)
468
+ except Exception as e:
469
+ # If FormatterRegistry fails, fallback to legacy TableFormatter
470
+ logger.warning(f"FormatterRegistry failed, using legacy formatter: {e}")
471
+ formatter = TableFormatter(format_type)
472
+ table_output = formatter.format_structure(structure_dict)
450
473
 
451
474
  # Ensure output format matches CLI exactly
452
475
  # Fix line ending differences: normalize to Unix-style LF (\n)
@@ -470,6 +493,7 @@ class TableFormatTool(BaseMCPTool):
470
493
 
471
494
  # Build result - conditionally include table_output based on suppress_output
472
495
  result = {
496
+ "success": True,
473
497
  "format_type": format_type,
474
498
  "file_path": file_path,
475
499
  "language": language,
@@ -150,6 +150,59 @@ class Package(CodeElement):
150
150
  element_type: str = "package"
151
151
 
152
152
 
153
+ # ========================================
154
+ # HTML/CSS-Specific Models
155
+ # ========================================
156
+
157
+
158
+ @dataclass(frozen=False)
159
+ class MarkupElement(CodeElement):
160
+ """
161
+ HTML要素を表現するデータモデル。
162
+ CodeElementを継承し、マークアップ固有の属性を追加する。
163
+ """
164
+
165
+ tag_name: str = ""
166
+ attributes: dict[str, str] = field(default_factory=dict)
167
+ parent: "MarkupElement | None" = None
168
+ children: list["MarkupElement"] = field(default_factory=list)
169
+ element_class: str = "" # 分類システムのカテゴリ (例: 'structure', 'media', 'form')
170
+ element_type: str = "html_element"
171
+
172
+ def to_summary_item(self) -> dict[str, Any]:
173
+ """Return dictionary for summary item"""
174
+ return {
175
+ "name": self.name,
176
+ "tag_name": self.tag_name,
177
+ "type": "html_element",
178
+ "element_class": self.element_class,
179
+ "lines": {"start": self.start_line, "end": self.end_line},
180
+ }
181
+
182
+
183
+ @dataclass(frozen=False)
184
+ class StyleElement(CodeElement):
185
+ """
186
+ CSSルールを表現するデータモデル。
187
+ CodeElementを継承する。
188
+ """
189
+
190
+ selector: str = ""
191
+ properties: dict[str, str] = field(default_factory=dict)
192
+ element_class: str = "" # 分類システムのカテゴリ (例: 'layout', 'typography', 'color')
193
+ element_type: str = "css_rule"
194
+
195
+ def to_summary_item(self) -> dict[str, Any]:
196
+ """Return dictionary for summary item"""
197
+ return {
198
+ "name": self.name,
199
+ "selector": self.selector,
200
+ "type": "css_rule",
201
+ "element_class": self.element_class,
202
+ "lines": {"start": self.start_line, "end": self.end_line},
203
+ }
204
+
205
+
153
206
  # ========================================
154
207
  # Java-Specific Models
155
208
  # ========================================
@@ -28,7 +28,7 @@ class OutputManager:
28
28
  """Output warning message"""
29
29
  if not self.quiet:
30
30
  print(f"WARNING: {message}", file=sys.stderr)
31
- log_warning(message)
31
+ log_warning(message)
32
32
 
33
33
  def error(self, message: str) -> None:
34
34
  """Output error message"""
@@ -178,6 +178,56 @@ class LanguagePlugin(ABC):
178
178
  """
179
179
  pass
180
180
 
181
+ def get_supported_element_types(self) -> list[str]:
182
+ """
183
+ Return list of supported CodeElement types.
184
+
185
+ Returns:
186
+ List of element types (e.g., ["function", "class", "variable"])
187
+ """
188
+ return ["function", "class", "variable", "import"]
189
+
190
+ def get_queries(self) -> dict[str, str]:
191
+ """
192
+ Return language-specific tree-sitter queries.
193
+
194
+ Returns:
195
+ Dictionary mapping query names to query strings
196
+ """
197
+ return {}
198
+
199
+ def execute_query_strategy(self, query_key: str | None, language: str) -> str | None:
200
+ """
201
+ Execute query strategy for this language plugin.
202
+
203
+ Args:
204
+ query_key: Query key to execute
205
+ language: Programming language
206
+
207
+ Returns:
208
+ Query string or None if not supported
209
+ """
210
+ queries = self.get_queries()
211
+ return queries.get(query_key) if query_key else None
212
+
213
+ def get_formatter_map(self) -> dict[str, str]:
214
+ """
215
+ Return mapping of format types to formatter class names.
216
+
217
+ Returns:
218
+ Dictionary mapping format names to formatter classes
219
+ """
220
+ return {}
221
+
222
+ def get_element_categories(self) -> dict[str, list[str]]:
223
+ """
224
+ Return element categories for HTML/CSS languages.
225
+
226
+ Returns:
227
+ Dictionary mapping category names to element lists
228
+ """
229
+ return {}
230
+
181
231
  def is_applicable(self, file_path: str) -> bool:
182
232
  """
183
233
  Check if this plugin is applicable for the given file.
@@ -64,7 +64,11 @@ class PluginManager:
64
64
  log_debug(f"Skipping duplicate plugin for language: {language}")
65
65
 
66
66
  final_plugins = list(unique_plugins.values())
67
- log_info(f"Successfully loaded {len(final_plugins)} plugins")
67
+ # Only log if not in CLI mode (check if we're in quiet mode)
68
+ import os
69
+ log_level = os.environ.get("LOG_LEVEL", "WARNING")
70
+ if log_level != "ERROR":
71
+ log_info(f"Successfully loaded {len(final_plugins)} plugins")
68
72
  return final_plugins
69
73
 
70
74
  def _load_from_entry_points(self) -> list[LanguagePlugin]: