tree-sitter-analyzer 1.9.17.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. tree_sitter_analyzer/__init__.py +132 -0
  2. tree_sitter_analyzer/__main__.py +11 -0
  3. tree_sitter_analyzer/api.py +853 -0
  4. tree_sitter_analyzer/cli/__init__.py +39 -0
  5. tree_sitter_analyzer/cli/__main__.py +12 -0
  6. tree_sitter_analyzer/cli/argument_validator.py +89 -0
  7. tree_sitter_analyzer/cli/commands/__init__.py +26 -0
  8. tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
  9. tree_sitter_analyzer/cli/commands/base_command.py +181 -0
  10. tree_sitter_analyzer/cli/commands/default_command.py +18 -0
  11. tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
  12. tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
  13. tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
  14. tree_sitter_analyzer/cli/commands/query_command.py +109 -0
  15. tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
  16. tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
  17. tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
  18. tree_sitter_analyzer/cli/commands/table_command.py +414 -0
  19. tree_sitter_analyzer/cli/info_commands.py +124 -0
  20. tree_sitter_analyzer/cli_main.py +472 -0
  21. tree_sitter_analyzer/constants.py +85 -0
  22. tree_sitter_analyzer/core/__init__.py +15 -0
  23. tree_sitter_analyzer/core/analysis_engine.py +580 -0
  24. tree_sitter_analyzer/core/cache_service.py +333 -0
  25. tree_sitter_analyzer/core/engine.py +585 -0
  26. tree_sitter_analyzer/core/parser.py +293 -0
  27. tree_sitter_analyzer/core/query.py +605 -0
  28. tree_sitter_analyzer/core/query_filter.py +200 -0
  29. tree_sitter_analyzer/core/query_service.py +340 -0
  30. tree_sitter_analyzer/encoding_utils.py +530 -0
  31. tree_sitter_analyzer/exceptions.py +747 -0
  32. tree_sitter_analyzer/file_handler.py +246 -0
  33. tree_sitter_analyzer/formatters/__init__.py +1 -0
  34. tree_sitter_analyzer/formatters/base_formatter.py +201 -0
  35. tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
  36. tree_sitter_analyzer/formatters/formatter_config.py +197 -0
  37. tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
  38. tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
  39. tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
  40. tree_sitter_analyzer/formatters/go_formatter.py +368 -0
  41. tree_sitter_analyzer/formatters/html_formatter.py +498 -0
  42. tree_sitter_analyzer/formatters/java_formatter.py +423 -0
  43. tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
  44. tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
  45. tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
  46. tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
  47. tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
  48. tree_sitter_analyzer/formatters/php_formatter.py +301 -0
  49. tree_sitter_analyzer/formatters/python_formatter.py +830 -0
  50. tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
  51. tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
  52. tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
  53. tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
  54. tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
  55. tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
  56. tree_sitter_analyzer/interfaces/__init__.py +9 -0
  57. tree_sitter_analyzer/interfaces/cli.py +535 -0
  58. tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
  59. tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
  60. tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
  61. tree_sitter_analyzer/language_detector.py +553 -0
  62. tree_sitter_analyzer/language_loader.py +271 -0
  63. tree_sitter_analyzer/languages/__init__.py +10 -0
  64. tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
  65. tree_sitter_analyzer/languages/css_plugin.py +449 -0
  66. tree_sitter_analyzer/languages/go_plugin.py +836 -0
  67. tree_sitter_analyzer/languages/html_plugin.py +496 -0
  68. tree_sitter_analyzer/languages/java_plugin.py +1299 -0
  69. tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
  70. tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
  71. tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
  72. tree_sitter_analyzer/languages/php_plugin.py +862 -0
  73. tree_sitter_analyzer/languages/python_plugin.py +1636 -0
  74. tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
  75. tree_sitter_analyzer/languages/rust_plugin.py +673 -0
  76. tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
  77. tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
  78. tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
  79. tree_sitter_analyzer/legacy_table_formatter.py +860 -0
  80. tree_sitter_analyzer/mcp/__init__.py +34 -0
  81. tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
  82. tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
  83. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
  84. tree_sitter_analyzer/mcp/server.py +869 -0
  85. tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
  86. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
  87. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
  88. tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
  89. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
  90. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
  91. tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
  92. tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
  93. tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
  94. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
  95. tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
  96. tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
  97. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
  98. tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
  99. tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
  100. tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
  101. tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
  102. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
  103. tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
  104. tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
  105. tree_sitter_analyzer/models.py +840 -0
  106. tree_sitter_analyzer/mypy_current_errors.txt +2 -0
  107. tree_sitter_analyzer/output_manager.py +255 -0
  108. tree_sitter_analyzer/platform_compat/__init__.py +3 -0
  109. tree_sitter_analyzer/platform_compat/adapter.py +324 -0
  110. tree_sitter_analyzer/platform_compat/compare.py +224 -0
  111. tree_sitter_analyzer/platform_compat/detector.py +67 -0
  112. tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
  113. tree_sitter_analyzer/platform_compat/profiles.py +217 -0
  114. tree_sitter_analyzer/platform_compat/record.py +55 -0
  115. tree_sitter_analyzer/platform_compat/recorder.py +155 -0
  116. tree_sitter_analyzer/platform_compat/report.py +92 -0
  117. tree_sitter_analyzer/plugins/__init__.py +280 -0
  118. tree_sitter_analyzer/plugins/base.py +647 -0
  119. tree_sitter_analyzer/plugins/manager.py +384 -0
  120. tree_sitter_analyzer/project_detector.py +328 -0
  121. tree_sitter_analyzer/queries/__init__.py +27 -0
  122. tree_sitter_analyzer/queries/csharp.py +216 -0
  123. tree_sitter_analyzer/queries/css.py +615 -0
  124. tree_sitter_analyzer/queries/go.py +275 -0
  125. tree_sitter_analyzer/queries/html.py +543 -0
  126. tree_sitter_analyzer/queries/java.py +402 -0
  127. tree_sitter_analyzer/queries/javascript.py +724 -0
  128. tree_sitter_analyzer/queries/kotlin.py +192 -0
  129. tree_sitter_analyzer/queries/markdown.py +258 -0
  130. tree_sitter_analyzer/queries/php.py +95 -0
  131. tree_sitter_analyzer/queries/python.py +859 -0
  132. tree_sitter_analyzer/queries/ruby.py +92 -0
  133. tree_sitter_analyzer/queries/rust.py +223 -0
  134. tree_sitter_analyzer/queries/sql.py +555 -0
  135. tree_sitter_analyzer/queries/typescript.py +871 -0
  136. tree_sitter_analyzer/queries/yaml.py +236 -0
  137. tree_sitter_analyzer/query_loader.py +272 -0
  138. tree_sitter_analyzer/security/__init__.py +22 -0
  139. tree_sitter_analyzer/security/boundary_manager.py +277 -0
  140. tree_sitter_analyzer/security/regex_checker.py +297 -0
  141. tree_sitter_analyzer/security/validator.py +599 -0
  142. tree_sitter_analyzer/table_formatter.py +782 -0
  143. tree_sitter_analyzer/utils/__init__.py +53 -0
  144. tree_sitter_analyzer/utils/logging.py +433 -0
  145. tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
  146. tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
  147. tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
  148. tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
  149. tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
@@ -0,0 +1,695 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ YAML Language Plugin
4
+
5
+ YAML-specific parsing and element extraction functionality using tree-sitter-yaml.
6
+ Provides comprehensive support for YAML elements including mappings, sequences,
7
+ scalars, anchors, aliases, and comments.
8
+ """
9
+
10
+ import logging
11
+ from typing import TYPE_CHECKING, Any
12
+
13
+ from ..models import AnalysisResult, CodeElement
14
+ from ..plugins.base import ElementExtractor, LanguagePlugin
15
+ from ..utils import log_debug, log_error, log_info, log_warning
16
+
17
+ if TYPE_CHECKING:
18
+ import tree_sitter
19
+
20
+ from ..core.analysis_engine import AnalysisRequest
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ # Graceful degradation for tree-sitter-yaml
25
+ try:
26
+ import tree_sitter
27
+ import tree_sitter_yaml as ts_yaml
28
+
29
+ YAML_AVAILABLE = True
30
+ except ImportError:
31
+ YAML_AVAILABLE = False
32
+ log_warning("tree-sitter-yaml not installed, YAML support disabled")
33
+
34
+
35
class YAMLElement(CodeElement):
    """Code element describing one construct found in a YAML file."""

    def __init__(
        self,
        name: str,
        start_line: int,
        end_line: int,
        raw_text: str,
        language: str = "yaml",
        element_type: str = "yaml",
        key: str | None = None,
        value: str | None = None,
        value_type: str | None = None,
        anchor_name: str | None = None,
        alias_target: str | None = None,
        nesting_level: int = 0,
        document_index: int = 0,
        child_count: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Create a YAML element.

        Args:
            name: Display name of the element.
            start_line: First line covered by the element.
            end_line: Last line covered by the element.
            raw_text: Source text of the element.
            language: Language identifier, "yaml" by default.
            element_type: Kind of YAML element this object represents.
            key: Key text when the element is a mapping pair.
            value: Scalar value; None for complex structures.
            value_type: Type label of the value (string, number, boolean,
                null, mapping, sequence).
            anchor_name: Name defined by an ``&anchor``.
            alias_target: Anchor name referenced by a ``*alias``; the alias
                is not resolved.
            nesting_level: Logical depth derived from the AST.
            document_index: Position of the owning document in a
                multi-document file.
            child_count: Number of children for complex structures.
            **kwargs: Extra keyword arguments forwarded to ``CodeElement``.
        """
        # Fields understood by the base class go through its initializer.
        base_fields = {
            "name": name,
            "start_line": start_line,
            "end_line": end_line,
            "raw_text": raw_text,
            "language": language,
        }
        super().__init__(**base_fields, **kwargs)

        # Classification of the element.
        self.element_type = element_type
        self.value_type = value_type

        # Mapping content.
        self.key = key
        self.value = value

        # Anchor / alias bookkeeping.
        self.anchor_name = anchor_name
        self.alias_target = alias_target

        # Position inside the document structure.
        self.nesting_level = nesting_level
        self.document_index = document_index
        self.child_count = child_count
92
+
93
+
94
+ class YAMLElementExtractor(ElementExtractor):
95
+ """YAML-specific element extractor using tree-sitter-yaml."""
96
+
97
+ def __init__(self) -> None:
98
+ """Initialize the YAML element extractor."""
99
+ self.source_code: str = ""
100
+ self.content_lines: list[str] = []
101
+ self._current_document_index: int = 0
102
+
103
def extract_functions(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[CodeElement]:
    """Return an empty list: YAML has no functions to extract."""
    no_functions: list[CodeElement] = []
    return no_functions
108
+
109
def extract_classes(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[CodeElement]:
    """Return an empty list: YAML has no classes to extract."""
    no_classes: list[CodeElement] = []
    return no_classes
114
+
115
def extract_variables(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[CodeElement]:
    """Return an empty list: YAML has no variables to extract."""
    no_variables: list[CodeElement] = []
    return no_variables
120
+
121
def extract_imports(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[CodeElement]:
    """Return an empty list: YAML has no imports to extract."""
    no_imports: list[CodeElement] = []
    return no_imports
126
+
127
def extract_yaml_elements(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[YAMLElement]:
    """Collect every supported YAML element from a parsed tree.

    The extractor state (source text, split lines, document index) is reset
    first.  Documents, mappings, sequences, anchors, aliases and comments
    are then gathered in that order.  An exception raised by any step is
    logged and ends the extraction; elements gathered so far are kept.

    Args:
        tree: Parsed tree-sitter tree
        source_code: Original source code

    Returns:
        List of YAMLElement objects
    """
    self.source_code = source_code or ""
    self.content_lines = self.source_code.split("\n")
    self._current_document_index = 0

    collected: list[YAMLElement] = []
    if tree is None or tree.root_node is None:
        return collected

    try:
        root = tree.root_node
        # Documents come first, followed by the structural and reference
        # elements, with comments last.
        extraction_steps = (
            self._extract_documents,
            self._extract_mappings,
            self._extract_sequences,
            self._extract_anchors,
            self._extract_aliases,
            self._extract_comments,
        )
        for run_step in extraction_steps:
            run_step(root, collected)
    except Exception as e:
        log_error(f"Error during YAML element extraction: {e}")

    log_debug(f"Extracted {len(collected)} YAML elements")
    return collected
165
+
166
+ def _get_node_text(self, node: "tree_sitter.Node") -> str:
167
+ """Get text content from a tree-sitter node."""
168
+ try:
169
+ if hasattr(node, "start_byte") and hasattr(node, "end_byte"):
170
+ source_bytes = self.source_code.encode("utf-8")
171
+ node_bytes = source_bytes[node.start_byte : node.end_byte]
172
+ return node_bytes.decode("utf-8", errors="replace")
173
+ return ""
174
+ except Exception as e:
175
+ log_debug(f"Failed to extract node text: {e}")
176
+ return ""
177
+
178
+ def _calculate_nesting_level(self, node: "tree_sitter.Node") -> int:
179
+ """Calculate AST-based logical nesting level."""
180
+ level = 0
181
+ current = node.parent
182
+ while current is not None:
183
+ if current.type in (
184
+ "block_mapping",
185
+ "block_sequence",
186
+ "flow_mapping",
187
+ "flow_sequence",
188
+ ):
189
+ level += 1
190
+ current = current.parent
191
+ return level
192
+
193
+ def _get_document_index(self, node: "tree_sitter.Node") -> int:
194
+ """Get document index for a node."""
195
+ current = node
196
+ while current is not None:
197
+ if current.type == "document":
198
+ # Count preceding document siblings
199
+ index = 0
200
+ sibling = current.prev_sibling
201
+ while sibling is not None:
202
+ if sibling.type == "document":
203
+ index += 1
204
+ sibling = sibling.prev_sibling
205
+ return index
206
+ current = current.parent
207
+ return 0
208
+
209
+ def _traverse_nodes(self, node: "tree_sitter.Node") -> "list[tree_sitter.Node]":
210
+ """Traverse all nodes in the tree."""
211
+ nodes = [node]
212
+ for child in node.children:
213
+ nodes.extend(self._traverse_nodes(child))
214
+ return nodes
215
+
216
+ def _count_document_children(self, document_node: "tree_sitter.Node") -> int:
217
+ """Count meaningful children in a document (top-level mappings).
218
+
219
+ This counts the number of top-level key-value pairs in the document,
220
+ which is more meaningful than counting AST nodes.
221
+ """
222
+ count = 0
223
+ for child in document_node.children:
224
+ # Skip document markers and comments
225
+ if child.type in ("---", "...", "comment"):
226
+ continue
227
+ # For block_node, count the mappings inside
228
+ if child.type == "block_node":
229
+ for subchild in child.children:
230
+ if subchild.type == "block_mapping":
231
+ # Count the mapping pairs
232
+ count += len(
233
+ [
234
+ c
235
+ for c in subchild.children
236
+ if c.type == "block_mapping_pair"
237
+ ]
238
+ )
239
+ elif subchild.type in ("block_sequence", "flow_sequence"):
240
+ count += 1
241
+ elif child.type == "block_mapping":
242
+ count += len(
243
+ [c for c in child.children if c.type == "block_mapping_pair"]
244
+ )
245
+ return count
246
+
247
def _extract_documents(
    self, root_node: "tree_sitter.Node", elements: list[YAMLElement]
) -> None:
    """Append one "document" element per YAML document under ``root_node``.

    The stored raw text is cut to 200 characters followed by "..." when the
    document is longer.  ``child_count`` holds the number of meaningful
    top-level entries, excluding document markers and comments.  A document
    that fails to convert is logged and skipped.
    """
    for candidate in self._traverse_nodes(root_node):
        if candidate.type != "document":
            continue
        try:
            text = self._get_node_text(candidate)
            index = self._get_document_index(candidate)
            preview = text if len(text) <= 200 else text[:200] + "..."
            elements.append(
                YAMLElement(
                    name=f"Document {index}",
                    start_line=candidate.start_point[0] + 1,
                    end_line=candidate.end_point[0] + 1,
                    raw_text=preview,
                    element_type="document",
                    document_index=index,
                    child_count=self._count_document_children(candidate),
                    nesting_level=0,
                )
            )
        except Exception as e:
            log_debug(f"Failed to extract document: {e}")
278
+
279
def _extract_mappings(
    self, root_node: "tree_sitter.Node", elements: list[YAMLElement]
) -> None:
    """Extract YAML mappings (key-value pairs).

    Every ``block_mapping_pair`` and ``flow_pair`` node under ``root_node``
    is appended to ``elements`` as a "mapping" element.

    Keys and values are wrapped in ``flow_node``/``block_node`` nodes.  In
    the tree-sitter-yaml grammar such a wrapper can start with node
    properties (``anchor``, ``tag``) ahead of the content.  Those are
    skipped while unwrapping, so ``key: &a 1`` is reported as the number
    ``1`` and not as an unknown value ``&a``.  A wrapper that holds only
    properties is treated like a missing key or value.

    Args:
        root_node: Node whose subtree is searched.
        elements: List that receives the created elements.
    """
    wrapper_types = ("flow_node", "block_node")
    property_types = ("anchor", "tag")

    def content_node(wrapper: "tree_sitter.Node") -> "tree_sitter.Node | None":
        # Descend through wrapper nodes to the node carrying the content.
        current = wrapper
        while (
            current is not None
            and current.type in wrapper_types
            and current.children
        ):
            content = [
                child
                for child in current.children
                if child.type not in property_types
            ]
            current = content[0] if content else None
        return current

    for node in self._traverse_nodes(root_node):
        if node.type not in ("block_mapping_pair", "flow_pair"):
            continue
        try:
            start_line = node.start_point[0] + 1
            end_line = node.end_point[0] + 1
            raw_text = self._get_node_text(node)

            key = None
            value = None
            value_type = None
            child_count = None

            # A pair is laid out as: key wrapper, ':', value wrapper.
            # Wrappers seen before the colon are the key, later ones the value.
            key_node = None
            value_node = None
            found_colon = False

            for child in node.children:
                if child.type == ":":
                    found_colon = True
                elif child.type in wrapper_types:
                    if not found_colon:
                        key_node = child
                    else:
                        value_node = child
                elif child.type == "key":
                    # Key wrapped in an explicit "key" node.
                    key_node = child.children[0] if child.children else child
                elif child.type == "value":
                    # Value wrapped in an explicit "value" node.
                    value_node = child.children[0] if child.children else child

            if key_node is not None:
                key_target = content_node(key_node)
                if key_target is not None:
                    key = self._get_node_text(key_target).strip()

            if value_node is not None:
                value_target = content_node(value_node)
                if value_target is not None:
                    value, value_type, child_count = self._extract_value_info(
                        value_target
                    )

            nesting_level = self._calculate_nesting_level(node)
            doc_index = self._get_document_index(node)

            element = YAMLElement(
                name=key or "mapping",
                start_line=start_line,
                end_line=end_line,
                raw_text=raw_text,
                element_type="mapping",
                key=key,
                value=value,
                value_type=value_type,
                nesting_level=nesting_level,
                document_index=doc_index,
                child_count=child_count,
            )
            elements.append(element)
        except Exception as e:
            log_debug(f"Failed to extract mapping: {e}")
373
+
374
+ def _extract_value_info(
375
+ self, node: "tree_sitter.Node"
376
+ ) -> tuple[str | None, str | None, int | None]:
377
+ """Extract value information from a node.
378
+
379
+ Returns:
380
+ Tuple of (value, value_type, child_count)
381
+ """
382
+ if node is None:
383
+ return None, None, None
384
+
385
+ node_type = node.type
386
+ text = self._get_node_text(node).strip()
387
+
388
+ # Scalar types
389
+ if node_type in ("plain_scalar", "double_quote_scalar", "single_quote_scalar"):
390
+ # Determine scalar type
391
+ if text.lower() in ("true", "false", "yes", "no", "on", "off"):
392
+ return text, "boolean", None
393
+ elif text.lower() in ("null", "~", ""):
394
+ return text if text else None, "null", None
395
+ elif self._is_number(text):
396
+ return text, "number", None
397
+ else:
398
+ return text, "string", None
399
+ elif node_type == "block_scalar":
400
+ return text, "string", None
401
+ elif node_type in ("block_mapping", "flow_mapping"):
402
+ child_count = len(
403
+ [
404
+ c
405
+ for c in node.children
406
+ if c.type in ("block_mapping_pair", "flow_pair")
407
+ ]
408
+ )
409
+ return None, "mapping", child_count
410
+ elif node_type in ("block_sequence", "flow_sequence"):
411
+ child_count = len(
412
+ [c for c in node.children if c.type in ("block_sequence_item",)]
413
+ or node.children
414
+ )
415
+ return None, "sequence", child_count
416
+ elif node_type == "alias":
417
+ alias_name = text.lstrip("*")
418
+ return f"*{alias_name}", "alias", None
419
+
420
+ return text, "unknown", None
421
+
422
+ def _is_number(self, text: str) -> bool:
423
+ """Check if text represents a number."""
424
+ try:
425
+ float(text)
426
+ return True
427
+ except ValueError:
428
+ return False
429
+
430
def _extract_sequences(
    self, root_node: "tree_sitter.Node", elements: list[YAMLElement]
) -> None:
    """Extract YAML sequences (lists).

    Every ``block_sequence`` and ``flow_sequence`` under ``root_node`` is
    appended to ``elements`` as a "sequence" element.  ``child_count`` is
    the number of items.  For flow sequences only ``flow_node`` and
    ``flow_pair`` children are counted, so the ``[``, ``,`` and ``]``
    tokens are not mistaken for items.  The stored raw text is cut to 200
    characters followed by "..." when longer.

    Args:
        root_node: Node whose subtree is searched.
        elements: List that receives the created elements.
    """
    # Child node types that count as an item, per sequence flavour.
    item_types = {
        "block_sequence": ("block_sequence_item",),
        "flow_sequence": ("flow_node", "flow_pair"),
    }

    for node in self._traverse_nodes(root_node):
        accepted = item_types.get(node.type)
        if accepted is None:
            continue
        try:
            start_line = node.start_point[0] + 1
            end_line = node.end_point[0] + 1
            raw_text = self._get_node_text(node)

            child_count = sum(1 for child in node.children if child.type in accepted)

            nesting_level = self._calculate_nesting_level(node)
            doc_index = self._get_document_index(node)

            element = YAMLElement(
                name="sequence",
                start_line=start_line,
                end_line=end_line,
                raw_text=raw_text[:200] + "..." if len(raw_text) > 200 else raw_text,
                element_type="sequence",
                value_type="sequence",
                nesting_level=nesting_level,
                document_index=doc_index,
                child_count=child_count,
            )
            elements.append(element)
        except Exception as e:
            log_debug(f"Failed to extract sequence: {e}")
472
+
473
def _extract_anchors(
    self, root_node: "tree_sitter.Node", elements: list[YAMLElement]
) -> None:
    """Append an "anchor" element for every ``&name`` under ``root_node``.

    The anchor name is the node text without leading ``&`` characters and
    surrounding whitespace.  An anchor that fails to convert is logged and
    skipped.
    """
    for candidate in self._traverse_nodes(root_node):
        if candidate.type != "anchor":
            continue
        try:
            source_text = self._get_node_text(candidate)
            label = source_text.lstrip("&").strip()
            elements.append(
                YAMLElement(
                    name=f"&{label}",
                    start_line=candidate.start_point[0] + 1,
                    end_line=candidate.end_point[0] + 1,
                    raw_text=source_text,
                    element_type="anchor",
                    anchor_name=label,
                    nesting_level=self._calculate_nesting_level(candidate),
                    document_index=self._get_document_index(candidate),
                )
            )
        except Exception as e:
            log_debug(f"Failed to extract anchor: {e}")
501
+
502
def _extract_aliases(
    self, root_node: "tree_sitter.Node", elements: list[YAMLElement]
) -> None:
    """Append an "alias" element for every ``*name`` under ``root_node``.

    The alias target is the node text without leading ``*`` characters and
    surrounding whitespace; it is recorded as written, not resolved.  An
    alias that fails to convert is logged and skipped.
    """
    for candidate in self._traverse_nodes(root_node):
        if candidate.type != "alias":
            continue
        try:
            source_text = self._get_node_text(candidate)
            target = source_text.lstrip("*").strip()
            elements.append(
                YAMLElement(
                    name=f"*{target}",
                    start_line=candidate.start_point[0] + 1,
                    end_line=candidate.end_point[0] + 1,
                    raw_text=source_text,
                    element_type="alias",
                    alias_target=target,
                    nesting_level=self._calculate_nesting_level(candidate),
                    document_index=self._get_document_index(candidate),
                )
            )
        except Exception as e:
            log_debug(f"Failed to extract alias: {e}")
530
+
531
def _extract_comments(
    self, root_node: "tree_sitter.Node", elements: list[YAMLElement]
) -> None:
    """Append a "comment" element for every comment under ``root_node``.

    The comment body is the node text without leading ``#`` characters and
    surrounding whitespace.  It is stored in full as the value; the element
    name is the body cut to 50 characters followed by "..." when longer.
    Comments always get nesting level 0.  A comment that fails to convert
    is logged and skipped.
    """
    for candidate in self._traverse_nodes(root_node):
        if candidate.type != "comment":
            continue
        try:
            source_text = self._get_node_text(candidate)
            body = source_text.lstrip("#").strip()
            label = body if len(body) <= 50 else body[:50] + "..."
            elements.append(
                YAMLElement(
                    name=label,
                    start_line=candidate.start_point[0] + 1,
                    end_line=candidate.end_point[0] + 1,
                    raw_text=source_text,
                    element_type="comment",
                    value=body,
                    value_type="comment",
                    document_index=self._get_document_index(candidate),
                    nesting_level=0,
                )
            )
        except Exception as e:
            log_debug(f"Failed to extract comment: {e}")
561
+
562
+
563
+ class YAMLPlugin(LanguagePlugin):
564
+ """YAML language plugin using tree-sitter-yaml for true YAML parsing."""
565
+
566
def get_language_name(self) -> str:
    """Return the identifier of the language handled by this plugin."""
    language_name = "yaml"
    return language_name
569
+
570
def get_file_extensions(self) -> list[str]:
    """Return the file extensions recognised as YAML."""
    extensions = [".yaml", ".yml"]
    return extensions
573
+
574
def create_extractor(self) -> ElementExtractor:
    """Build a fresh YAML element extractor."""
    extractor = YAMLElementExtractor()
    return extractor
577
+
578
def get_supported_element_types(self) -> list[str]:
    """Return the names of the element types this plugin reports."""
    supported = (
        "mapping",
        "sequence",
        "scalar",
        "anchor",
        "alias",
        "comment",
        "document",
    )
    return list(supported)
589
+
590
def get_queries(self) -> dict[str, str]:
    """Return the tree-sitter queries defined for YAML.

    The query table is imported inside the method, not at module level.
    """
    from ..queries.yaml import YAML_QUERIES as yaml_queries

    return yaml_queries
595
+
596
+ def execute_query_strategy(
597
+ self, query_key: str | None, language: str
598
+ ) -> str | None:
599
+ """Execute query strategy for YAML."""
600
+ if language != "yaml":
601
+ return None
602
+
603
+ queries = self.get_queries()
604
+ return queries.get(query_key) if query_key else None
605
+
606
def get_element_categories(self) -> dict[str, list[str]]:
    """Return the node types that belong to each YAML element category."""
    scalar_types = [
        "plain_scalar",
        "double_quote_scalar",
        "single_quote_scalar",
        "block_scalar",
    ]
    categories: dict[str, list[str]] = {}
    categories["structure"] = ["document", "block_mapping", "block_sequence"]
    categories["mappings"] = ["block_mapping_pair", "flow_pair"]
    categories["sequences"] = ["block_sequence", "flow_sequence"]
    categories["scalars"] = scalar_types
    categories["references"] = ["anchor", "alias"]
    categories["metadata"] = ["comment", "tag"]
    return categories
621
+
622
async def analyze_file(
    self, file_path: str, request: "AnalysisRequest"
) -> "AnalysisResult":
    """Parse a YAML file with tree-sitter-yaml and report its elements.

    Args:
        file_path: Path to the YAML file
        request: Analysis request parameters

    Returns:
        AnalysisResult holding the extracted elements.  When YAML support is
        missing or any step fails, an unsuccessful result carrying the
        error message is returned; the error is logged, not raised.
    """
    from ..encoding_utils import read_file_safe

    def failed(message: str) -> "AnalysisResult":
        # Empty, unsuccessful result used for every failure path.
        return AnalysisResult(
            file_path=file_path,
            language="yaml",
            line_count=0,
            elements=[],
            node_count=0,
            query_results={},
            source_code="",
            success=False,
            error_message=message,
        )

    if not YAML_AVAILABLE:
        log_error("tree-sitter-yaml not available")
        return failed("YAML support not available. Install tree-sitter-yaml.")

    try:
        # The detected encoding is returned as well but is not needed here.
        content, _encoding = read_file_safe(file_path)

        yaml_language = tree_sitter.Language(ts_yaml.language())
        parser = tree_sitter.Parser()
        parser.language = yaml_language
        tree = parser.parse(content.encode("utf-8"))

        extractor = self.create_extractor()
        elements = extractor.extract_yaml_elements(tree, content)

        log_info(f"Extracted {len(elements)} YAML elements from {file_path}")

        return AnalysisResult(
            file_path=file_path,
            language="yaml",
            line_count=len(content.splitlines()),
            elements=elements,
            node_count=len(elements),
            query_results={},
            source_code=content,
            success=True,
            error_message=None,
        )
    except Exception as e:
        log_error(f"Failed to analyze YAML file {file_path}: {e}")
        return failed(str(e))