tree-sitter-analyzer 1.9.17.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tree_sitter_analyzer/__init__.py +132 -0
- tree_sitter_analyzer/__main__.py +11 -0
- tree_sitter_analyzer/api.py +853 -0
- tree_sitter_analyzer/cli/__init__.py +39 -0
- tree_sitter_analyzer/cli/__main__.py +12 -0
- tree_sitter_analyzer/cli/argument_validator.py +89 -0
- tree_sitter_analyzer/cli/commands/__init__.py +26 -0
- tree_sitter_analyzer/cli/commands/advanced_command.py +226 -0
- tree_sitter_analyzer/cli/commands/base_command.py +181 -0
- tree_sitter_analyzer/cli/commands/default_command.py +18 -0
- tree_sitter_analyzer/cli/commands/find_and_grep_cli.py +188 -0
- tree_sitter_analyzer/cli/commands/list_files_cli.py +133 -0
- tree_sitter_analyzer/cli/commands/partial_read_command.py +139 -0
- tree_sitter_analyzer/cli/commands/query_command.py +109 -0
- tree_sitter_analyzer/cli/commands/search_content_cli.py +161 -0
- tree_sitter_analyzer/cli/commands/structure_command.py +156 -0
- tree_sitter_analyzer/cli/commands/summary_command.py +116 -0
- tree_sitter_analyzer/cli/commands/table_command.py +414 -0
- tree_sitter_analyzer/cli/info_commands.py +124 -0
- tree_sitter_analyzer/cli_main.py +472 -0
- tree_sitter_analyzer/constants.py +85 -0
- tree_sitter_analyzer/core/__init__.py +15 -0
- tree_sitter_analyzer/core/analysis_engine.py +580 -0
- tree_sitter_analyzer/core/cache_service.py +333 -0
- tree_sitter_analyzer/core/engine.py +585 -0
- tree_sitter_analyzer/core/parser.py +293 -0
- tree_sitter_analyzer/core/query.py +605 -0
- tree_sitter_analyzer/core/query_filter.py +200 -0
- tree_sitter_analyzer/core/query_service.py +340 -0
- tree_sitter_analyzer/encoding_utils.py +530 -0
- tree_sitter_analyzer/exceptions.py +747 -0
- tree_sitter_analyzer/file_handler.py +246 -0
- tree_sitter_analyzer/formatters/__init__.py +1 -0
- tree_sitter_analyzer/formatters/base_formatter.py +201 -0
- tree_sitter_analyzer/formatters/csharp_formatter.py +367 -0
- tree_sitter_analyzer/formatters/formatter_config.py +197 -0
- tree_sitter_analyzer/formatters/formatter_factory.py +84 -0
- tree_sitter_analyzer/formatters/formatter_registry.py +377 -0
- tree_sitter_analyzer/formatters/formatter_selector.py +96 -0
- tree_sitter_analyzer/formatters/go_formatter.py +368 -0
- tree_sitter_analyzer/formatters/html_formatter.py +498 -0
- tree_sitter_analyzer/formatters/java_formatter.py +423 -0
- tree_sitter_analyzer/formatters/javascript_formatter.py +611 -0
- tree_sitter_analyzer/formatters/kotlin_formatter.py +268 -0
- tree_sitter_analyzer/formatters/language_formatter_factory.py +123 -0
- tree_sitter_analyzer/formatters/legacy_formatter_adapters.py +228 -0
- tree_sitter_analyzer/formatters/markdown_formatter.py +725 -0
- tree_sitter_analyzer/formatters/php_formatter.py +301 -0
- tree_sitter_analyzer/formatters/python_formatter.py +830 -0
- tree_sitter_analyzer/formatters/ruby_formatter.py +278 -0
- tree_sitter_analyzer/formatters/rust_formatter.py +233 -0
- tree_sitter_analyzer/formatters/sql_formatter_wrapper.py +689 -0
- tree_sitter_analyzer/formatters/sql_formatters.py +536 -0
- tree_sitter_analyzer/formatters/typescript_formatter.py +543 -0
- tree_sitter_analyzer/formatters/yaml_formatter.py +462 -0
- tree_sitter_analyzer/interfaces/__init__.py +9 -0
- tree_sitter_analyzer/interfaces/cli.py +535 -0
- tree_sitter_analyzer/interfaces/cli_adapter.py +359 -0
- tree_sitter_analyzer/interfaces/mcp_adapter.py +224 -0
- tree_sitter_analyzer/interfaces/mcp_server.py +428 -0
- tree_sitter_analyzer/language_detector.py +553 -0
- tree_sitter_analyzer/language_loader.py +271 -0
- tree_sitter_analyzer/languages/__init__.py +10 -0
- tree_sitter_analyzer/languages/csharp_plugin.py +1076 -0
- tree_sitter_analyzer/languages/css_plugin.py +449 -0
- tree_sitter_analyzer/languages/go_plugin.py +836 -0
- tree_sitter_analyzer/languages/html_plugin.py +496 -0
- tree_sitter_analyzer/languages/java_plugin.py +1299 -0
- tree_sitter_analyzer/languages/javascript_plugin.py +1622 -0
- tree_sitter_analyzer/languages/kotlin_plugin.py +656 -0
- tree_sitter_analyzer/languages/markdown_plugin.py +1928 -0
- tree_sitter_analyzer/languages/php_plugin.py +862 -0
- tree_sitter_analyzer/languages/python_plugin.py +1636 -0
- tree_sitter_analyzer/languages/ruby_plugin.py +757 -0
- tree_sitter_analyzer/languages/rust_plugin.py +673 -0
- tree_sitter_analyzer/languages/sql_plugin.py +2444 -0
- tree_sitter_analyzer/languages/typescript_plugin.py +1892 -0
- tree_sitter_analyzer/languages/yaml_plugin.py +695 -0
- tree_sitter_analyzer/legacy_table_formatter.py +860 -0
- tree_sitter_analyzer/mcp/__init__.py +34 -0
- tree_sitter_analyzer/mcp/resources/__init__.py +43 -0
- tree_sitter_analyzer/mcp/resources/code_file_resource.py +208 -0
- tree_sitter_analyzer/mcp/resources/project_stats_resource.py +586 -0
- tree_sitter_analyzer/mcp/server.py +869 -0
- tree_sitter_analyzer/mcp/tools/__init__.py +28 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +779 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +291 -0
- tree_sitter_analyzer/mcp/tools/base_tool.py +139 -0
- tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +816 -0
- tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +686 -0
- tree_sitter_analyzer/mcp/tools/list_files_tool.py +413 -0
- tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
- tree_sitter_analyzer/mcp/tools/query_tool.py +443 -0
- tree_sitter_analyzer/mcp/tools/read_partial_tool.py +464 -0
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +836 -0
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +572 -0
- tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +653 -0
- tree_sitter_analyzer/mcp/utils/__init__.py +113 -0
- tree_sitter_analyzer/mcp/utils/error_handler.py +569 -0
- tree_sitter_analyzer/mcp/utils/file_output_factory.py +217 -0
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +322 -0
- tree_sitter_analyzer/mcp/utils/gitignore_detector.py +358 -0
- tree_sitter_analyzer/mcp/utils/path_resolver.py +414 -0
- tree_sitter_analyzer/mcp/utils/search_cache.py +343 -0
- tree_sitter_analyzer/models.py +840 -0
- tree_sitter_analyzer/mypy_current_errors.txt +2 -0
- tree_sitter_analyzer/output_manager.py +255 -0
- tree_sitter_analyzer/platform_compat/__init__.py +3 -0
- tree_sitter_analyzer/platform_compat/adapter.py +324 -0
- tree_sitter_analyzer/platform_compat/compare.py +224 -0
- tree_sitter_analyzer/platform_compat/detector.py +67 -0
- tree_sitter_analyzer/platform_compat/fixtures.py +228 -0
- tree_sitter_analyzer/platform_compat/profiles.py +217 -0
- tree_sitter_analyzer/platform_compat/record.py +55 -0
- tree_sitter_analyzer/platform_compat/recorder.py +155 -0
- tree_sitter_analyzer/platform_compat/report.py +92 -0
- tree_sitter_analyzer/plugins/__init__.py +280 -0
- tree_sitter_analyzer/plugins/base.py +647 -0
- tree_sitter_analyzer/plugins/manager.py +384 -0
- tree_sitter_analyzer/project_detector.py +328 -0
- tree_sitter_analyzer/queries/__init__.py +27 -0
- tree_sitter_analyzer/queries/csharp.py +216 -0
- tree_sitter_analyzer/queries/css.py +615 -0
- tree_sitter_analyzer/queries/go.py +275 -0
- tree_sitter_analyzer/queries/html.py +543 -0
- tree_sitter_analyzer/queries/java.py +402 -0
- tree_sitter_analyzer/queries/javascript.py +724 -0
- tree_sitter_analyzer/queries/kotlin.py +192 -0
- tree_sitter_analyzer/queries/markdown.py +258 -0
- tree_sitter_analyzer/queries/php.py +95 -0
- tree_sitter_analyzer/queries/python.py +859 -0
- tree_sitter_analyzer/queries/ruby.py +92 -0
- tree_sitter_analyzer/queries/rust.py +223 -0
- tree_sitter_analyzer/queries/sql.py +555 -0
- tree_sitter_analyzer/queries/typescript.py +871 -0
- tree_sitter_analyzer/queries/yaml.py +236 -0
- tree_sitter_analyzer/query_loader.py +272 -0
- tree_sitter_analyzer/security/__init__.py +22 -0
- tree_sitter_analyzer/security/boundary_manager.py +277 -0
- tree_sitter_analyzer/security/regex_checker.py +297 -0
- tree_sitter_analyzer/security/validator.py +599 -0
- tree_sitter_analyzer/table_formatter.py +782 -0
- tree_sitter_analyzer/utils/__init__.py +53 -0
- tree_sitter_analyzer/utils/logging.py +433 -0
- tree_sitter_analyzer/utils/tree_sitter_compat.py +289 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/METADATA +485 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/RECORD +149 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/WHEEL +4 -0
- tree_sitter_analyzer-1.9.17.1.dist-info/entry_points.txt +25 -0
|
@@ -0,0 +1,695 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
YAML Language Plugin
|
|
4
|
+
|
|
5
|
+
YAML-specific parsing and element extraction functionality using tree-sitter-yaml.
|
|
6
|
+
Provides comprehensive support for YAML elements including mappings, sequences,
|
|
7
|
+
scalars, anchors, aliases, and comments.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from typing import TYPE_CHECKING, Any
|
|
12
|
+
|
|
13
|
+
from ..models import AnalysisResult, CodeElement
|
|
14
|
+
from ..plugins.base import ElementExtractor, LanguagePlugin
|
|
15
|
+
from ..utils import log_debug, log_error, log_info, log_warning
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
import tree_sitter
|
|
19
|
+
|
|
20
|
+
from ..core.analysis_engine import AnalysisRequest
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
# Graceful degradation for tree-sitter-yaml
|
|
25
|
+
try:
|
|
26
|
+
import tree_sitter
|
|
27
|
+
import tree_sitter_yaml as ts_yaml
|
|
28
|
+
|
|
29
|
+
YAML_AVAILABLE = True
|
|
30
|
+
except ImportError:
|
|
31
|
+
YAML_AVAILABLE = False
|
|
32
|
+
log_warning("tree-sitter-yaml not installed, YAML support disabled")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class YAMLElement(CodeElement):
    """Code element describing one construct found in a YAML file.

    Adds YAML-only metadata (key/value details, anchor and alias names,
    nesting depth, document position) on top of ``CodeElement``.
    """

    def __init__(
        self,
        name: str,
        start_line: int,
        end_line: int,
        raw_text: str,
        language: str = "yaml",
        element_type: str = "yaml",
        key: str | None = None,
        value: str | None = None,
        value_type: str | None = None,
        anchor_name: str | None = None,
        alias_target: str | None = None,
        nesting_level: int = 0,
        document_index: int = 0,
        child_count: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Build a YAML element.

        Args:
            name: Display name of the element.
            start_line: First line covered by the element.
            end_line: Last line covered by the element.
            raw_text: Source text of the element.
            language: Language identifier, ``"yaml"`` by default.
            element_type: Kind of YAML element.
            key: Key text when the element is a mapping pair.
            value: Scalar value; None for complex structures.
            value_type: Type of value (string, number, boolean, null,
                mapping, sequence).
            anchor_name: Name defined by an ``&anchor``.
            alias_target: Anchor name referenced by a ``*alias``
                (the reference is not resolved).
            nesting_level: AST-based logical depth.
            document_index: Index of the document in a multi-document file.
            child_count: Number of child elements for complex structures.
            **kwargs: Extra keyword arguments forwarded to ``CodeElement``.
        """
        super().__init__(
            name=name,
            start_line=start_line,
            end_line=end_line,
            raw_text=raw_text,
            language=language,
            **kwargs,
        )
        # YAML-specific attributes are set after the base initializer has run.
        self.element_type = element_type
        self.key, self.value, self.value_type = key, value, value_type
        self.anchor_name, self.alias_target = anchor_name, alias_target
        self.nesting_level, self.document_index = nesting_level, document_index
        self.child_count = child_count
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class YAMLElementExtractor(ElementExtractor):
|
|
95
|
+
"""YAML-specific element extractor using tree-sitter-yaml."""
|
|
96
|
+
|
|
97
|
+
def __init__(self) -> None:
|
|
98
|
+
"""Initialize the YAML element extractor."""
|
|
99
|
+
self.source_code: str = ""
|
|
100
|
+
self.content_lines: list[str] = []
|
|
101
|
+
self._current_document_index: int = 0
|
|
102
|
+
|
|
103
|
+
def extract_functions(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[CodeElement]:
    """Return an empty list: YAML has no functions to extract."""
    no_functions: list[CodeElement] = []
    return no_functions
|
|
108
|
+
|
|
109
|
+
def extract_classes(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[CodeElement]:
    """Return an empty list: YAML has no classes to extract."""
    no_classes: list[CodeElement] = []
    return no_classes
|
|
114
|
+
|
|
115
|
+
def extract_variables(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[CodeElement]:
    """Return an empty list: YAML has no variables to extract."""
    no_variables: list[CodeElement] = []
    return no_variables
|
|
120
|
+
|
|
121
|
+
def extract_imports(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[CodeElement]:
    """Return an empty list: YAML has no imports to extract."""
    no_imports: list[CodeElement] = []
    return no_imports
|
|
126
|
+
|
|
127
|
+
def extract_yaml_elements(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[YAMLElement]:
    """Collect every supported YAML element from a parsed tree.

    The document, mapping, sequence, anchor, alias and comment passes run
    in that order and append to one shared list. An exception raised by a
    pass is logged and stops the remaining passes; whatever was collected
    before the failure is still returned.

    Args:
        tree: Parsed tree-sitter tree; None (or a tree without a root
            node) yields an empty list.
        source_code: Text the tree was parsed from; a falsy value is
            treated as an empty string.

    Returns:
        List of YAMLElement objects.
    """
    text = source_code or ""
    self.source_code = text
    self.content_lines = text.split("\n")
    self._current_document_index = 0

    collected: list[YAMLElement] = []
    if tree is None or tree.root_node is None:
        return collected

    try:
        passes = (
            self._extract_documents,  # documents first
            self._extract_mappings,
            self._extract_sequences,
            self._extract_anchors,
            self._extract_aliases,
            self._extract_comments,
        )
        for run_pass in passes:
            run_pass(tree.root_node, collected)
    except Exception as e:
        log_error(f"Error during YAML element extraction: {e}")

    log_debug(f"Extracted {len(collected)} YAML elements")
    return collected
|
|
165
|
+
|
|
166
|
+
def _get_node_text(self, node: "tree_sitter.Node") -> str:
|
|
167
|
+
"""Get text content from a tree-sitter node."""
|
|
168
|
+
try:
|
|
169
|
+
if hasattr(node, "start_byte") and hasattr(node, "end_byte"):
|
|
170
|
+
source_bytes = self.source_code.encode("utf-8")
|
|
171
|
+
node_bytes = source_bytes[node.start_byte : node.end_byte]
|
|
172
|
+
return node_bytes.decode("utf-8", errors="replace")
|
|
173
|
+
return ""
|
|
174
|
+
except Exception as e:
|
|
175
|
+
log_debug(f"Failed to extract node text: {e}")
|
|
176
|
+
return ""
|
|
177
|
+
|
|
178
|
+
def _calculate_nesting_level(self, node: "tree_sitter.Node") -> int:
|
|
179
|
+
"""Calculate AST-based logical nesting level."""
|
|
180
|
+
level = 0
|
|
181
|
+
current = node.parent
|
|
182
|
+
while current is not None:
|
|
183
|
+
if current.type in (
|
|
184
|
+
"block_mapping",
|
|
185
|
+
"block_sequence",
|
|
186
|
+
"flow_mapping",
|
|
187
|
+
"flow_sequence",
|
|
188
|
+
):
|
|
189
|
+
level += 1
|
|
190
|
+
current = current.parent
|
|
191
|
+
return level
|
|
192
|
+
|
|
193
|
+
def _get_document_index(self, node: "tree_sitter.Node") -> int:
|
|
194
|
+
"""Get document index for a node."""
|
|
195
|
+
current = node
|
|
196
|
+
while current is not None:
|
|
197
|
+
if current.type == "document":
|
|
198
|
+
# Count preceding document siblings
|
|
199
|
+
index = 0
|
|
200
|
+
sibling = current.prev_sibling
|
|
201
|
+
while sibling is not None:
|
|
202
|
+
if sibling.type == "document":
|
|
203
|
+
index += 1
|
|
204
|
+
sibling = sibling.prev_sibling
|
|
205
|
+
return index
|
|
206
|
+
current = current.parent
|
|
207
|
+
return 0
|
|
208
|
+
|
|
209
|
+
def _traverse_nodes(self, node: "tree_sitter.Node") -> "list[tree_sitter.Node]":
|
|
210
|
+
"""Traverse all nodes in the tree."""
|
|
211
|
+
nodes = [node]
|
|
212
|
+
for child in node.children:
|
|
213
|
+
nodes.extend(self._traverse_nodes(child))
|
|
214
|
+
return nodes
|
|
215
|
+
|
|
216
|
+
def _count_document_children(self, document_node: "tree_sitter.Node") -> int:
|
|
217
|
+
"""Count meaningful children in a document (top-level mappings).
|
|
218
|
+
|
|
219
|
+
This counts the number of top-level key-value pairs in the document,
|
|
220
|
+
which is more meaningful than counting AST nodes.
|
|
221
|
+
"""
|
|
222
|
+
count = 0
|
|
223
|
+
for child in document_node.children:
|
|
224
|
+
# Skip document markers and comments
|
|
225
|
+
if child.type in ("---", "...", "comment"):
|
|
226
|
+
continue
|
|
227
|
+
# For block_node, count the mappings inside
|
|
228
|
+
if child.type == "block_node":
|
|
229
|
+
for subchild in child.children:
|
|
230
|
+
if subchild.type == "block_mapping":
|
|
231
|
+
# Count the mapping pairs
|
|
232
|
+
count += len(
|
|
233
|
+
[
|
|
234
|
+
c
|
|
235
|
+
for c in subchild.children
|
|
236
|
+
if c.type == "block_mapping_pair"
|
|
237
|
+
]
|
|
238
|
+
)
|
|
239
|
+
elif subchild.type in ("block_sequence", "flow_sequence"):
|
|
240
|
+
count += 1
|
|
241
|
+
elif child.type == "block_mapping":
|
|
242
|
+
count += len(
|
|
243
|
+
[c for c in child.children if c.type == "block_mapping_pair"]
|
|
244
|
+
)
|
|
245
|
+
return count
|
|
246
|
+
|
|
247
|
+
def _extract_documents(
    self, root_node: "tree_sitter.Node", elements: list[YAMLElement]
) -> None:
    """Append one ``document`` element per YAML document under ``root_node``.

    Each element is named ``Document <index>``, carries the count of
    top-level entries as ``child_count`` and keeps at most the first 200
    characters of the document text (followed by ``...`` when cut). A
    document that fails to convert is logged at debug level and skipped.
    """
    documents = [
        candidate
        for candidate in self._traverse_nodes(root_node)
        if candidate.type == "document"
    ]
    for document in documents:
        try:
            first_line = document.start_point[0] + 1  # tree-sitter rows are 0-based
            last_line = document.end_point[0] + 1
            text = self._get_node_text(document)
            index = self._get_document_index(document)
            # Top-level entries only; markers (---) and comments are excluded.
            entries = self._count_document_children(document)

            preview = text[:200] + "..." if len(text) > 200 else text
            elements.append(
                YAMLElement(
                    name=f"Document {index}",
                    start_line=first_line,
                    end_line=last_line,
                    raw_text=preview,
                    element_type="document",
                    document_index=index,
                    child_count=entries,
                    nesting_level=0,
                )
            )
        except Exception as e:
            log_debug(f"Failed to extract document: {e}")
|
|
278
|
+
|
|
279
|
+
def _extract_mappings(
    self, root_node: "tree_sitter.Node", elements: list[YAMLElement]
) -> None:
    """Append one ``mapping`` element per key-value pair under ``root_node``.

    Both block pairs (``block_mapping_pair``) and flow pairs
    (``flow_pair``) are handled. The key is the first wrapper node before
    the ``:`` token and the value is the wrapper node after it; ``key`` /
    ``value`` wrapper nodes are accepted as well.

    When unwrapping ``flow_node`` / ``block_node`` wrappers, leading
    ``anchor``, ``tag`` and ``comment`` children are skipped so that an
    anchored or tagged value (``base: &defaults {...}``, ``n: !!str 1``)
    is classified by its real content instead of by the anchor/tag node.

    A pair that fails to convert is logged at debug level and skipped.

    Args:
        root_node: Root of the subtree to scan.
        elements: Output list; new elements are appended in place.
    """
    wrappers = ("flow_node", "block_node")
    # Node properties that can precede the actual content inside a wrapper.
    properties = ("anchor", "tag", "comment")

    def unwrap(wrapped):
        # Descend through wrapper nodes to the node that holds the content.
        current = wrapped
        while (
            current is not None
            and current.type in wrappers
            and current.children
        ):
            content = next(
                (c for c in current.children if c.type not in properties), None
            )
            # Wrapper holding only properties (e.g. an anchor on an empty
            # value): fall back to the first child, as before.
            current = content if content is not None else current.children[0]
        return current

    for node in self._traverse_nodes(root_node):
        if node.type not in ("block_mapping_pair", "flow_pair"):
            continue
        try:
            start_line = node.start_point[0] + 1  # tree-sitter rows are 0-based
            end_line = node.end_point[0] + 1
            raw_text = self._get_node_text(node)

            # Locate key and value: wrapper nodes are split by the ':' token.
            key_node = None
            value_node = None
            seen_colon = False
            for child in node.children:
                if child.type == ":":
                    seen_colon = True
                elif child.type in wrappers:
                    if seen_colon:
                        value_node = child
                    else:
                        key_node = child
                elif child.type == "key":
                    key_node = child.children[0] if child.children else child
                elif child.type == "value":
                    value_node = child.children[0] if child.children else child

            key = None
            key_content = unwrap(key_node)
            if key_content is not None:
                key = self._get_node_text(key_content).strip()

            value = None
            value_type = None
            child_count = None
            value_content = unwrap(value_node)
            if value_content is not None:
                value, value_type, child_count = self._extract_value_info(
                    value_content
                )

            elements.append(
                YAMLElement(
                    name=key or "mapping",
                    start_line=start_line,
                    end_line=end_line,
                    raw_text=raw_text,
                    element_type="mapping",
                    key=key,
                    value=value,
                    value_type=value_type,
                    nesting_level=self._calculate_nesting_level(node),
                    document_index=self._get_document_index(node),
                    child_count=child_count,
                )
            )
        except Exception as e:
            log_debug(f"Failed to extract mapping: {e}")
|
|
373
|
+
|
|
374
|
+
def _extract_value_info(
|
|
375
|
+
self, node: "tree_sitter.Node"
|
|
376
|
+
) -> tuple[str | None, str | None, int | None]:
|
|
377
|
+
"""Extract value information from a node.
|
|
378
|
+
|
|
379
|
+
Returns:
|
|
380
|
+
Tuple of (value, value_type, child_count)
|
|
381
|
+
"""
|
|
382
|
+
if node is None:
|
|
383
|
+
return None, None, None
|
|
384
|
+
|
|
385
|
+
node_type = node.type
|
|
386
|
+
text = self._get_node_text(node).strip()
|
|
387
|
+
|
|
388
|
+
# Scalar types
|
|
389
|
+
if node_type in ("plain_scalar", "double_quote_scalar", "single_quote_scalar"):
|
|
390
|
+
# Determine scalar type
|
|
391
|
+
if text.lower() in ("true", "false", "yes", "no", "on", "off"):
|
|
392
|
+
return text, "boolean", None
|
|
393
|
+
elif text.lower() in ("null", "~", ""):
|
|
394
|
+
return text if text else None, "null", None
|
|
395
|
+
elif self._is_number(text):
|
|
396
|
+
return text, "number", None
|
|
397
|
+
else:
|
|
398
|
+
return text, "string", None
|
|
399
|
+
elif node_type == "block_scalar":
|
|
400
|
+
return text, "string", None
|
|
401
|
+
elif node_type in ("block_mapping", "flow_mapping"):
|
|
402
|
+
child_count = len(
|
|
403
|
+
[
|
|
404
|
+
c
|
|
405
|
+
for c in node.children
|
|
406
|
+
if c.type in ("block_mapping_pair", "flow_pair")
|
|
407
|
+
]
|
|
408
|
+
)
|
|
409
|
+
return None, "mapping", child_count
|
|
410
|
+
elif node_type in ("block_sequence", "flow_sequence"):
|
|
411
|
+
child_count = len(
|
|
412
|
+
[c for c in node.children if c.type in ("block_sequence_item",)]
|
|
413
|
+
or node.children
|
|
414
|
+
)
|
|
415
|
+
return None, "sequence", child_count
|
|
416
|
+
elif node_type == "alias":
|
|
417
|
+
alias_name = text.lstrip("*")
|
|
418
|
+
return f"*{alias_name}", "alias", None
|
|
419
|
+
|
|
420
|
+
return text, "unknown", None
|
|
421
|
+
|
|
422
|
+
def _is_number(self, text: str) -> bool:
|
|
423
|
+
"""Check if text represents a number."""
|
|
424
|
+
try:
|
|
425
|
+
float(text)
|
|
426
|
+
return True
|
|
427
|
+
except ValueError:
|
|
428
|
+
return False
|
|
429
|
+
|
|
430
|
+
def _extract_sequences(
    self, root_node: "tree_sitter.Node", elements: list[YAMLElement]
) -> None:
    """Append one ``sequence`` element per YAML list under ``root_node``.

    Block and flow sequences are both reported. ``child_count`` is the
    number of items: ``block_sequence_item`` children for block
    sequences and ``flow_node`` / ``flow_pair`` children for flow
    sequences, so the ``[``, ``,`` and ``]`` tokens of a flow sequence
    are not counted. The stored text is cut to 200 characters (followed
    by ``...``) when longer. A sequence that fails to convert is logged
    at debug level and skipped.

    Args:
        root_node: Root of the subtree to scan.
        elements: Output list; new elements are appended in place.
    """
    item_types_by_kind = {
        "block_sequence": ("block_sequence_item",),
        "flow_sequence": ("flow_node", "flow_pair"),
    }

    for node in self._traverse_nodes(root_node):
        item_types = item_types_by_kind.get(node.type)
        if item_types is None:
            continue
        try:
            start_line = node.start_point[0] + 1  # tree-sitter rows are 0-based
            end_line = node.end_point[0] + 1
            raw_text = self._get_node_text(node)

            child_count = sum(1 for c in node.children if c.type in item_types)

            elements.append(
                YAMLElement(
                    name="sequence",
                    start_line=start_line,
                    end_line=end_line,
                    raw_text=raw_text[:200] + "..."
                    if len(raw_text) > 200
                    else raw_text,
                    element_type="sequence",
                    value_type="sequence",
                    nesting_level=self._calculate_nesting_level(node),
                    document_index=self._get_document_index(node),
                    child_count=child_count,
                )
            )
        except Exception as e:
            log_debug(f"Failed to extract sequence: {e}")
|
|
472
|
+
|
|
473
|
+
def _extract_anchors(
    self, root_node: "tree_sitter.Node", elements: list[YAMLElement]
) -> None:
    """Append one ``anchor`` element per ``&name`` definition.

    The anchor name is the node text without leading ``&`` characters and
    surrounding whitespace; the element itself is named ``&<name>``. An
    anchor that fails to convert is logged at debug level and skipped.
    """
    anchors = [
        candidate
        for candidate in self._traverse_nodes(root_node)
        if candidate.type == "anchor"
    ]
    for anchor in anchors:
        try:
            first_line = anchor.start_point[0] + 1  # tree-sitter rows are 0-based
            last_line = anchor.end_point[0] + 1
            text = self._get_node_text(anchor)
            label = text.lstrip("&").strip()
            depth = self._calculate_nesting_level(anchor)
            document = self._get_document_index(anchor)

            elements.append(
                YAMLElement(
                    name=f"&{label}",
                    start_line=first_line,
                    end_line=last_line,
                    raw_text=text,
                    element_type="anchor",
                    anchor_name=label,
                    nesting_level=depth,
                    document_index=document,
                )
            )
        except Exception as e:
            log_debug(f"Failed to extract anchor: {e}")
|
|
501
|
+
|
|
502
|
+
def _extract_aliases(
    self, root_node: "tree_sitter.Node", elements: list[YAMLElement]
) -> None:
    """Append one ``alias`` element per ``*name`` reference.

    The target is the node text without leading ``*`` characters and
    surrounding whitespace; it is recorded as written and not resolved to
    its anchor. An alias that fails to convert is logged at debug level
    and skipped.
    """
    aliases = [
        candidate
        for candidate in self._traverse_nodes(root_node)
        if candidate.type == "alias"
    ]
    for alias in aliases:
        try:
            first_line = alias.start_point[0] + 1  # tree-sitter rows are 0-based
            last_line = alias.end_point[0] + 1
            text = self._get_node_text(alias)
            target = text.lstrip("*").strip()
            depth = self._calculate_nesting_level(alias)
            document = self._get_document_index(alias)

            elements.append(
                YAMLElement(
                    name=f"*{target}",
                    start_line=first_line,
                    end_line=last_line,
                    raw_text=text,
                    element_type="alias",
                    alias_target=target,
                    nesting_level=depth,
                    document_index=document,
                )
            )
        except Exception as e:
            log_debug(f"Failed to extract alias: {e}")
|
|
530
|
+
|
|
531
|
+
def _extract_comments(
    self, root_node: "tree_sitter.Node", elements: list[YAMLElement]
) -> None:
    """Append one ``comment`` element per comment node.

    The comment text (without leading ``#`` characters and surrounding
    whitespace) is stored as the value; the element name is that text cut
    to 50 characters plus ``...`` when longer. Comments are always
    recorded with nesting level 0. A comment that fails to convert is
    logged at debug level and skipped.
    """
    comments = [
        candidate
        for candidate in self._traverse_nodes(root_node)
        if candidate.type == "comment"
    ]
    for comment in comments:
        try:
            first_line = comment.start_point[0] + 1  # tree-sitter rows are 0-based
            last_line = comment.end_point[0] + 1
            text = self._get_node_text(comment)
            body = text.lstrip("#").strip()
            document = self._get_document_index(comment)

            title = body[:50] + "..." if len(body) > 50 else body
            elements.append(
                YAMLElement(
                    name=title,
                    start_line=first_line,
                    end_line=last_line,
                    raw_text=text,
                    element_type="comment",
                    value=body,
                    value_type="comment",
                    document_index=document,
                    nesting_level=0,
                )
            )
        except Exception as e:
            log_debug(f"Failed to extract comment: {e}")
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
class YAMLPlugin(LanguagePlugin):
|
|
564
|
+
"""YAML language plugin using tree-sitter-yaml for true YAML parsing."""
|
|
565
|
+
|
|
566
|
+
def get_language_name(self) -> str:
    """Identify the language handled by this plugin."""
    language_name = "yaml"
    return language_name
|
|
569
|
+
|
|
570
|
+
def get_file_extensions(self) -> list[str]:
    """List the file extensions this plugin handles (a new list per call)."""
    extensions = [".yaml", ".yml"]
    return extensions
|
|
573
|
+
|
|
574
|
+
def create_extractor(self) -> ElementExtractor:
    """Build a new YAML element extractor for one analysis run."""
    extractor = YAMLElementExtractor()
    return extractor
|
|
577
|
+
|
|
578
|
+
def get_supported_element_types(self) -> list[str]:
    """List the element type names this plugin can report (a new list per call)."""
    supported = (
        "mapping",
        "sequence",
        "scalar",
        "anchor",
        "alias",
        "comment",
        "document",
    )
    return list(supported)
|
|
589
|
+
|
|
590
|
+
def get_queries(self) -> dict[str, str]:
    """Return the tree-sitter query table defined for YAML.

    The table is imported on demand from the package's ``queries.yaml``
    module.
    """
    from ..queries.yaml import YAML_QUERIES

    query_table = YAML_QUERIES
    return query_table
|
|
595
|
+
|
|
596
|
+
def execute_query_strategy(
    self, query_key: str | None, language: str
) -> str | None:
    """Look up the YAML query registered under ``query_key``.

    Args:
        query_key: Name of the query; a falsy key never matches.
        language: Requested language; anything but ``"yaml"`` is rejected.

    Returns:
        The query string, or None when the language is not YAML, the key
        is falsy, or no query is registered under that key.
    """
    if language != "yaml":
        return None

    available = self.get_queries()
    if not query_key:
        return None
    return available.get(query_key)
|
|
605
|
+
|
|
606
|
+
def get_element_categories(self) -> dict[str, list[str]]:
    """Group tree-sitter node type names by category for query execution.

    A new dictionary (with new lists) is built on every call.
    """
    categories: dict[str, list[str]] = {}
    categories["structure"] = ["document", "block_mapping", "block_sequence"]
    categories["mappings"] = ["block_mapping_pair", "flow_pair"]
    categories["sequences"] = ["block_sequence", "flow_sequence"]
    categories["scalars"] = [
        "plain_scalar",
        "double_quote_scalar",
        "single_quote_scalar",
        "block_scalar",
    ]
    categories["references"] = ["anchor", "alias"]
    categories["metadata"] = ["comment", "tag"]
    return categories
|
|
621
|
+
|
|
622
|
+
async def analyze_file(
    self, file_path: str, request: "AnalysisRequest"
) -> "AnalysisResult":
    """Parse a YAML file with tree-sitter-yaml and extract its elements.

    Failures are never raised: a missing tree-sitter-yaml installation or
    any exception while reading, parsing or extracting produces a result
    with ``success=False`` and the reason in ``error_message``.

    Args:
        file_path: Path to the YAML file.
        request: Analysis request parameters (not consulted here).

    Returns:
        AnalysisResult holding the extracted elements; ``node_count`` is
        the number of extracted elements.
    """
    from ..encoding_utils import read_file_safe

    def failed(message: str) -> "AnalysisResult":
        # Shared shape of every unsuccessful result.
        return AnalysisResult(
            file_path=file_path,
            language="yaml",
            line_count=0,
            elements=[],
            node_count=0,
            query_results={},
            source_code="",
            success=False,
            error_message=message,
        )

    if not YAML_AVAILABLE:
        log_error("tree-sitter-yaml not available")
        return failed("YAML support not available. Install tree-sitter-yaml.")

    try:
        # read_file_safe also reports the detected encoding; only the text is used.
        content, _encoding = read_file_safe(file_path)

        yaml_language = tree_sitter.Language(ts_yaml.language())
        parser = tree_sitter.Parser()
        parser.language = yaml_language

        tree = parser.parse(content.encode("utf-8"))

        extractor = self.create_extractor()
        elements = extractor.extract_yaml_elements(tree, content)

        log_info(f"Extracted {len(elements)} YAML elements from {file_path}")

        return AnalysisResult(
            file_path=file_path,
            language="yaml",
            line_count=len(content.splitlines()),
            elements=elements,
            node_count=len(elements),
            query_results={},
            source_code=content,
            success=True,
            error_message=None,
        )
    except Exception as e:
        log_error(f"Failed to analyze YAML file {file_path}: {e}")
        return failed(str(e))
|