tree-sitter-analyzer 1.7.7__py3-none-any.whl → 1.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tree-sitter-analyzer might be problematic. Click here for more details.
- tree_sitter_analyzer/__init__.py +1 -1
- tree_sitter_analyzer/api.py +23 -30
- tree_sitter_analyzer/cli/argument_validator.py +77 -0
- tree_sitter_analyzer/cli/commands/table_command.py +7 -2
- tree_sitter_analyzer/cli_main.py +17 -3
- tree_sitter_analyzer/core/cache_service.py +15 -5
- tree_sitter_analyzer/core/query.py +33 -22
- tree_sitter_analyzer/core/query_service.py +179 -154
- tree_sitter_analyzer/formatters/formatter_registry.py +355 -0
- tree_sitter_analyzer/formatters/html_formatter.py +462 -0
- tree_sitter_analyzer/formatters/language_formatter_factory.py +3 -0
- tree_sitter_analyzer/formatters/markdown_formatter.py +1 -1
- tree_sitter_analyzer/language_detector.py +80 -7
- tree_sitter_analyzer/languages/css_plugin.py +390 -0
- tree_sitter_analyzer/languages/html_plugin.py +395 -0
- tree_sitter_analyzer/languages/java_plugin.py +116 -0
- tree_sitter_analyzer/languages/javascript_plugin.py +113 -0
- tree_sitter_analyzer/languages/markdown_plugin.py +266 -46
- tree_sitter_analyzer/languages/python_plugin.py +176 -33
- tree_sitter_analyzer/languages/typescript_plugin.py +130 -1
- tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +12 -1
- tree_sitter_analyzer/mcp/tools/query_tool.py +101 -60
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +12 -1
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +26 -12
- tree_sitter_analyzer/mcp/utils/file_output_factory.py +204 -0
- tree_sitter_analyzer/mcp/utils/file_output_manager.py +52 -2
- tree_sitter_analyzer/models.py +53 -0
- tree_sitter_analyzer/output_manager.py +1 -1
- tree_sitter_analyzer/plugins/base.py +50 -0
- tree_sitter_analyzer/plugins/manager.py +5 -1
- tree_sitter_analyzer/queries/css.py +634 -0
- tree_sitter_analyzer/queries/html.py +556 -0
- tree_sitter_analyzer/queries/markdown.py +54 -164
- tree_sitter_analyzer/query_loader.py +16 -3
- tree_sitter_analyzer/security/validator.py +182 -44
- tree_sitter_analyzer/utils/__init__.py +113 -0
- tree_sitter_analyzer/utils/tree_sitter_compat.py +282 -0
- tree_sitter_analyzer/utils.py +62 -24
- {tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.3.dist-info}/METADATA +135 -31
- {tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.3.dist-info}/RECORD +42 -32
- {tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.3.dist-info}/entry_points.txt +2 -0
- {tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.3.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
HTML Formatter
|
|
4
|
+
|
|
5
|
+
Specialized formatter for HTML/CSS code elements including MarkupElement and StyleElement.
|
|
6
|
+
Provides HTML-specific formatting with element classification and hierarchy display.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from ..models import CodeElement, MarkupElement, StyleElement
|
|
13
|
+
from .formatter_registry import IFormatter
|
|
14
|
+
from .base_formatter import BaseFormatter
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class HtmlFormatter(BaseFormatter, IFormatter):
    """Render HTML/CSS analysis elements as a Markdown report.

    Accepts ``MarkupElement`` and ``StyleElement`` objects as well as plain
    dictionaries describing them. Markup elements are tabulated per
    ``element_class`` (followed by a hierarchy listing when parent/child
    links are present), style elements are tabulated per ``element_class``,
    and everything else goes into an "Other Elements" table.
    """

    # Values of a dictionary's "type"/"element_type" entry that mark it as a
    # markup or style element when it has no "tag_name"/"selector" key.
    _MARKUP_TYPES = ("tag", "element", "markup")
    _STYLE_TYPES = ("rule", "style")

    @staticmethod
    def get_format_name() -> str:
        """Return the name this formatter is registered under."""
        return "html"

    @classmethod
    def _classify(cls, element: Any) -> str:
        """Return ``"markup"``, ``"style"`` or ``"other"`` for *element*.

        Single source of truth for ``format`` and ``format_summary`` so both
        agree on how dictionaries are categorised. For dictionaries the
        ``"type"`` key takes precedence over ``"element_type"``.
        """
        if isinstance(element, MarkupElement):
            return "markup"
        if isinstance(element, StyleElement):
            return "style"
        if isinstance(element, dict):
            element_type = element.get("type", element.get("element_type", "unknown"))
            if "tag_name" in element or element_type in cls._MARKUP_TYPES:
                return "markup"
            if "selector" in element or element_type in cls._STYLE_TYPES:
                return "style"
        return "other"

    def format(self, elements: list[CodeElement]) -> str:
        """Format elements as a Markdown structure report.

        Args:
            elements: Element objects and/or dictionaries to render.

        Returns:
            The Markdown report, or ``"No HTML elements found."`` when
            *elements* is empty.
        """
        if not elements:
            return "No HTML elements found."

        markup_elements = []
        style_elements = []
        other_elements = []
        for element in elements:
            kind = self._classify(element)
            is_dict = isinstance(element, dict)
            if kind == "markup":
                # Dictionaries are wrapped so the table code can use attribute access.
                markup_elements.append(
                    self._dict_to_markup_element(element) if is_dict else element
                )
            elif kind == "style":
                style_elements.append(
                    self._dict_to_style_element(element) if is_dict else element
                )
            else:
                other_elements.append(element)

        lines = ["# HTML Structure Analysis", ""]
        if markup_elements:
            lines.extend(self._format_markup_elements(markup_elements))
        if style_elements:
            lines.extend(self._format_style_elements(style_elements))
        if other_elements:
            lines.extend(self._format_other_elements(other_elements))

        return "\n".join(lines)

    def format_summary(self, analysis_result: dict[str, Any]) -> str:
        """Format per-category element counts.

        Dictionary elements are counted in the same category in which
        ``format`` would render them.

        Args:
            analysis_result: Mapping whose ``"elements"`` entry holds the
                elements to count.

        Returns:
            The Markdown summary, or ``"No HTML elements found."`` when there
            are no elements.
        """
        elements = analysis_result.get("elements", [])
        if not elements:
            return "No HTML elements found."

        kinds = [self._classify(element) for element in elements]
        markup_count = kinds.count("markup")
        style_count = kinds.count("style")
        other_count = len(elements) - markup_count - style_count

        lines = [
            "# HTML Analysis Summary",
            "",
            f"**Total Elements:** {len(elements)}",
            f"- Markup Elements: {markup_count}",
            f"- Style Elements: {style_count}",
            f"- Other Elements: {other_count}",
        ]
        return "\n".join(lines)

    def format_structure(self, analysis_result: dict[str, Any]) -> str:
        """Format the ``"elements"`` entry of *analysis_result* via ``format``."""
        return self.format(analysis_result.get("elements", []))

    def format_advanced(self, analysis_result: dict[str, Any], output_format: str = "json") -> str:
        """Format advanced analysis output.

        Args:
            analysis_result: Mapping whose ``"elements"`` entry holds the
                elements to render.
            output_format: ``"json"`` delegates to ``HtmlJsonFormatter``; any
                other value produces the Markdown report.
        """
        elements = analysis_result.get("elements", [])
        if output_format == "json":
            return HtmlJsonFormatter().format(elements)
        return self.format(elements)

    def format_table(self, analysis_result: dict[str, Any], table_type: str = "full") -> str:
        """Format table output.

        Args:
            analysis_result: Mapping whose ``"elements"`` entry holds the
                elements to render.
            table_type: ``"compact"`` delegates to ``HtmlCompactFormatter``,
                ``"json"`` to ``HtmlJsonFormatter``; any other value
                (including ``"html"`` and ``"full"``) produces the Markdown
                report.
        """
        elements = analysis_result.get("elements", [])
        if table_type == "compact":
            return HtmlCompactFormatter().format(elements)
        if table_type == "json":
            return HtmlJsonFormatter().format(elements)
        return self.format(elements)

    def _format_markup_elements(self, elements: list[MarkupElement]) -> list[str]:
        """Build the "HTML Elements" section: one table per element class.

        A hierarchy listing is appended when at least one element has a
        parent, i.e. when the root elements are a strict subset of *elements*.
        """
        lines = ["## HTML Elements", ""]

        # Group by element class, keeping first-seen order.
        element_groups: dict[str, list] = {}
        for element in elements:
            element_groups.setdefault(element.element_class or "unknown", []).append(element)

        for element_class, group_elements in element_groups.items():
            lines.append(f"### {element_class.title()} Elements ({len(group_elements)})")
            lines.append("")
            lines.append("| Tag | Name | Lines | Attributes | Children |")
            lines.append("|-----|------|-------|------------|----------|")

            for element in group_elements:
                tag_name = element.tag_name or "unknown"
                name = element.name or tag_name
                lines_str = f"{element.start_line}-{element.end_line}"

                # Attributes with a falsy value are shown as a bare key.
                attrs = [
                    f'{key}="{value}"' if value else key
                    for key, value in (element.attributes or {}).items()
                ]
                attrs_str = ", ".join(attrs) if attrs else "-"
                if len(attrs_str) > 30:
                    # Keep the table cell narrow.
                    attrs_str = attrs_str[:27] + "..."

                children_count = len(element.children or [])

                lines.append(f"| `{tag_name}` | {name} | {lines_str} | {attrs_str} | {children_count} |")

            lines.append("")

        root_elements = [e for e in elements if e.parent is None]
        if root_elements and len(root_elements) < len(elements):
            lines.append("### Element Hierarchy")
            lines.append("")
            for root in root_elements:
                lines.extend(self._format_element_tree(root, 0))
            lines.append("")

        return lines

    def _format_element_tree(self, element: MarkupElement, depth: int) -> list[str]:
        """Return list lines for *element* and, recursively, its children.

        Only the ``id``, ``class`` and ``name`` attributes are shown.
        """
        # NOTE(review): the indent unit is reproduced as it appears in the
        # reviewed text; confirm it against the packaged file.
        indent = " " * depth
        tag_name = element.tag_name or "unknown"

        key_attrs = [
            f'{key}="{value}"' if value else key
            for key, value in (element.attributes or {}).items()
            if key in ["id", "class", "name"]
        ]
        attrs_info = f" ({', '.join(key_attrs)})" if key_attrs else ""

        lines = [f"{indent}- `{tag_name}`{attrs_info} [{element.start_line}-{element.end_line}]"]
        for child in element.children or []:
            lines.extend(self._format_element_tree(child, depth + 1))
        return lines

    def _format_style_elements(self, elements: list[StyleElement]) -> list[str]:
        """Build the "CSS Rules" section: one table per element class."""
        lines = ["## CSS Rules", ""]

        # Group by element class, keeping first-seen order.
        element_groups: dict[str, list] = {}
        for element in elements:
            element_groups.setdefault(element.element_class or "unknown", []).append(element)

        for element_class, group_elements in element_groups.items():
            lines.append(f"### {element_class.title()} Rules ({len(group_elements)})")
            lines.append("")
            lines.append("| Selector | Properties | Lines |")
            lines.append("|----------|------------|-------|")

            for element in group_elements:
                selector = element.selector or element.name
                lines_str = f"{element.start_line}-{element.end_line}"

                props = [f"{key}: {value}" for key, value in (element.properties or {}).items()]
                props_str = "; ".join(props) if props else "-"
                if len(props_str) > 40:
                    # Keep the table cell narrow.
                    props_str = props_str[:37] + "..."

                lines.append(f"| `{selector}` | {props_str} | {lines_str} |")

            lines.append("")

        return lines

    def _format_other_elements(self, elements: list) -> list[str]:
        """Build the "Other Elements" table.

        Works for dictionaries (key lookup) and arbitrary objects (attribute
        lookup); missing fields fall back to ``"unknown"`` or ``0``.
        """
        lines = [
            "## Other Elements",
            "",
            "| Type | Name | Lines | Language |",
            "|------|------|-------|----------|",
        ]

        for element in elements:
            if isinstance(element, dict):
                element_type = element.get("element_type", element.get("type", "unknown"))
                name = element.get("name", "unknown")
                start_line = element.get("start_line", 0)
                end_line = element.get("end_line", 0)
                language = element.get("language", "unknown")
            else:
                element_type = getattr(element, "element_type", "unknown")
                name = getattr(element, "name", "unknown")
                start_line = getattr(element, "start_line", 0)
                end_line = getattr(element, "end_line", 0)
                language = getattr(element, "language", "unknown")

            lines_str = f"{start_line}-{end_line}"
            lines.append(f"| {element_type} | {name} | {lines_str} | {language} |")

        lines.append("")
        return lines

    def _dict_to_markup_element(self, data: dict):
        """Wrap *data* in an object exposing MarkupElement-style attributes.

        The wrapper always has no children and no parent, so dictionary
        elements never contribute to the hierarchy listing.
        """

        class MockMarkupElement:
            def __init__(self, data):
                self.name = data.get('name', 'unknown')
                self.tag_name = data.get('tag_name', data.get('name', 'unknown'))
                self.element_class = data.get('element_class', 'unknown')
                self.start_line = data.get('start_line', 0)
                self.end_line = data.get('end_line', 0)
                self.attributes = data.get('attributes', {})
                self.children = []
                self.parent = None
                self.language = data.get('language', 'html')

        return MockMarkupElement(data)

    def _dict_to_style_element(self, data: dict):
        """Wrap *data* in an object exposing StyleElement-style attributes."""

        class MockStyleElement:
            def __init__(self, data):
                self.name = data.get('name', 'unknown')
                self.selector = data.get('selector', data.get('name', 'unknown'))
                self.element_class = data.get('element_class', 'unknown')
                self.start_line = data.get('start_line', 0)
                self.end_line = data.get('end_line', 0)
                self.properties = data.get('properties', {})
                self.language = data.get('language', 'css')

        return MockStyleElement(data)
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
class HtmlJsonFormatter(IFormatter):
    """Serialize HTML/CSS analysis elements to a JSON document.

    The output has a single ``"html_analysis"`` object holding the total
    element count and three lists: markup, style and other elements.
    """

    @staticmethod
    def get_format_name() -> str:
        """Return the name this formatter is registered under."""
        return "html_json"

    def format(self, elements: list[CodeElement]) -> str:
        """Format elements as indented JSON.

        ``MarkupElement`` and ``StyleElement`` objects are converted to
        dictionaries; dictionaries are passed through unchanged into the list
        matching their category; any other object is reduced to its basic
        fields.

        Args:
            elements: Element objects and/or dictionaries. ``None`` is
                treated as an empty list.

        Returns:
            The JSON text (2-space indent, non-ASCII characters kept as-is).
        """
        elements = elements or []

        markup: list = []
        style: list = []
        other: list = []
        for element in elements:
            if isinstance(element, MarkupElement):
                markup.append(self._markup_to_dict(element))
            elif isinstance(element, StyleElement):
                style.append(self._style_to_dict(element))
            elif isinstance(element, dict):
                element_type = element.get("element_type", element.get("type", "unknown"))
                if "tag_name" in element or element_type in ['tag', 'element', 'markup']:
                    markup.append(element)
                elif "selector" in element or element_type in ['rule', 'style']:
                    style.append(element)
                else:
                    other.append(element)
            else:
                other.append(self._element_to_dict(element))

        result = {
            "html_analysis": {
                "total_elements": len(elements),
                "markup_elements": markup,
                "style_elements": style,
                "other_elements": other,
            }
        }
        return json.dumps(result, indent=2, ensure_ascii=False)

    def _markup_to_dict(self, element: MarkupElement) -> dict[str, Any]:
        """Convert a MarkupElement, and recursively its children, to a dict."""
        # Tolerate a missing children list the same way attributes are
        # tolerated elsewhere in this module.
        children = element.children or []
        return {
            "name": element.name,
            "tag_name": element.tag_name,
            "element_class": element.element_class,
            "start_line": element.start_line,
            "end_line": element.end_line,
            "attributes": element.attributes,
            "children_count": len(children),
            "children": [self._markup_to_dict(child) for child in children],
            "language": element.language,
        }

    def _style_to_dict(self, element: StyleElement) -> dict[str, Any]:
        """Convert a StyleElement to a dict."""
        return {
            "name": element.name,
            "selector": element.selector,
            "element_class": element.element_class,
            "start_line": element.start_line,
            "end_line": element.end_line,
            "properties": element.properties,
            "language": element.language,
        }

    def _element_to_dict(self, element: CodeElement) -> dict[str, Any]:
        """Convert any other element object to a dict of its basic fields.

        Missing attributes fall back to ``"unknown"`` / ``0``, matching the
        defaults the Markdown and compact HTML formatters use for such
        objects, instead of raising ``AttributeError``.
        """
        return {
            "name": getattr(element, "name", "unknown"),
            "type": getattr(element, "element_type", "unknown"),
            "start_line": getattr(element, "start_line", 0),
            "end_line": getattr(element, "end_line", 0),
            "language": getattr(element, "language", "unknown"),
        }
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
class HtmlCompactFormatter(IFormatter):
    """Plain-text formatter: a count summary followed by one line per element."""

    @staticmethod
    def get_format_name() -> str:
        """Return the name this formatter is registered under."""
        return "html_compact"

    def format(self, elements: list[CodeElement]) -> str:
        """Format elements as a compact text listing.

        The summary counts use the same categorisation as the per-element
        lines, so dictionary elements shown with a markup or style symbol are
        counted as markup or style rather than as "Other".

        Args:
            elements: Element objects and/or dictionaries to render.

        Returns:
            The listing, or ``"No HTML elements found."`` when *elements* is
            empty.
        """
        if not elements:
            return "No HTML elements found."

        described = [self._describe(element) for element in elements]
        markup_count = sum(1 for kind, _ in described if kind == "markup")
        style_count = sum(1 for kind, _ in described if kind == "style")
        other_count = len(elements) - markup_count - style_count

        lines = [
            "HTML ELEMENTS",
            "-" * 20,
            f"Total: {len(elements)} elements",
            f" Markup: {markup_count}",
            f" Style: {style_count}",
            f" Other: {other_count}",
            "",
        ]
        lines.extend(entry for _, entry in described)
        return "\n".join(lines)

    @staticmethod
    def _tag_info(tag_name: Any, attributes: Any) -> str:
        """Return ``<tag>`` plus ``#id`` / ``.class`` suffixes when present.

        *attributes* may be ``None``; it is then treated as empty.
        """
        attributes = attributes or {}
        info = f"<{tag_name}>"
        if attributes.get("id"):
            info += f" #{attributes['id']}"
        if attributes.get("class"):
            info += f" .{attributes['class']}"
        return info

    def _describe(self, element: Any) -> tuple[str, str]:
        """Return ``(kind, line)`` for one element.

        ``kind`` is ``"markup"``, ``"style"`` or ``"other"``; ``line`` is the
        rendered listing entry. Dictionaries are categorised by their
        ``"tag_name"``/``"selector"`` keys or, failing that, by their
        ``"element_type"`` (then ``"type"``) value.
        """
        if isinstance(element, MarkupElement):
            kind = "markup"
            symbol = "🏷️"
            info = self._tag_info(element.tag_name, element.attributes)
            name = element.name
            start_line = element.start_line
            end_line = element.end_line
        elif isinstance(element, StyleElement):
            kind = "style"
            symbol = "🎨"
            info = element.selector
            name = element.name
            start_line = element.start_line
            end_line = element.end_line
        elif isinstance(element, dict):
            element_type = element.get("element_type", element.get("type", "unknown"))
            name = element.get("name", "unknown")
            start_line = element.get("start_line", 0)
            end_line = element.get("end_line", 0)

            if "tag_name" in element or element_type in ['tag', 'element', 'markup']:
                kind = "markup"
                symbol = "🏷️"
                info = self._tag_info(element.get("tag_name", name), element.get("attributes", {}))
            elif "selector" in element or element_type in ['rule', 'style']:
                kind = "style"
                symbol = "🎨"
                info = element.get("selector", name)
            else:
                kind = "other"
                symbol = "📄"
                info = element_type
        else:
            kind = "other"
            symbol = "📄"
            info = getattr(element, "element_type", "unknown")
            name = getattr(element, "name", "unknown")
            start_line = getattr(element, "start_line", 0)
            end_line = getattr(element, "end_line", 0)

        return kind, f"{symbol} {name} {info} [{start_line}-{end_line}]"
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
# Register HTML formatters
|
|
452
|
+
def register_html_formatters() -> None:
    """Register the three HTML formatter classes with ``FormatterRegistry``.

    Registration order is ``HtmlFormatter``, ``HtmlJsonFormatter``,
    ``HtmlCompactFormatter``.
    """
    from .formatter_registry import FormatterRegistry

    for formatter_class in (HtmlFormatter, HtmlJsonFormatter, HtmlCompactFormatter):
        FormatterRegistry.register_formatter(formatter_class)
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
# Import-time side effect: importing this module registers the HTML formatters
|
|
462
|
+
register_html_formatters()
|
|
@@ -6,6 +6,7 @@ Factory for creating language-specific formatters for different output types.
|
|
|
6
6
|
from typing import Dict, Type, Any
|
|
7
7
|
from .base_formatter import BaseFormatter
|
|
8
8
|
from .markdown_formatter import MarkdownFormatter
|
|
9
|
+
from .html_formatter import HtmlFormatter
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
class LanguageFormatterFactory:
|
|
@@ -14,6 +15,8 @@ class LanguageFormatterFactory:
|
|
|
14
15
|
_formatters: Dict[str, Type[BaseFormatter]] = {
|
|
15
16
|
"markdown": MarkdownFormatter,
|
|
16
17
|
"md": MarkdownFormatter, # Alias
|
|
18
|
+
"html": HtmlFormatter,
|
|
19
|
+
"css": HtmlFormatter, # CSS files also use HTML formatter
|
|
17
20
|
}
|
|
18
21
|
|
|
19
22
|
@classmethod
|
|
@@ -442,7 +442,7 @@ class MarkdownFormatter(BaseFormatter):
|
|
|
442
442
|
"""Format advanced analysis in text format"""
|
|
443
443
|
output = ["--- Advanced Analysis Results ---"]
|
|
444
444
|
|
|
445
|
-
# Basic info
|
|
445
|
+
# Basic info - format with quotes to match expected output
|
|
446
446
|
output.append(f'"File: {data["file_path"]}"')
|
|
447
447
|
output.append(f'"Language: {data["language"]}"')
|
|
448
448
|
output.append(f'"Lines: {data["line_count"]}"')
|
|
@@ -66,6 +66,15 @@ class LanguageDetector:
|
|
|
66
66
|
".mkd": "markdown",
|
|
67
67
|
".mkdn": "markdown",
|
|
68
68
|
".mdx": "markdown",
|
|
69
|
+
# HTML系
|
|
70
|
+
".html": "html",
|
|
71
|
+
".htm": "html",
|
|
72
|
+
".xhtml": "html",
|
|
73
|
+
# CSS系
|
|
74
|
+
".css": "css",
|
|
75
|
+
".scss": "css",
|
|
76
|
+
".sass": "css",
|
|
77
|
+
".less": "css",
|
|
69
78
|
# JSON系
|
|
70
79
|
".json": "json",
|
|
71
80
|
".jsonc": "json",
|
|
@@ -104,6 +113,8 @@ class LanguageDetector:
|
|
|
104
113
|
"rust",
|
|
105
114
|
"go",
|
|
106
115
|
"markdown",
|
|
116
|
+
"html",
|
|
117
|
+
"css",
|
|
107
118
|
"json",
|
|
108
119
|
}
|
|
109
120
|
|
|
@@ -148,6 +159,15 @@ class LanguageDetector:
|
|
|
148
159
|
".mkd": ("markdown", 0.8),
|
|
149
160
|
".mkdn": ("markdown", 0.8),
|
|
150
161
|
".mdx": ("markdown", 0.7), # MDX might be mixed with JSX
|
|
162
|
+
# HTML extensions
|
|
163
|
+
".html": ("html", 0.9),
|
|
164
|
+
".htm": ("html", 0.9),
|
|
165
|
+
".xhtml": ("html", 0.8),
|
|
166
|
+
# CSS extensions
|
|
167
|
+
".css": ("css", 0.9),
|
|
168
|
+
".scss": ("css", 0.8), # Sass/SCSS
|
|
169
|
+
".sass": ("css", 0.8), # Sass
|
|
170
|
+
".less": ("css", 0.8), # Less
|
|
151
171
|
# JSON extensions
|
|
152
172
|
".json": ("json", 0.9),
|
|
153
173
|
".jsonc": ("json", 0.8), # JSON with comments
|
|
@@ -203,6 +223,26 @@ class LanguageDetector:
|
|
|
203
223
|
(r"^\s*\|.*\|", 0.2), # Tables
|
|
204
224
|
(r"^[-=]{3,}$", 0.2), # Setext headers or horizontal rules
|
|
205
225
|
],
|
|
226
|
+
"html": [
|
|
227
|
+
(r"<!DOCTYPE\s+html", 0.4), # HTML5 doctype
|
|
228
|
+
(r"<html[^>]*>", 0.3), # HTML tag
|
|
229
|
+
(r"<head[^>]*>", 0.3), # Head tag
|
|
230
|
+
(r"<body[^>]*>", 0.3), # Body tag
|
|
231
|
+
(r"<div[^>]*>", 0.2), # Div tag
|
|
232
|
+
(r"<p[^>]*>", 0.2), # Paragraph tag
|
|
233
|
+
(r"<a\s+href=", 0.2), # Link tag with href
|
|
234
|
+
(r"<img\s+src=", 0.2), # Image tag with src
|
|
235
|
+
],
|
|
236
|
+
"css": [
|
|
237
|
+
(r"[.#][\w-]+\s*{", 0.4), # CSS selectors
|
|
238
|
+
(r"@media\s+", 0.3), # Media queries
|
|
239
|
+
(r"@import\s+", 0.3), # Import statements
|
|
240
|
+
(r"@keyframes\s+", 0.3), # Keyframes
|
|
241
|
+
(r":\s*[\w-]+\s*;", 0.2), # Property declarations
|
|
242
|
+
(r"color\s*:", 0.2), # Color property
|
|
243
|
+
(r"font-", 0.2), # Font properties
|
|
244
|
+
(r"margin\s*:", 0.2), # Margin property
|
|
245
|
+
],
|
|
206
246
|
}
|
|
207
247
|
|
|
208
248
|
from .utils import log_debug, log_warning
|
|
@@ -221,14 +261,22 @@ class LanguageDetector:
|
|
|
221
261
|
content: ファイルコンテンツ(任意、曖昧性解決用)
|
|
222
262
|
|
|
223
263
|
Returns:
|
|
224
|
-
(言語名, 信頼度) のタプル
|
|
264
|
+
(言語名, 信頼度) のタプル - 常に有効な言語名を返す
|
|
225
265
|
"""
|
|
266
|
+
# Handle invalid input
|
|
267
|
+
if not file_path or not isinstance(file_path, str):
|
|
268
|
+
return "unknown", 0.0
|
|
269
|
+
|
|
226
270
|
path = Path(file_path)
|
|
227
271
|
extension = path.suffix.lower()
|
|
228
272
|
|
|
229
273
|
# Direct mapping by extension
|
|
230
274
|
if extension in self.EXTENSION_MAPPING:
|
|
231
275
|
language = self.EXTENSION_MAPPING[extension]
|
|
276
|
+
|
|
277
|
+
# Ensure language is valid
|
|
278
|
+
if not language or language.strip() == "":
|
|
279
|
+
return "unknown", 0.0
|
|
232
280
|
|
|
233
281
|
# Use confidence from extension_map if available
|
|
234
282
|
if extension in self.extension_map:
|
|
@@ -242,11 +290,14 @@ class LanguageDetector:
|
|
|
242
290
|
# Resolve ambiguity using content
|
|
243
291
|
if content:
|
|
244
292
|
refined_language = self._resolve_ambiguity(extension, content)
|
|
293
|
+
# Ensure refined language is valid
|
|
294
|
+
if not refined_language or refined_language.strip() == "":
|
|
295
|
+
refined_language = "unknown"
|
|
245
296
|
return refined_language, 0.9 if refined_language != language else 0.7
|
|
246
297
|
else:
|
|
247
298
|
return language, 0.7 # Lower confidence without content
|
|
248
299
|
|
|
249
|
-
# Unknown extension
|
|
300
|
+
# Unknown extension - always return "unknown" instead of None
|
|
250
301
|
return "unknown", 0.0
|
|
251
302
|
|
|
252
303
|
def detect_from_extension(self, file_path: str) -> str:
|
|
@@ -257,10 +308,22 @@ class LanguageDetector:
|
|
|
257
308
|
file_path: File path
|
|
258
309
|
|
|
259
310
|
Returns:
|
|
260
|
-
Detected language name
|
|
311
|
+
Detected language name - 常に有効な文字列を返す
|
|
261
312
|
"""
|
|
262
|
-
|
|
263
|
-
|
|
313
|
+
# Handle invalid input
|
|
314
|
+
if not file_path or not isinstance(file_path, str):
|
|
315
|
+
return "unknown"
|
|
316
|
+
|
|
317
|
+
result = self.detect_language(file_path)
|
|
318
|
+
if isinstance(result, tuple):
|
|
319
|
+
language, _ = result
|
|
320
|
+
# Ensure language is valid
|
|
321
|
+
if not language or language.strip() == "":
|
|
322
|
+
return "unknown"
|
|
323
|
+
return language
|
|
324
|
+
else:
|
|
325
|
+
# Fallback for unexpected result format
|
|
326
|
+
return "unknown"
|
|
264
327
|
|
|
265
328
|
def is_supported(self, language: str) -> bool:
|
|
266
329
|
"""
|
|
@@ -419,11 +482,21 @@ def detect_language_from_file(file_path: str) -> str:
|
|
|
419
482
|
file_path: File path
|
|
420
483
|
|
|
421
484
|
Returns:
|
|
422
|
-
Detected language name
|
|
485
|
+
Detected language name - 常に有効な文字列を返す
|
|
423
486
|
"""
|
|
487
|
+
# Handle invalid input
|
|
488
|
+
if not file_path or not isinstance(file_path, str):
|
|
489
|
+
return "unknown"
|
|
490
|
+
|
|
424
491
|
# Create a fresh instance to ensure latest configuration
|
|
425
492
|
fresh_detector = LanguageDetector()
|
|
426
|
-
|
|
493
|
+
result = fresh_detector.detect_from_extension(file_path)
|
|
494
|
+
|
|
495
|
+
# Ensure result is valid
|
|
496
|
+
if not result or result.strip() == "":
|
|
497
|
+
return "unknown"
|
|
498
|
+
|
|
499
|
+
return result
|
|
427
500
|
|
|
428
501
|
|
|
429
502
|
def is_language_supported(language: str) -> bool:
|