tree-sitter-analyzer 1.7.2__py3-none-any.whl → 1.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

@@ -0,0 +1,426 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Markdown Formatter
4
+
5
+ Provides specialized formatting for Markdown files, focusing on document structure
6
+ rather than programming constructs like classes and methods.
7
+ """
8
+
9
+ from typing import Dict, List, Any, Optional
10
+ from .base_formatter import BaseFormatter
11
+
12
+
13
+ class MarkdownFormatter(BaseFormatter):
14
+ """Formatter specialized for Markdown documents"""
15
+
16
def __init__(self) -> None:
    """Initialize the formatter and tag it with the "markdown" language."""
    # Run the base-class initializer first, then set the language attribute
    # on this instance.
    super().__init__()
    self.language = "markdown"
19
+
20
+ def format_summary(self, analysis_result: Dict[str, Any]) -> str:
21
+ """Format summary for Markdown files"""
22
+ file_path = analysis_result.get("file_path", "")
23
+ elements = analysis_result.get("elements", [])
24
+
25
+ # Count different types of Markdown elements
26
+ headers = [e for e in elements if e.get("type") == "heading"]
27
+ links = [e for e in elements if e.get("type") in ["link", "autolink", "reference_link"]]
28
+ images = [e for e in elements if e.get("type") in ["image", "reference_image"]]
29
+ code_blocks = [e for e in elements if e.get("type") == "code_block"]
30
+ lists = [e for e in elements if e.get("type") in ["list", "task_list"]]
31
+
32
+ summary = {
33
+ "headers": [{"name": h.get("text", "").strip(), "level": h.get("level", 1)} for h in headers],
34
+ "links": [{"text": l.get("text", ""), "url": l.get("url", "")} for l in links],
35
+ "images": [{"alt": i.get("alt", ""), "url": i.get("url", "")} for i in images],
36
+ "code_blocks": [{"language": cb.get("language", ""), "lines": cb.get("line_count", 0)} for cb in code_blocks],
37
+ "lists": [{"type": l.get("list_type", ""), "items": l.get("item_count", 0)} for l in lists]
38
+ }
39
+
40
+ result = {
41
+ "file_path": file_path,
42
+ "language": "markdown",
43
+ "summary": summary
44
+ }
45
+
46
+ return self._format_json_output("Summary Results", result)
47
+
48
+ def format_structure(self, analysis_result: Dict[str, Any]) -> str:
49
+ """Format structure analysis for Markdown files"""
50
+ file_path = analysis_result.get("file_path", "")
51
+ elements = analysis_result.get("elements", [])
52
+ line_count = analysis_result.get("line_count", 0)
53
+
54
+ # Organize elements by type
55
+ headers = [e for e in elements if e.get("type") == "heading"]
56
+ links = [e for e in elements if e.get("type") in ["link", "autolink", "reference_link"]]
57
+ images = [e for e in elements if e.get("type") in ["image", "reference_image"]]
58
+ code_blocks = [e for e in elements if e.get("type") == "code_block"]
59
+ lists = [e for e in elements if e.get("type") in ["list", "task_list"]]
60
+ tables = [e for e in elements if e.get("type") == "table"]
61
+
62
+ structure = {
63
+ "file_path": file_path,
64
+ "language": "markdown",
65
+ "headers": [
66
+ {
67
+ "text": h.get("text", "").strip(),
68
+ "level": h.get("level", 1),
69
+ "line_range": h.get("line_range", {})
70
+ } for h in headers
71
+ ],
72
+ "links": [
73
+ {
74
+ "text": l.get("text", ""),
75
+ "url": l.get("url", ""),
76
+ "line_range": l.get("line_range", {})
77
+ } for l in links
78
+ ],
79
+ "images": [
80
+ {
81
+ "alt": i.get("alt", ""),
82
+ "url": i.get("url", ""),
83
+ "line_range": i.get("line_range", {})
84
+ } for i in images
85
+ ],
86
+ "code_blocks": [
87
+ {
88
+ "language": cb.get("language", ""),
89
+ "line_count": cb.get("line_count", 0),
90
+ "line_range": cb.get("line_range", {})
91
+ } for cb in code_blocks
92
+ ],
93
+ "lists": [
94
+ {
95
+ "type": l.get("list_type", ""),
96
+ "item_count": l.get("item_count", 0),
97
+ "line_range": l.get("line_range", {})
98
+ } for l in lists
99
+ ],
100
+ "tables": [
101
+ {
102
+ "columns": t.get("column_count", 0),
103
+ "rows": t.get("row_count", 0),
104
+ "line_range": t.get("line_range", {})
105
+ } for t in tables
106
+ ],
107
+ "statistics": {
108
+ "header_count": len(headers),
109
+ "link_count": len(links),
110
+ "image_count": len(images),
111
+ "code_block_count": len(code_blocks),
112
+ "list_count": len(lists),
113
+ "table_count": len(tables),
114
+ "total_lines": line_count
115
+ },
116
+ "analysis_metadata": analysis_result.get("analysis_metadata", {})
117
+ }
118
+
119
+ return self._format_json_output("Structure Analysis Results", structure)
120
+
121
+ def format_advanced(self, analysis_result: Dict[str, Any], output_format: str = "json") -> str:
122
+ """Format advanced analysis for Markdown files"""
123
+ file_path = analysis_result.get("file_path", "")
124
+ elements = analysis_result.get("elements", [])
125
+ line_count = analysis_result.get("line_count", 0)
126
+ element_count = len(elements)
127
+
128
+ # Calculate Markdown-specific metrics
129
+ headers = [e for e in elements if e.get("type") == "heading"]
130
+ links = [e for e in elements if e.get("type") in ["link", "autolink", "reference_link"]]
131
+ images = [e for e in elements if e.get("type") in ["image", "reference_image"]]
132
+ code_blocks = [e for e in elements if e.get("type") == "code_block"]
133
+ lists = [e for e in elements if e.get("type") in ["list", "task_list"]]
134
+ tables = [e for e in elements if e.get("type") == "table"]
135
+
136
+ # Calculate document structure metrics
137
+ header_levels = [h.get("level", 1) for h in headers]
138
+ max_header_level = max(header_levels) if header_levels else 0
139
+ avg_header_level = sum(header_levels) / len(header_levels) if header_levels else 0
140
+
141
+ # Calculate content metrics
142
+ total_code_lines = sum(cb.get("line_count", 0) for cb in code_blocks)
143
+ total_list_items = sum(l.get("item_count", 0) for l in lists)
144
+
145
+ # External vs internal links
146
+ external_links = [l for l in links if l.get("url") and l.get("url", "").startswith(("http://", "https://"))]
147
+ internal_links = [l for l in links if not (l.get("url") and l.get("url", "").startswith(("http://", "https://")))]
148
+
149
+ advanced_data = {
150
+ "file_path": file_path,
151
+ "language": "markdown",
152
+ "line_count": line_count,
153
+ "element_count": element_count,
154
+ "success": True,
155
+ "elements": elements,
156
+ "document_metrics": {
157
+ "header_count": len(headers),
158
+ "max_header_level": max_header_level,
159
+ "avg_header_level": round(avg_header_level, 2),
160
+ "link_count": len(links),
161
+ "external_link_count": len(external_links),
162
+ "internal_link_count": len(internal_links),
163
+ "image_count": len(images),
164
+ "code_block_count": len(code_blocks),
165
+ "total_code_lines": total_code_lines,
166
+ "list_count": len(lists),
167
+ "total_list_items": total_list_items,
168
+ "table_count": len(tables)
169
+ },
170
+ "content_analysis": {
171
+ "has_toc": any("table of contents" in h.get("text", "").lower() for h in headers),
172
+ "has_code_examples": len(code_blocks) > 0,
173
+ "has_images": len(images) > 0,
174
+ "has_external_links": len(external_links) > 0,
175
+ "document_complexity": self._calculate_document_complexity(headers, links, code_blocks, tables)
176
+ }
177
+ }
178
+
179
+ if output_format == "text":
180
+ return self._format_advanced_text(advanced_data)
181
+ else:
182
+ return self._format_json_output("Advanced Analysis Results", advanced_data)
183
+
184
def format_table(self, analysis_result: Dict[str, Any], table_type: str = "full") -> str:
    """Render a Markdown analysis result as a report of Markdown tables.

    The report starts with a title (first heading text, or the file name
    when there are no headings) and a "Document Overview" table. One table
    follows for each element category that has at least one element.

    Args:
        analysis_result: Mapping read via ``.get``. Uses ``"file_path"``,
            ``"elements"`` and ``"line_count"``.
        table_type: Accepted for interface compatibility; this
            implementation does not consult it.

    Returns:
        The report as a single newline-joined string.
    """
    file_path = analysis_result.get("file_path", "")
    # A present-but-None "elements" value is treated like a missing one.
    elements = analysis_result.get("elements") or []
    output: List[str] = []

    def of_type(*kinds: str) -> List[Dict[str, Any]]:
        """Return the elements whose "type" is one of *kinds*, in order."""
        return [element for element in elements if element.get("type") in kinds]

    def field(element: Dict[str, Any], key: str, default: Any = "") -> Any:
        """Read *key*, treating a stored None like a missing key."""
        value = element.get(key, default)
        return default if value is None else value

    def start_line(element: Dict[str, Any]) -> Any:
        """Return the first line of the element, or "" when unknown."""
        return (element.get("line_range") or {}).get("start", "")

    def shorten(text: str, limit: int) -> str:
        """Cut *text* to *limit* characters, marking the cut with "..."."""
        return text[:limit] + "..." if len(text) > limit else text

    def cell(value: Any) -> str:
        """Make *value* safe inside a table cell.

        Line breaks would end the row and a bare pipe would start a new
        column, so the former become spaces and the latter is escaped.
        """
        text = str(value).replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
        return text.replace("|", "\\|")

    def row(*values: Any) -> str:
        """Build one table row from *values*."""
        return "| " + " | ".join(cell(value) for value in values) + " |"

    def add_section(title: str, columns: List[str], rows: List[str]) -> None:
        """Append a titled table to the report; skipped when *rows* is empty."""
        if not rows:
            return
        output.append(f"## {title}\n")
        output.append("| " + " | ".join(columns) + " |")
        # Each separator segment is the column label width plus its two
        # padding spaces, matching the layout of the header row.
        output.append("|" + "|".join("-" * (len(name) + 2) for name in columns) + "|")
        output.extend(rows)
        output.append("")

    # Document title: first heading, else the last path component. Both
    # separator styles are accepted so Windows paths yield the file name.
    headers = of_type("heading")
    if headers:
        title = field(headers[0], "text").strip()
    else:
        title = file_path.replace("\\", "/").split("/")[-1]
    output.append(f"# {title}\n")

    add_section(
        "Document Overview",
        ["Property", "Value"],
        [
            row("File", file_path),
            row("Language", "markdown"),
            row("Total Lines", analysis_result.get("line_count", 0)),
            row("Total Elements", len(elements)),
        ],
    )

    add_section(
        "Document Structure",
        ["Level", "Header", "Line"],
        [
            row("#" * field(header, "level", 1), field(header, "text").strip(), start_line(header))
            for header in headers
        ],
    )

    link_rows = []
    for link in of_type("link", "autolink", "reference_link"):
        url = field(link, "url")
        is_external = isinstance(url, str) and url.startswith(("http://", "https://"))
        link_rows.append(
            row(field(link, "text"), url, "External" if is_external else "Internal", start_line(link))
        )
    add_section("Links", ["Text", "URL", "Type", "Line"], link_rows)

    add_section(
        "Images",
        ["Alt Text", "URL", "Line"],
        [
            row(field(image, "alt"), field(image, "url"), start_line(image))
            for image in of_type("image", "reference_image")
        ],
    )

    code_rows = []
    for block in of_type("code_block"):
        line_range = block.get("line_range") or {}
        start = line_range.get("start", "")
        end = line_range.get("end", "")
        span = f"{start}-{end}" if start and end else str(start)
        code_rows.append(row(field(block, "language", "text"), field(block, "line_count", 0), span))
    add_section("Code Blocks", ["Language", "Lines", "Line Range"], code_rows)

    add_section(
        "Lists",
        ["Type", "Items", "Line"],
        [
            row(field(item_list, "list_type", "unordered"), field(item_list, "item_count", 0), start_line(item_list))
            for item_list in of_type("list", "task_list")
        ],
    )

    add_section(
        "Tables",
        ["Columns", "Rows", "Line"],
        [
            row(field(table, "column_count", 0), field(table, "row_count", 0), start_line(table))
            for table in of_type("table")
        ],
    )

    add_section(
        "Blockquotes",
        ["Content", "Line"],
        [
            row(shorten(field(quote, "text"), 50), start_line(quote))
            for quote in of_type("blockquote")
        ],
    )

    add_section(
        "Horizontal Rules",
        ["Type", "Line"],
        [row("Horizontal Rule", start_line(rule)) for rule in of_type("horizontal_rule")],
    )

    add_section(
        "HTML Elements",
        ["Type", "Content", "Line"],
        [
            row(field(html, "type"), shorten(field(html, "name"), 30), start_line(html))
            for html in of_type("html_block", "html_inline")
        ],
    )

    add_section(
        "Text Formatting",
        ["Type", "Content", "Line"],
        [
            row(field(fmt, "type"), shorten(field(fmt, "text"), 30), start_line(fmt))
            for fmt in of_type("strong_emphasis", "emphasis", "inline_code", "strikethrough")
        ],
    )

    add_section(
        "Footnotes",
        ["Type", "Content", "Line"],
        [
            row(field(note, "type"), shorten(field(note, "text"), 30), start_line(note))
            for note in of_type("footnote_reference", "footnote_definition")
        ],
    )

    add_section(
        "Reference Definitions",
        ["Content", "Line"],
        [
            row(shorten(field(ref, "name"), 50), start_line(ref))
            for ref in of_type("reference_definition")
        ],
    )

    return "\n".join(output)
361
+
362
+ def _format_advanced_text(self, data: Dict[str, Any]) -> str:
363
+ """Format advanced analysis in text format"""
364
+ output = ["--- Advanced Analysis Results ---"]
365
+
366
+ # Basic info
367
+ output.append(f'"File: {data["file_path"]}"')
368
+ output.append(f'"Language: {data["language"]}"')
369
+ output.append(f'"Lines: {data["line_count"]}"')
370
+ output.append(f'"Elements: {data["element_count"]}"')
371
+
372
+ # Document metrics
373
+ metrics = data["document_metrics"]
374
+ output.append(f'"Headers: {metrics["header_count"]}"')
375
+ output.append(f'"Max Header Level: {metrics["max_header_level"]}"')
376
+ output.append(f'"Links: {metrics["link_count"]}"')
377
+ output.append(f'"External Links: {metrics["external_link_count"]}"')
378
+ output.append(f'"Images: {metrics["image_count"]}"')
379
+ output.append(f'"Code Blocks: {metrics["code_block_count"]}"')
380
+ output.append(f'"Code Lines: {metrics["total_code_lines"]}"')
381
+ output.append(f'"Lists: {metrics["list_count"]}"')
382
+ output.append(f'"Tables: {metrics["table_count"]}"')
383
+
384
+ # Content analysis
385
+ content = data["content_analysis"]
386
+ output.append(f'"Has TOC: {content["has_toc"]}"')
387
+ output.append(f'"Has Code: {content["has_code_examples"]}"')
388
+ output.append(f'"Has Images: {content["has_images"]}"')
389
+ output.append(f'"Has External Links: {content["has_external_links"]}"')
390
+ output.append(f'"Document Complexity: {content["document_complexity"]}"')
391
+
392
+ return "\n".join(output)
393
+
394
+ def _calculate_document_complexity(self, headers: List[Dict], links: List[Dict],
395
+ code_blocks: List[Dict], tables: List[Dict]) -> str:
396
+ """Calculate document complexity based on structure and content"""
397
+ score = 0
398
+
399
+ # Header complexity
400
+ if headers:
401
+ header_levels = [h.get("level", 1) for h in headers]
402
+ max_level = max(header_levels)
403
+ score += len(headers) * 2 # Base score for headers
404
+ score += max_level * 3 # Deeper nesting increases complexity
405
+
406
+ # Content complexity
407
+ score += len(links) * 1 # Links add moderate complexity
408
+ score += len(code_blocks) * 5 # Code blocks add significant complexity
409
+ score += len(tables) * 3 # Tables add moderate complexity
410
+
411
+ # Classify complexity
412
+ if score < 20:
413
+ return "Simple"
414
+ elif score < 50:
415
+ return "Moderate"
416
+ elif score < 100:
417
+ return "Complex"
418
+ else:
419
+ return "Very Complex"
420
+
421
+ def _format_json_output(self, title: str, data: Dict[str, Any]) -> str:
422
+ """Format JSON output with title"""
423
+ import json
424
+ output = [f"--- {title} ---"]
425
+ output.append(json.dumps(data, indent=2, ensure_ascii=False))
426
+ return "\n".join(output)
@@ -59,6 +59,13 @@ class LanguageDetector:
59
59
  ".m": "objc", # Ambiguous (MATLAB as well)
60
60
  ".dart": "dart",
61
61
  ".elm": "elm",
62
+ # Markdown family
63
+ ".md": "markdown",
64
+ ".markdown": "markdown",
65
+ ".mdown": "markdown",
66
+ ".mkd": "markdown",
67
+ ".mkdn": "markdown",
68
+ ".mdx": "markdown",
62
69
  }
63
70
 
64
71
  # Ambiguous extensions (map to multiple languages)
@@ -92,6 +99,7 @@ class LanguageDetector:
92
99
  "cpp",
93
100
  "rust",
94
101
  "go",
102
+ "markdown",
95
103
  }
96
104
 
97
105
  def __init__(self) -> None:
@@ -128,6 +136,13 @@ class LanguageDetector:
128
136
  ".r": ("r", 0.9),
129
137
  ".m": ("objectivec", 0.7),
130
138
  ".mm": ("objectivec", 0.8),
139
+ # Markdown extensions
140
+ ".md": ("markdown", 0.9),
141
+ ".markdown": ("markdown", 0.9),
142
+ ".mdown": ("markdown", 0.8),
143
+ ".mkd": ("markdown", 0.8),
144
+ ".mkdn": ("markdown", 0.8),
145
+ ".mdx": ("markdown", 0.7), # MDX might be mixed with JSX
131
146
  }
132
147
 
133
148
  # Content-based detection patterns
@@ -169,6 +184,16 @@ class LanguageDetector:
169
184
  (r"std::\w+", 0.2),
170
185
  (r"class\s+\w+\s*{", 0.3),
171
186
  ],
187
+ "markdown": [
188
+ (r"^#{1,6}\s+", 0.4), # ATX headers
189
+ (r"^\s*[-*+]\s+", 0.3), # List items
190
+ (r"```[\w]*", 0.3), # Fenced code blocks
191
+ (r"\[.*\]\(.*\)", 0.2), # Links
192
+ (r"!\[.*\]\(.*\)", 0.2), # Images
193
+ (r"^\s*>\s+", 0.2), # Blockquotes
194
+ (r"^\s*\|.*\|", 0.2), # Tables
195
+ (r"^[-=]{3,}$", 0.2), # Setext headers or horizontal rules
196
+ ],
172
197
  }
173
198
 
174
199
  from .utils import log_debug, log_warning
@@ -196,6 +221,11 @@ class LanguageDetector:
196
221
  if extension in self.EXTENSION_MAPPING:
197
222
  language = self.EXTENSION_MAPPING[extension]
198
223
 
224
+ # Use confidence from extension_map if available
225
+ if extension in self.extension_map:
226
+ _, confidence = self.extension_map[extension]
227
+ return language, confidence
228
+
199
229
  # No ambiguity -> high confidence
200
230
  if extension not in self.AMBIGUOUS_EXTENSIONS:
201
231
  return language, 1.0
@@ -36,6 +36,7 @@ class LanguageLoader:
36
36
  "cpp": "tree_sitter_cpp",
37
37
  "rust": "tree_sitter_rust",
38
38
  "go": "tree_sitter_go",
39
+ "markdown": "tree_sitter_markdown",
39
40
  }
40
41
 
41
42
  # TypeScript special handling (TypeScript and TSX)