tree-sitter-analyzer 1.8.4__py3-none-any.whl → 1.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. See the registry's advisory page for more details.

Files changed (64)
  1. tree_sitter_analyzer/__init__.py +1 -1
  2. tree_sitter_analyzer/api.py +4 -4
  3. tree_sitter_analyzer/cli/argument_validator.py +29 -17
  4. tree_sitter_analyzer/cli/commands/advanced_command.py +7 -5
  5. tree_sitter_analyzer/cli/commands/structure_command.py +7 -5
  6. tree_sitter_analyzer/cli/commands/summary_command.py +10 -6
  7. tree_sitter_analyzer/cli/commands/table_command.py +8 -7
  8. tree_sitter_analyzer/cli/info_commands.py +1 -1
  9. tree_sitter_analyzer/cli_main.py +3 -2
  10. tree_sitter_analyzer/core/analysis_engine.py +5 -5
  11. tree_sitter_analyzer/core/cache_service.py +3 -1
  12. tree_sitter_analyzer/core/query.py +17 -5
  13. tree_sitter_analyzer/core/query_service.py +1 -1
  14. tree_sitter_analyzer/encoding_utils.py +3 -3
  15. tree_sitter_analyzer/exceptions.py +61 -50
  16. tree_sitter_analyzer/file_handler.py +3 -0
  17. tree_sitter_analyzer/formatters/base_formatter.py +10 -5
  18. tree_sitter_analyzer/formatters/formatter_registry.py +83 -68
  19. tree_sitter_analyzer/formatters/html_formatter.py +90 -64
  20. tree_sitter_analyzer/formatters/javascript_formatter.py +21 -16
  21. tree_sitter_analyzer/formatters/language_formatter_factory.py +7 -6
  22. tree_sitter_analyzer/formatters/markdown_formatter.py +247 -124
  23. tree_sitter_analyzer/formatters/python_formatter.py +61 -38
  24. tree_sitter_analyzer/formatters/typescript_formatter.py +113 -45
  25. tree_sitter_analyzer/interfaces/mcp_server.py +2 -2
  26. tree_sitter_analyzer/language_detector.py +6 -6
  27. tree_sitter_analyzer/language_loader.py +3 -1
  28. tree_sitter_analyzer/languages/css_plugin.py +120 -61
  29. tree_sitter_analyzer/languages/html_plugin.py +159 -62
  30. tree_sitter_analyzer/languages/java_plugin.py +42 -34
  31. tree_sitter_analyzer/languages/javascript_plugin.py +59 -30
  32. tree_sitter_analyzer/languages/markdown_plugin.py +402 -368
  33. tree_sitter_analyzer/languages/python_plugin.py +111 -64
  34. tree_sitter_analyzer/languages/typescript_plugin.py +241 -132
  35. tree_sitter_analyzer/mcp/server.py +22 -18
  36. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +13 -8
  37. tree_sitter_analyzer/mcp/tools/base_tool.py +2 -2
  38. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +232 -26
  39. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +31 -23
  40. tree_sitter_analyzer/mcp/tools/list_files_tool.py +21 -19
  41. tree_sitter_analyzer/mcp/tools/query_tool.py +17 -18
  42. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +30 -31
  43. tree_sitter_analyzer/mcp/tools/search_content_tool.py +131 -77
  44. tree_sitter_analyzer/mcp/tools/table_format_tool.py +29 -16
  45. tree_sitter_analyzer/mcp/utils/file_output_factory.py +64 -51
  46. tree_sitter_analyzer/mcp/utils/file_output_manager.py +34 -24
  47. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +8 -4
  48. tree_sitter_analyzer/models.py +7 -5
  49. tree_sitter_analyzer/plugins/base.py +9 -7
  50. tree_sitter_analyzer/plugins/manager.py +1 -0
  51. tree_sitter_analyzer/queries/css.py +2 -21
  52. tree_sitter_analyzer/queries/html.py +2 -15
  53. tree_sitter_analyzer/queries/markdown.py +30 -41
  54. tree_sitter_analyzer/queries/python.py +20 -5
  55. tree_sitter_analyzer/query_loader.py +5 -5
  56. tree_sitter_analyzer/security/validator.py +114 -86
  57. tree_sitter_analyzer/utils/__init__.py +58 -28
  58. tree_sitter_analyzer/utils/tree_sitter_compat.py +72 -65
  59. tree_sitter_analyzer/utils.py +26 -15
  60. {tree_sitter_analyzer-1.8.4.dist-info → tree_sitter_analyzer-1.9.1.dist-info}/METADATA +23 -6
  61. tree_sitter_analyzer-1.9.1.dist-info/RECORD +109 -0
  62. tree_sitter_analyzer-1.8.4.dist-info/RECORD +0 -109
  63. {tree_sitter_analyzer-1.8.4.dist-info → tree_sitter_analyzer-1.9.1.dist-info}/WHEEL +0 -0
  64. {tree_sitter_analyzer-1.8.4.dist-info → tree_sitter_analyzer-1.9.1.dist-info}/entry_points.txt +0 -0
@@ -6,7 +6,8 @@ Provides specialized formatting for Markdown files, focusing on document structu
6
6
  rather than programming constructs like classes and methods.
7
7
  """
8
8
 
9
- from typing import Dict, List, Any, Optional
9
+ from typing import Any
10
+
10
11
  from .base_formatter import BaseFormatter
11
12
 
12
13
 
@@ -17,18 +18,22 @@ class MarkdownFormatter(BaseFormatter):
17
18
  super().__init__()
18
19
  self.language = "markdown"
19
20
 
20
- def format_summary(self, analysis_result: Dict[str, Any]) -> str:
21
+ def format_summary(self, analysis_result: dict[str, Any]) -> str:
21
22
  """Format summary for Markdown files"""
22
23
  file_path = analysis_result.get("file_path", "")
23
24
  elements = analysis_result.get("elements", [])
24
-
25
+
25
26
  # Count different types of Markdown elements
26
27
  headers = [e for e in elements if e.get("type") == "heading"]
27
- links = [e for e in elements if e.get("type") in ["link", "autolink", "reference_link"]]
28
+ links = [
29
+ e
30
+ for e in elements
31
+ if e.get("type") in ["link", "autolink", "reference_link"]
32
+ ]
28
33
  images = self._collect_images(elements)
29
34
  code_blocks = [e for e in elements if e.get("type") == "code_block"]
30
35
  lists = [e for e in elements if e.get("type") in ["list", "task_list"]]
31
-
36
+
32
37
  # Robust adjust for link/image counts to match other commands
33
38
  robust_counts = self._compute_robust_counts_from_file(file_path)
34
39
  if len(links) < robust_counts.get("link_count", len(links)):
@@ -37,7 +42,9 @@ class MarkdownFormatter(BaseFormatter):
37
42
  missing = robust_counts.get("link_count", 0) - len(links)
38
43
  if missing > 0:
39
44
  # Add placeholder autolink entries to align with expected count
40
- links = links + [{"text": "autolink", "url": "autolink"} for _ in range(missing)]
45
+ links = links + [
46
+ {"text": "autolink", "url": "autolink"} for _ in range(missing)
47
+ ]
41
48
 
42
49
  # Some environments under-detect reference images in elements; align summary with
43
50
  # robust image count used elsewhere (structure/advanced) by adding placeholders
@@ -48,35 +55,49 @@ class MarkdownFormatter(BaseFormatter):
48
55
  images = images + ([{"alt": "", "url": ""}] * missing)
49
56
 
50
57
  summary = {
51
- "headers": [{"name": h.get("text", "").strip(), "level": h.get("level", 1)} for h in headers],
52
- "links": [{"text": l.get("text", ""), "url": l.get("url", "")} for l in links],
53
- "images": [{"alt": i.get("alt", ""), "url": i.get("url", "")} for i in images],
54
- "code_blocks": [{"language": cb.get("language", ""), "lines": cb.get("line_count", 0)} for cb in code_blocks],
55
- "lists": [{"type": l.get("list_type", ""), "items": l.get("item_count", 0)} for l in lists]
56
- }
57
-
58
- result = {
59
- "file_path": file_path,
60
- "language": "markdown",
61
- "summary": summary
58
+ "headers": [
59
+ {"name": h.get("text", "").strip(), "level": h.get("level", 1)}
60
+ for h in headers
61
+ ],
62
+ "links": [
63
+ {"text": link.get("text", ""), "url": link.get("url", "")}
64
+ for link in links
65
+ ],
66
+ "images": [
67
+ {"alt": i.get("alt", ""), "url": i.get("url", "")} for i in images
68
+ ],
69
+ "code_blocks": [
70
+ {"language": cb.get("language", ""), "lines": cb.get("line_count", 0)}
71
+ for cb in code_blocks
72
+ ],
73
+ "lists": [
74
+ {"type": lst.get("list_type", ""), "items": lst.get("item_count", 0)}
75
+ for lst in lists
76
+ ],
62
77
  }
63
-
78
+
79
+ result = {"file_path": file_path, "language": "markdown", "summary": summary}
80
+
64
81
  return self._format_json_output("Summary Results", result)
65
82
 
66
- def format_structure(self, analysis_result: Dict[str, Any]) -> str:
83
+ def format_structure(self, analysis_result: dict[str, Any]) -> str:
67
84
  """Format structure analysis for Markdown files"""
68
85
  file_path = analysis_result.get("file_path", "")
69
86
  elements = analysis_result.get("elements", [])
70
87
  line_count = analysis_result.get("line_count", 0)
71
-
88
+
72
89
  # Organize elements by type
73
90
  headers = [e for e in elements if e.get("type") == "heading"]
74
- links = [e for e in elements if e.get("type") in ["link", "autolink", "reference_link"]]
91
+ links = [
92
+ e
93
+ for e in elements
94
+ if e.get("type") in ["link", "autolink", "reference_link"]
95
+ ]
75
96
  images = self._collect_images(elements)
76
97
  code_blocks = [e for e in elements if e.get("type") == "code_block"]
77
98
  lists = [e for e in elements if e.get("type") in ["list", "task_list"]]
78
99
  tables = [e for e in elements if e.get("type") == "table"]
79
-
100
+
80
101
  # Robust counts to avoid undercount due to parser variance
81
102
  robust_counts = self._compute_robust_counts_from_file(file_path)
82
103
 
@@ -91,43 +112,49 @@ class MarkdownFormatter(BaseFormatter):
91
112
  {
92
113
  "text": h.get("text", "").strip(),
93
114
  "level": h.get("level", 1),
94
- "line_range": h.get("line_range", {})
95
- } for h in headers
115
+ "line_range": h.get("line_range", {}),
116
+ }
117
+ for h in headers
96
118
  ],
97
119
  "links": [
98
120
  {
99
- "text": l.get("text", ""),
100
- "url": l.get("url", ""),
101
- "line_range": l.get("line_range", {})
102
- } for l in links
121
+ "text": link.get("text", ""),
122
+ "url": link.get("url", ""),
123
+ "line_range": link.get("line_range", {}),
124
+ }
125
+ for link in links
103
126
  ],
104
127
  "images": [
105
128
  {
106
129
  "alt": i.get("alt", ""),
107
130
  "url": i.get("url", ""),
108
- "line_range": i.get("line_range", {})
109
- } for i in images
131
+ "line_range": i.get("line_range", {}),
132
+ }
133
+ for i in images
110
134
  ],
111
135
  "code_blocks": [
112
136
  {
113
137
  "language": cb.get("language", ""),
114
138
  "line_count": cb.get("line_count", 0),
115
- "line_range": cb.get("line_range", {})
116
- } for cb in code_blocks
139
+ "line_range": cb.get("line_range", {}),
140
+ }
141
+ for cb in code_blocks
117
142
  ],
118
143
  "lists": [
119
144
  {
120
- "type": l.get("list_type", ""),
121
- "item_count": l.get("item_count", 0),
122
- "line_range": l.get("line_range", {})
123
- } for l in lists
145
+ "type": lst.get("list_type", ""),
146
+ "item_count": lst.get("item_count", 0),
147
+ "line_range": lst.get("line_range", {}),
148
+ }
149
+ for lst in lists
124
150
  ],
125
151
  "tables": [
126
152
  {
127
153
  "columns": t.get("column_count", 0),
128
154
  "rows": t.get("row_count", 0),
129
- "line_range": t.get("line_range", {})
130
- } for t in tables
155
+ "line_range": t.get("line_range", {}),
156
+ }
157
+ for t in tables
131
158
  ],
132
159
  "statistics": {
133
160
  "header_count": len(headers),
@@ -137,41 +164,61 @@ class MarkdownFormatter(BaseFormatter):
137
164
  "code_block_count": len(code_blocks),
138
165
  "list_count": len(lists),
139
166
  "table_count": len(tables),
140
- "total_lines": line_count
167
+ "total_lines": line_count,
141
168
  },
142
- "analysis_metadata": analysis_result.get("analysis_metadata", {})
169
+ "analysis_metadata": analysis_result.get("analysis_metadata", {}),
143
170
  }
144
-
171
+
145
172
  return self._format_json_output("Structure Analysis Results", structure)
146
173
 
147
- def format_advanced(self, analysis_result: Dict[str, Any], output_format: str = "json") -> str:
174
+ def format_advanced(
175
+ self, analysis_result: dict[str, Any], output_format: str = "json"
176
+ ) -> str:
148
177
  """Format advanced analysis for Markdown files"""
149
178
  file_path = analysis_result.get("file_path", "")
150
179
  elements = analysis_result.get("elements", [])
151
180
  line_count = analysis_result.get("line_count", 0)
152
181
  element_count = len(elements)
153
-
182
+
154
183
  # Calculate Markdown-specific metrics
155
184
  headers = [e for e in elements if e.get("type") == "heading"]
156
- links = [e for e in elements if e.get("type") in ["link", "autolink", "reference_link"]]
185
+ links = [
186
+ e
187
+ for e in elements
188
+ if e.get("type") in ["link", "autolink", "reference_link"]
189
+ ]
157
190
  images = self._collect_images(elements)
158
191
  code_blocks = [e for e in elements if e.get("type") == "code_block"]
159
192
  lists = [e for e in elements if e.get("type") in ["list", "task_list"]]
160
193
  tables = [e for e in elements if e.get("type") == "table"]
161
-
194
+
162
195
  # Calculate document structure metrics
163
196
  header_levels = [h.get("level", 1) for h in headers]
164
197
  max_header_level = max(header_levels) if header_levels else 0
165
- avg_header_level = sum(header_levels) / len(header_levels) if header_levels else 0
166
-
198
+ avg_header_level = (
199
+ sum(header_levels) / len(header_levels) if header_levels else 0
200
+ )
201
+
167
202
  # Calculate content metrics
168
203
  total_code_lines = sum(cb.get("line_count", 0) for cb in code_blocks)
169
- total_list_items = sum(l.get("item_count", 0) for l in lists)
170
-
204
+ total_list_items = sum(lst.get("item_count", 0) for lst in lists)
205
+
171
206
  # External vs internal links
172
- external_links = [l for l in links if l.get("url") and l.get("url", "").startswith(("http://", "https://"))]
173
- internal_links = [l for l in links if not (l.get("url") and l.get("url", "").startswith(("http://", "https://")))]
174
-
207
+ external_links = [
208
+ link
209
+ for link in links
210
+ if link.get("url")
211
+ and link.get("url", "").startswith(("http://", "https://"))
212
+ ]
213
+ internal_links = [
214
+ link
215
+ for link in links
216
+ if not (
217
+ link.get("url")
218
+ and link.get("url", "").startswith(("http://", "https://"))
219
+ )
220
+ ]
221
+
175
222
  # Robust counts to avoid undercount due to parser variance
176
223
  robust_counts = self._compute_robust_counts_from_file(file_path)
177
224
 
@@ -199,43 +246,51 @@ class MarkdownFormatter(BaseFormatter):
199
246
  "total_code_lines": total_code_lines,
200
247
  "list_count": len(lists),
201
248
  "total_list_items": total_list_items,
202
- "table_count": len(tables)
249
+ "table_count": len(tables),
203
250
  },
204
251
  "content_analysis": {
205
- "has_toc": any("table of contents" in h.get("text", "").lower() for h in headers),
252
+ "has_toc": any(
253
+ "table of contents" in h.get("text", "").lower() for h in headers
254
+ ),
206
255
  "has_code_examples": len(code_blocks) > 0,
207
256
  "has_images": len(images) > 0,
208
257
  "has_external_links": len(external_links) > 0,
209
- "document_complexity": self._calculate_document_complexity(headers, links, code_blocks, tables)
210
- }
258
+ "document_complexity": self._calculate_document_complexity(
259
+ headers, links, code_blocks, tables
260
+ ),
261
+ },
211
262
  }
212
-
263
+
213
264
  if output_format == "text":
214
265
  return self._format_advanced_text(advanced_data)
215
266
  else:
216
267
  return self._format_json_output("Advanced Analysis Results", advanced_data)
217
268
 
218
- def format_table(self, analysis_result: Dict[str, Any], table_type: str = "full") -> str:
269
+ def format_table(
270
+ self, analysis_result: dict[str, Any], table_type: str = "full"
271
+ ) -> str:
219
272
  """Format table output for Markdown files"""
220
273
  file_path = analysis_result.get("file_path", "")
221
274
  elements = analysis_result.get("elements", [])
222
-
275
+
223
276
  # Get document title from first header
224
277
  headers = [e for e in elements if e.get("type") == "heading"]
225
- title = headers[0].get("text", "").strip() if headers else file_path.split("/")[-1]
226
-
278
+ title = (
279
+ headers[0].get("text", "").strip() if headers else file_path.split("/")[-1]
280
+ )
281
+
227
282
  output = [f"# {title}\n"]
228
-
283
+
229
284
  # Document Overview
230
285
  output.append("## Document Overview\n")
231
- output.append(f"| Property | Value |")
232
- output.append(f"|----------|-------|")
286
+ output.append("| Property | Value |")
287
+ output.append("|----------|-------|")
233
288
  output.append(f"| File | {file_path} |")
234
- output.append(f"| Language | markdown |")
289
+ output.append("| Language | markdown |")
235
290
  output.append(f"| Total Lines | {analysis_result.get('line_count', 0)} |")
236
291
  output.append(f"| Total Elements | {len(elements)} |")
237
292
  output.append("")
238
-
293
+
239
294
  # Headers Section
240
295
  if headers:
241
296
  output.append("## Document Structure\n")
@@ -247,9 +302,13 @@ class MarkdownFormatter(BaseFormatter):
247
302
  line = header.get("line_range", {}).get("start", "")
248
303
  output.append(f"| {level} | {text} | {line} |")
249
304
  output.append("")
250
-
305
+
251
306
  # Links Section
252
- links = [e for e in elements if e.get("type") in ["link", "autolink", "reference_link"]]
307
+ links = [
308
+ e
309
+ for e in elements
310
+ if e.get("type") in ["link", "autolink", "reference_link"]
311
+ ]
253
312
  if links:
254
313
  output.append("## Links\n")
255
314
  output.append("| Text | URL | Type | Line |")
@@ -257,11 +316,15 @@ class MarkdownFormatter(BaseFormatter):
257
316
  for link in links:
258
317
  text = link.get("text", "")
259
318
  url = link.get("url", "") or ""
260
- link_type = "External" if url and url.startswith(("http://", "https://")) else "Internal"
319
+ link_type = (
320
+ "External"
321
+ if url and url.startswith(("http://", "https://"))
322
+ else "Internal"
323
+ )
261
324
  line = link.get("line_range", {}).get("start", "")
262
325
  output.append(f"| {text} | {url} | {link_type} | {line} |")
263
326
  output.append("")
264
-
327
+
265
328
  # Images Section
266
329
  images = self._collect_images(elements)
267
330
  if images:
@@ -274,7 +337,7 @@ class MarkdownFormatter(BaseFormatter):
274
337
  line = image.get("line_range", {}).get("start", "")
275
338
  output.append(f"| {alt} | {url} | {line} |")
276
339
  output.append("")
277
-
340
+
278
341
  # Code Blocks Section
279
342
  code_blocks = [e for e in elements if e.get("type") == "code_block"]
280
343
  if code_blocks:
@@ -290,7 +353,7 @@ class MarkdownFormatter(BaseFormatter):
290
353
  range_str = f"{start}-{end}" if start and end else str(start)
291
354
  output.append(f"| {language} | {lines} | {range_str} |")
292
355
  output.append("")
293
-
356
+
294
357
  # Lists Section
295
358
  lists = [e for e in elements if e.get("type") in ["list", "task_list"]]
296
359
  if lists:
@@ -303,7 +366,7 @@ class MarkdownFormatter(BaseFormatter):
303
366
  line = lst.get("line_range", {}).get("start", "")
304
367
  output.append(f"| {list_type} | {items} | {line} |")
305
368
  output.append("")
306
-
369
+
307
370
  # Tables Section
308
371
  tables = [e for e in elements if e.get("type") == "table"]
309
372
  if tables:
@@ -316,7 +379,7 @@ class MarkdownFormatter(BaseFormatter):
316
379
  line = table.get("line_range", {}).get("start", "")
317
380
  output.append(f"| {columns} | {rows} | {line} |")
318
381
  output.append("")
319
-
382
+
320
383
  # Blockquotes Section
321
384
  blockquotes = [e for e in elements if e.get("type") == "blockquote"]
322
385
  if blockquotes:
@@ -324,11 +387,15 @@ class MarkdownFormatter(BaseFormatter):
324
387
  output.append("| Content | Line |")
325
388
  output.append("|---------|------|")
326
389
  for bq in blockquotes:
327
- content = bq.get("text", "")[:50] + "..." if len(bq.get("text", "")) > 50 else bq.get("text", "")
390
+ content = (
391
+ bq.get("text", "")[:50] + "..."
392
+ if len(bq.get("text", "")) > 50
393
+ else bq.get("text", "")
394
+ )
328
395
  line = bq.get("line_range", {}).get("start", "")
329
396
  output.append(f"| {content} | {line} |")
330
397
  output.append("")
331
-
398
+
332
399
  # Horizontal Rules Section
333
400
  horizontal_rules = [e for e in elements if e.get("type") == "horizontal_rule"]
334
401
  if horizontal_rules:
@@ -339,46 +406,69 @@ class MarkdownFormatter(BaseFormatter):
339
406
  line = hr.get("line_range", {}).get("start", "")
340
407
  output.append(f"| Horizontal Rule | {line} |")
341
408
  output.append("")
342
-
409
+
343
410
  # HTML Elements Section
344
- html_elements = [e for e in elements if e.get("type") in ["html_block", "html_inline"]]
411
+ html_elements = [
412
+ e for e in elements if e.get("type") in ["html_block", "html_inline"]
413
+ ]
345
414
  if html_elements:
346
415
  output.append("## HTML Elements\n")
347
416
  output.append("| Type | Content | Line |")
348
417
  output.append("|------|---------|------|")
349
418
  for html in html_elements:
350
419
  element_type = html.get("type", "")
351
- content = html.get("name", "")[:30] + "..." if len(html.get("name", "")) > 30 else html.get("name", "")
420
+ content = (
421
+ html.get("name", "")[:30] + "..."
422
+ if len(html.get("name", "")) > 30
423
+ else html.get("name", "")
424
+ )
352
425
  line = html.get("line_range", {}).get("start", "")
353
426
  output.append(f"| {element_type} | {content} | {line} |")
354
427
  output.append("")
355
-
428
+
356
429
  # Text Formatting Section
357
- formatting_elements = [e for e in elements if e.get("type") in ["strong_emphasis", "emphasis", "inline_code", "strikethrough"]]
430
+ formatting_elements = [
431
+ e
432
+ for e in elements
433
+ if e.get("type")
434
+ in ["strong_emphasis", "emphasis", "inline_code", "strikethrough"]
435
+ ]
358
436
  if formatting_elements:
359
437
  output.append("## Text Formatting\n")
360
438
  output.append("| Type | Content | Line |")
361
439
  output.append("|------|---------|------|")
362
440
  for fmt in formatting_elements:
363
441
  format_type = fmt.get("type", "")
364
- content = fmt.get("text", "")[:30] + "..." if len(fmt.get("text", "")) > 30 else fmt.get("text", "")
442
+ content = (
443
+ fmt.get("text", "")[:30] + "..."
444
+ if len(fmt.get("text", "")) > 30
445
+ else fmt.get("text", "")
446
+ )
365
447
  line = fmt.get("line_range", {}).get("start", "")
366
448
  output.append(f"| {format_type} | {content} | {line} |")
367
449
  output.append("")
368
-
450
+
369
451
  # Footnotes Section
370
- footnotes = [e for e in elements if e.get("type") in ["footnote_reference", "footnote_definition"]]
452
+ footnotes = [
453
+ e
454
+ for e in elements
455
+ if e.get("type") in ["footnote_reference", "footnote_definition"]
456
+ ]
371
457
  if footnotes:
372
458
  output.append("## Footnotes\n")
373
459
  output.append("| Type | Content | Line |")
374
460
  output.append("|------|---------|------|")
375
461
  for fn in footnotes:
376
462
  footnote_type = fn.get("type", "")
377
- content = fn.get("text", "")[:30] + "..." if len(fn.get("text", "")) > 30 else fn.get("text", "")
463
+ content = (
464
+ fn.get("text", "")[:30] + "..."
465
+ if len(fn.get("text", "")) > 30
466
+ else fn.get("text", "")
467
+ )
378
468
  line = fn.get("line_range", {}).get("start", "")
379
469
  output.append(f"| {footnote_type} | {content} | {line} |")
380
470
  output.append("")
381
-
471
+
382
472
  # Reference Definitions Section
383
473
  references = [e for e in elements if e.get("type") == "reference_definition"]
384
474
  if references:
@@ -386,33 +476,42 @@ class MarkdownFormatter(BaseFormatter):
386
476
  output.append("| Content | Line |")
387
477
  output.append("|---------|------|")
388
478
  for ref in references:
389
- content = ref.get("name", "")[:50] + "..." if len(ref.get("name", "")) > 50 else ref.get("name", "")
479
+ content = (
480
+ ref.get("name", "")[:50] + "..."
481
+ if len(ref.get("name", "")) > 50
482
+ else ref.get("name", "")
483
+ )
390
484
  line = ref.get("line_range", {}).get("start", "")
391
485
  output.append(f"| {content} | {line} |")
392
486
  output.append("")
393
-
487
+
394
488
  return "\n".join(output)
395
489
 
396
- def _collect_images(self, elements: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
490
+ def _collect_images(self, elements: list[dict[str, Any]]) -> list[dict[str, Any]]:
397
491
  """Collect images including reference definitions that point to images.
398
492
 
399
493
  Fallback: if no explicit image reference definitions are present, also
400
494
  treat reference definitions with image-like URLs as images to keep
401
495
  counts consistent across environments.
402
496
  """
403
- images: List[Dict[str, Any]] = [
404
- e for e in elements
405
- if e.get("type") in ["image", "reference_image", "image_reference_definition"]
497
+ images: list[dict[str, Any]] = [
498
+ e
499
+ for e in elements
500
+ if e.get("type")
501
+ in ["image", "reference_image", "image_reference_definition"]
406
502
  ]
407
503
 
408
504
  # Avoid duplicates if image reference definitions already exist
409
- has_image_ref_defs = any(e.get("type") == "image_reference_definition" for e in elements)
505
+ has_image_ref_defs = any(
506
+ e.get("type") == "image_reference_definition" for e in elements
507
+ )
410
508
  if has_image_ref_defs:
411
509
  return images
412
510
 
413
511
  # Fallback: promote reference_definition with image-like URL
414
512
  try:
415
513
  import re
514
+
416
515
  image_exts = (".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp", ".bmp")
417
516
  for e in elements:
418
517
  if e.get("type") == "reference_definition":
@@ -421,33 +520,35 @@ class MarkdownFormatter(BaseFormatter):
421
520
  if not url:
422
521
  # Parse from raw content stored in name
423
522
  name_field = (e.get("name") or "").strip()
424
- m = re.match(r'^\[([^\]]+)\]:\s*([^\s]+)', name_field)
523
+ m = re.match(r"^\[([^\]]+)\]:\s*([^\s]+)", name_field)
425
524
  if m:
426
525
  alt = alt or m.group(1)
427
526
  url = m.group(2)
428
527
  if url and any(url.lower().endswith(ext) for ext in image_exts):
429
- images.append({
430
- **e,
431
- "type": "image_reference_definition",
432
- "url": url,
433
- "alt": alt,
434
- })
528
+ images.append(
529
+ {
530
+ **e,
531
+ "type": "image_reference_definition",
532
+ "url": url,
533
+ "alt": alt,
534
+ }
535
+ )
435
536
  except Exception:
436
537
  # Be conservative on any error
437
538
  return images
438
539
 
439
540
  return images
440
541
 
441
- def _format_advanced_text(self, data: Dict[str, Any]) -> str:
542
+ def _format_advanced_text(self, data: dict[str, Any]) -> str:
442
543
  """Format advanced analysis in text format"""
443
544
  output = ["--- Advanced Analysis Results ---"]
444
-
545
+
445
546
  # Basic info - format with quotes to match expected output
446
547
  output.append(f'"File: {data["file_path"]}"')
447
548
  output.append(f'"Language: {data["language"]}"')
448
549
  output.append(f'"Lines: {data["line_count"]}"')
449
550
  output.append(f'"Elements: {data["element_count"]}"')
450
-
551
+
451
552
  # Document metrics
452
553
  metrics = data["document_metrics"]
453
554
  output.append(f'"Headers: {metrics["header_count"]}"')
@@ -459,7 +560,7 @@ class MarkdownFormatter(BaseFormatter):
459
560
  output.append(f'"Code Lines: {metrics["total_code_lines"]}"')
460
561
  output.append(f'"Lists: {metrics["list_count"]}"')
461
562
  output.append(f'"Tables: {metrics["table_count"]}"')
462
-
563
+
463
564
  # Content analysis
464
565
  content = data["content_analysis"]
465
566
  output.append(f'"Has TOC: {content["has_toc"]}"')
@@ -467,26 +568,31 @@ class MarkdownFormatter(BaseFormatter):
467
568
  output.append(f'"Has Images: {content["has_images"]}"')
468
569
  output.append(f'"Has External Links: {content["has_external_links"]}"')
469
570
  output.append(f'"Document Complexity: {content["document_complexity"]}"')
470
-
571
+
471
572
  return "\n".join(output)
472
573
 
473
- def _calculate_document_complexity(self, headers: List[Dict], links: List[Dict],
474
- code_blocks: List[Dict], tables: List[Dict]) -> str:
574
+ def _calculate_document_complexity(
575
+ self,
576
+ headers: list[dict],
577
+ links: list[dict],
578
+ code_blocks: list[dict],
579
+ tables: list[dict],
580
+ ) -> str:
475
581
  """Calculate document complexity based on structure and content"""
476
582
  score = 0
477
-
583
+
478
584
  # Header complexity
479
585
  if headers:
480
586
  header_levels = [h.get("level", 1) for h in headers]
481
587
  max_level = max(header_levels)
482
588
  score += len(headers) * 2 # Base score for headers
483
- score += max_level * 3 # Deeper nesting increases complexity
484
-
589
+ score += max_level * 3 # Deeper nesting increases complexity
590
+
485
591
  # Content complexity
486
- score += len(links) * 1 # Links add moderate complexity
592
+ score += len(links) * 1 # Links add moderate complexity
487
593
  score += len(code_blocks) * 5 # Code blocks add significant complexity
488
- score += len(tables) * 3 # Tables add moderate complexity
489
-
594
+ score += len(tables) * 3 # Tables add moderate complexity
595
+
490
596
  # Classify complexity
491
597
  if score < 20:
492
598
  return "Simple"
@@ -497,36 +603,44 @@ class MarkdownFormatter(BaseFormatter):
497
603
  else:
498
604
  return "Very Complex"
499
605
 
500
- def _format_json_output(self, title: str, data: Dict[str, Any]) -> str:
606
+ def _format_json_output(self, title: str, data: dict[str, Any]) -> str:
501
607
  """Format JSON output with title"""
502
608
  import json
609
+
503
610
  output = [f"--- {title} ---"]
504
611
  output.append(json.dumps(data, indent=2, ensure_ascii=False))
505
612
  return "\n".join(output)
506
613
 
507
- def _compute_robust_counts_from_file(self, file_path: str) -> Dict[str, int]:
614
+ def _compute_robust_counts_from_file(self, file_path: str) -> dict[str, int]:
508
615
  """Compute robust counts for links and images directly from file content.
509
616
 
510
617
  This mitigates occasional undercount from AST element extraction by
511
618
  scanning the raw Markdown text with regex patterns.
512
619
  """
513
620
  import re
621
+
514
622
  counts = {"link_count": 0, "image_count": 0}
515
623
  if not file_path:
516
624
  return counts
517
625
 
518
626
  try:
519
- with open(file_path, "r", encoding="utf-8", errors="replace") as f:
627
+ with open(file_path, encoding="utf-8", errors="replace") as f:
520
628
  content = f.read()
521
629
  except Exception:
522
630
  return counts
523
631
 
524
632
  # Autolinks (URLs, mailto, and bare emails), exclude HTML tags by pattern
525
- autolink_pattern = re.compile(r"<(?:https?://[^>]+|mailto:[^>]+|[^@\s]+@[^@\s]+\.[^@\s]+)>")
633
+ autolink_pattern = re.compile(
634
+ r"<(?:https?://[^>]+|mailto:[^>]+|[^@\s]+@[^@\s]+\.[^@\s]+)>"
635
+ )
526
636
 
527
637
  # Count inline links (subtract image inlines later)
528
- inline_links_all = re.findall(r"\[[^\]]*\]\(([^)\s]+)(?:\s+\"[^\"]*\")?\)", content)
529
- inline_images = re.findall(r"!\[[^\]]*\]\(([^)\s]+)(?:\s+\"[^\"]*\")?\)", content)
638
+ inline_links_all = re.findall(
639
+ r"\[[^\]]*\]\(([^)\s]+)(?:\s+\"[^\"]*\")?\)", content
640
+ )
641
+ inline_images = re.findall(
642
+ r"!\[[^\]]*\]\(([^)\s]+)(?:\s+\"[^\"]*\")?\)", content
643
+ )
530
644
  inline_links = max(0, len(inline_links_all) - len(inline_images))
531
645
 
532
646
  # Count reference links (subtract image references later)
@@ -544,14 +658,23 @@ class MarkdownFormatter(BaseFormatter):
544
658
  # Reference images occurrences
545
659
  ref_images_count = len(ref_images)
546
660
  # Image reference definitions used by images
547
- used_labels = set(m.group(1).lower() for m in re.finditer(r"!\[[^\]]*\]\[([^\]]*)\]", content))
548
- def_pattern = re.compile(r"^\[([^\]]+)\]:\s*([^\s]+)(?:\s+\"([^\"]*)\")?", re.MULTILINE)
661
+ used_labels = {
662
+ m.group(1).lower() for m in re.finditer(r"!\[[^\]]*\]\[([^\]]*)\]", content)
663
+ }
664
+ def_pattern = re.compile(
665
+ r"^\[([^\]]+)\]:\s*([^\s]+)(?:\s+\"([^\"]*)\")?", re.MULTILINE
666
+ )
549
667
  image_ref_defs_used = 0
550
668
  for m in def_pattern.finditer(content):
551
669
  label = (m.group(1) or "").lower()
552
670
  url = (m.group(2) or "").lower()
553
- if label in used_labels or any(url.endswith(ext) for ext in [".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp", ".bmp"]):
671
+ if label in used_labels or any(
672
+ url.endswith(ext)
673
+ for ext in [".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp", ".bmp"]
674
+ ):
554
675
  image_ref_defs_used += 1
555
676
 
556
- counts["image_count"] = inline_images_count + ref_images_count + image_ref_defs_used
557
- return counts
677
+ counts["image_count"] = (
678
+ inline_images_count + ref_images_count + image_ref_defs_used
679
+ )
680
+ return counts