tree-sitter-analyzer 1.8.3__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (64) hide show
  1. tree_sitter_analyzer/__init__.py +1 -1
  2. tree_sitter_analyzer/api.py +4 -4
  3. tree_sitter_analyzer/cli/argument_validator.py +29 -17
  4. tree_sitter_analyzer/cli/commands/advanced_command.py +7 -5
  5. tree_sitter_analyzer/cli/commands/structure_command.py +7 -5
  6. tree_sitter_analyzer/cli/commands/summary_command.py +10 -6
  7. tree_sitter_analyzer/cli/commands/table_command.py +8 -7
  8. tree_sitter_analyzer/cli/info_commands.py +1 -1
  9. tree_sitter_analyzer/cli_main.py +3 -2
  10. tree_sitter_analyzer/core/analysis_engine.py +5 -5
  11. tree_sitter_analyzer/core/cache_service.py +3 -1
  12. tree_sitter_analyzer/core/query.py +17 -5
  13. tree_sitter_analyzer/core/query_service.py +1 -1
  14. tree_sitter_analyzer/encoding_utils.py +3 -3
  15. tree_sitter_analyzer/exceptions.py +61 -50
  16. tree_sitter_analyzer/file_handler.py +3 -0
  17. tree_sitter_analyzer/formatters/base_formatter.py +10 -5
  18. tree_sitter_analyzer/formatters/formatter_registry.py +83 -68
  19. tree_sitter_analyzer/formatters/html_formatter.py +90 -54
  20. tree_sitter_analyzer/formatters/javascript_formatter.py +21 -16
  21. tree_sitter_analyzer/formatters/language_formatter_factory.py +7 -6
  22. tree_sitter_analyzer/formatters/markdown_formatter.py +247 -124
  23. tree_sitter_analyzer/formatters/python_formatter.py +61 -38
  24. tree_sitter_analyzer/formatters/typescript_formatter.py +113 -45
  25. tree_sitter_analyzer/interfaces/mcp_server.py +2 -2
  26. tree_sitter_analyzer/language_detector.py +6 -6
  27. tree_sitter_analyzer/language_loader.py +3 -1
  28. tree_sitter_analyzer/languages/css_plugin.py +120 -61
  29. tree_sitter_analyzer/languages/html_plugin.py +159 -62
  30. tree_sitter_analyzer/languages/java_plugin.py +42 -34
  31. tree_sitter_analyzer/languages/javascript_plugin.py +59 -30
  32. tree_sitter_analyzer/languages/markdown_plugin.py +402 -368
  33. tree_sitter_analyzer/languages/python_plugin.py +111 -64
  34. tree_sitter_analyzer/languages/typescript_plugin.py +241 -132
  35. tree_sitter_analyzer/mcp/server.py +22 -18
  36. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +13 -8
  37. tree_sitter_analyzer/mcp/tools/base_tool.py +2 -2
  38. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +232 -26
  39. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +31 -23
  40. tree_sitter_analyzer/mcp/tools/list_files_tool.py +21 -19
  41. tree_sitter_analyzer/mcp/tools/query_tool.py +17 -18
  42. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +30 -31
  43. tree_sitter_analyzer/mcp/tools/search_content_tool.py +131 -77
  44. tree_sitter_analyzer/mcp/tools/table_format_tool.py +29 -16
  45. tree_sitter_analyzer/mcp/utils/file_output_factory.py +64 -51
  46. tree_sitter_analyzer/mcp/utils/file_output_manager.py +34 -24
  47. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +8 -4
  48. tree_sitter_analyzer/models.py +7 -5
  49. tree_sitter_analyzer/plugins/base.py +9 -7
  50. tree_sitter_analyzer/plugins/manager.py +1 -0
  51. tree_sitter_analyzer/queries/css.py +2 -21
  52. tree_sitter_analyzer/queries/html.py +2 -15
  53. tree_sitter_analyzer/queries/markdown.py +30 -41
  54. tree_sitter_analyzer/queries/python.py +20 -5
  55. tree_sitter_analyzer/query_loader.py +5 -5
  56. tree_sitter_analyzer/security/validator.py +114 -86
  57. tree_sitter_analyzer/utils/__init__.py +58 -28
  58. tree_sitter_analyzer/utils/tree_sitter_compat.py +72 -65
  59. tree_sitter_analyzer/utils.py +83 -25
  60. {tree_sitter_analyzer-1.8.3.dist-info → tree_sitter_analyzer-1.9.0.dist-info}/METADATA +19 -5
  61. tree_sitter_analyzer-1.9.0.dist-info/RECORD +109 -0
  62. tree_sitter_analyzer-1.8.3.dist-info/RECORD +0 -109
  63. {tree_sitter_analyzer-1.8.3.dist-info → tree_sitter_analyzer-1.9.0.dist-info}/WHEEL +0 -0
  64. {tree_sitter_analyzer-1.8.3.dist-info → tree_sitter_analyzer-1.9.0.dist-info}/entry_points.txt +0 -0
@@ -24,12 +24,12 @@ from ..encoding_utils import extract_text_slice, safe_encode
24
24
  from ..models import AnalysisResult, CodeElement
25
25
  from ..plugins.base import ElementExtractor, LanguagePlugin
26
26
  from ..utils import log_debug, log_error, log_warning
27
- from ..utils.tree_sitter_compat import TreeSitterQueryCompat, get_node_text_safe
27
+ from ..utils.tree_sitter_compat import TreeSitterQueryCompat
28
28
 
29
29
 
30
30
  class MarkdownElement(CodeElement):
31
31
  """Markdown-specific code element"""
32
-
32
+
33
33
  def __init__(
34
34
  self,
35
35
  name: str,
@@ -38,13 +38,13 @@ class MarkdownElement(CodeElement):
38
38
  raw_text: str,
39
39
  language: str = "markdown",
40
40
  element_type: str = "markdown",
41
- level: Optional[int] = None,
42
- url: Optional[str] = None,
43
- alt_text: Optional[str] = None,
44
- title: Optional[str] = None,
45
- language_info: Optional[str] = None,
46
- is_checked: Optional[bool] = None,
47
- **kwargs
41
+ level: int | None = None,
42
+ url: str | None = None,
43
+ alt_text: str | None = None,
44
+ title: str | None = None,
45
+ language_info: str | None = None,
46
+ is_checked: bool | None = None,
47
+ **kwargs,
48
48
  ):
49
49
  super().__init__(
50
50
  name=name,
@@ -52,7 +52,7 @@ class MarkdownElement(CodeElement):
52
52
  end_line=end_line,
53
53
  raw_text=raw_text,
54
54
  language=language,
55
- **kwargs
55
+ **kwargs,
56
56
  )
57
57
  self.element_type = element_type
58
58
  self.level = level # For headers (1-6)
@@ -172,15 +172,15 @@ class MarkdownElementExtractor(ElementExtractor):
172
172
  try:
173
173
  # Track extracted links to prevent global duplicates (ensure reset)
174
174
  self._extracted_links = set()
175
-
175
+
176
176
  self._extract_inline_links(tree.root_node, links)
177
177
  self._extract_reference_links(tree.root_node, links)
178
178
  self._extract_autolinks(tree.root_node, links)
179
-
179
+
180
180
  # Clean up after extraction is complete
181
- if hasattr(self, '_extracted_links'):
182
- delattr(self, '_extracted_links')
183
-
181
+ if hasattr(self, "_extracted_links"):
182
+ delattr(self, "_extracted_links")
183
+
184
184
  except Exception as e:
185
185
  log_debug(f"Error during link extraction: {e}")
186
186
  return []
@@ -189,7 +189,7 @@ class MarkdownElementExtractor(ElementExtractor):
189
189
  seen = set()
190
190
  unique_links = []
191
191
  for link in links:
192
- key = (getattr(link, 'text', '') or "", getattr(link, 'url', '') or "")
192
+ key = (getattr(link, "text", "") or "", getattr(link, "url", "") or "")
193
193
  if key not in seen:
194
194
  seen.add(key)
195
195
  unique_links.append(link)
@@ -292,7 +292,9 @@ class MarkdownElementExtractor(ElementExtractor):
292
292
  horizontal_rules: list[MarkdownElement] = []
293
293
 
294
294
  if tree is None or tree.root_node is None:
295
- log_debug("Tree or root_node is None, returning empty horizontal rules list")
295
+ log_debug(
296
+ "Tree or root_node is None, returning empty horizontal rules list"
297
+ )
296
298
  return horizontal_rules
297
299
 
298
300
  try:
@@ -339,7 +341,9 @@ class MarkdownElementExtractor(ElementExtractor):
339
341
  formatting_elements: list[MarkdownElement] = []
340
342
 
341
343
  if tree is None or tree.root_node is None:
342
- log_debug("Tree or root_node is None, returning empty formatting elements list")
344
+ log_debug(
345
+ "Tree or root_node is None, returning empty formatting elements list"
346
+ )
343
347
  return formatting_elements
344
348
 
345
349
  try:
@@ -454,10 +458,10 @@ class MarkdownElementExtractor(ElementExtractor):
454
458
  start_point = node.start_point
455
459
  end_point = node.end_point
456
460
 
457
- if (start_point[0] < 0 or start_point[0] >= len(self.content_lines)):
461
+ if start_point[0] < 0 or start_point[0] >= len(self.content_lines):
458
462
  return ""
459
-
460
- if (end_point[0] < 0 or end_point[0] >= len(self.content_lines)):
463
+
464
+ if end_point[0] < 0 or end_point[0] >= len(self.content_lines):
461
465
  return ""
462
466
 
463
467
  if start_point[0] == end_point[0]:
@@ -469,7 +473,9 @@ class MarkdownElementExtractor(ElementExtractor):
469
473
  return result
470
474
  else:
471
475
  lines = []
472
- for i in range(start_point[0], min(end_point[0] + 1, len(self.content_lines))):
476
+ for i in range(
477
+ start_point[0], min(end_point[0] + 1, len(self.content_lines))
478
+ ):
473
479
  if i < len(self.content_lines):
474
480
  line = self.content_lines[i]
475
481
  if i == start_point[0] and i == end_point[0]:
@@ -492,7 +498,9 @@ class MarkdownElementExtractor(ElementExtractor):
492
498
  log_error(f"Fallback text extraction also failed: {fallback_error}")
493
499
  return ""
494
500
 
495
- def _extract_atx_headers(self, root_node: "tree_sitter.Node", headers: list[MarkdownElement]) -> None:
501
+ def _extract_atx_headers(
502
+ self, root_node: "tree_sitter.Node", headers: list[MarkdownElement]
503
+ ) -> None:
496
504
  """Extract ATX-style headers (# ## ### etc.)"""
497
505
  for node in self._traverse_nodes(root_node):
498
506
  if node.type == "atx_heading":
@@ -500,23 +508,23 @@ class MarkdownElementExtractor(ElementExtractor):
500
508
  start_line = node.start_point[0] + 1
501
509
  end_line = node.end_point[0] + 1
502
510
  raw_text = self._get_node_text_optimized(node)
503
-
511
+
504
512
  # Extract header level and content
505
513
  level = 1
506
514
  content = raw_text.strip()
507
-
515
+
508
516
  # Count # symbols to determine level
509
517
  if content.startswith("#"):
510
518
  level = len(content) - len(content.lstrip("#"))
511
519
  content = content.lstrip("# ").rstrip()
512
-
520
+
513
521
  header = MarkdownElement(
514
522
  name=content or f"Header Level {level}",
515
523
  start_line=start_line,
516
524
  end_line=end_line,
517
525
  raw_text=raw_text,
518
526
  element_type="heading",
519
- level=level
527
+ level=level,
520
528
  )
521
529
  # Add additional attributes for formatter
522
530
  header.text = content or f"Header Level {level}"
@@ -525,7 +533,9 @@ class MarkdownElementExtractor(ElementExtractor):
525
533
  except Exception as e:
526
534
  log_debug(f"Failed to extract ATX header: {e}")
527
535
 
528
- def _extract_setext_headers(self, root_node: "tree_sitter.Node", headers: list[MarkdownElement]) -> None:
536
+ def _extract_setext_headers(
537
+ self, root_node: "tree_sitter.Node", headers: list[MarkdownElement]
538
+ ) -> None:
529
539
  """Extract Setext-style headers (underlined)"""
530
540
  for node in self._traverse_nodes(root_node):
531
541
  if node.type == "setext_heading":
@@ -533,7 +543,7 @@ class MarkdownElementExtractor(ElementExtractor):
533
543
  start_line = node.start_point[0] + 1
534
544
  end_line = node.end_point[0] + 1
535
545
  raw_text = self._get_node_text_optimized(node)
536
-
546
+
537
547
  # Determine level based on underline character
538
548
  level = 2 # Default to H2
539
549
  lines = raw_text.strip().split("\n")
@@ -546,14 +556,14 @@ class MarkdownElementExtractor(ElementExtractor):
546
556
  content = lines[0].strip()
547
557
  else:
548
558
  content = raw_text.strip()
549
-
559
+
550
560
  header = MarkdownElement(
551
561
  name=content or f"Header Level {level}",
552
562
  start_line=start_line,
553
563
  end_line=end_line,
554
564
  raw_text=raw_text,
555
565
  element_type="heading",
556
- level=level
566
+ level=level,
557
567
  )
558
568
  # Add additional attributes for formatter
559
569
  header.text = content or f"Header Level {level}"
@@ -562,7 +572,9 @@ class MarkdownElementExtractor(ElementExtractor):
562
572
  except Exception as e:
563
573
  log_debug(f"Failed to extract Setext header: {e}")
564
574
 
565
- def _extract_fenced_code_blocks(self, root_node: "tree_sitter.Node", code_blocks: list[MarkdownElement]) -> None:
575
+ def _extract_fenced_code_blocks(
576
+ self, root_node: "tree_sitter.Node", code_blocks: list[MarkdownElement]
577
+ ) -> None:
566
578
  """Extract fenced code blocks"""
567
579
  for node in self._traverse_nodes(root_node):
568
580
  if node.type == "fenced_code_block":
@@ -570,13 +582,13 @@ class MarkdownElementExtractor(ElementExtractor):
570
582
  start_line = node.start_point[0] + 1
571
583
  end_line = node.end_point[0] + 1
572
584
  raw_text = self._get_node_text_optimized(node)
573
-
585
+
574
586
  # Extract language info
575
587
  language_info = None
576
588
  lines = raw_text.strip().split("\n")
577
589
  if lines and lines[0].startswith("```"):
578
590
  language_info = lines[0][3:].strip()
579
-
591
+
580
592
  # Extract content (excluding fence markers)
581
593
  content_lines = []
582
594
  in_content = False
@@ -589,17 +601,16 @@ class MarkdownElementExtractor(ElementExtractor):
589
601
  break
590
602
  if in_content:
591
603
  content_lines.append(line)
592
-
593
- content = "\n".join(content_lines)
604
+
594
605
  name = f"Code Block ({language_info or 'unknown'})"
595
-
606
+
596
607
  code_block = MarkdownElement(
597
608
  name=name,
598
609
  start_line=start_line,
599
610
  end_line=end_line,
600
611
  raw_text=raw_text,
601
612
  element_type="code_block",
602
- language_info=language_info
613
+ language_info=language_info,
603
614
  )
604
615
  # Add additional attributes for formatter
605
616
  code_block.language = language_info or "text"
@@ -609,7 +620,9 @@ class MarkdownElementExtractor(ElementExtractor):
609
620
  except Exception as e:
610
621
  log_debug(f"Failed to extract fenced code block: {e}")
611
622
 
612
- def _extract_indented_code_blocks(self, root_node: "tree_sitter.Node", code_blocks: list[MarkdownElement]) -> None:
623
+ def _extract_indented_code_blocks(
624
+ self, root_node: "tree_sitter.Node", code_blocks: list[MarkdownElement]
625
+ ) -> None:
613
626
  """Extract indented code blocks"""
614
627
  for node in self._traverse_nodes(root_node):
615
628
  if node.type == "indented_code_block":
@@ -617,14 +630,14 @@ class MarkdownElementExtractor(ElementExtractor):
617
630
  start_line = node.start_point[0] + 1
618
631
  end_line = node.end_point[0] + 1
619
632
  raw_text = self._get_node_text_optimized(node)
620
-
633
+
621
634
  code_block = MarkdownElement(
622
635
  name="Indented Code Block",
623
636
  start_line=start_line,
624
637
  end_line=end_line,
625
638
  raw_text=raw_text,
626
639
  element_type="code_block",
627
- language_info="indented"
640
+ language_info="indented",
628
641
  )
629
642
  # Add additional attributes for formatter
630
643
  code_block.language = "text"
@@ -634,10 +647,12 @@ class MarkdownElementExtractor(ElementExtractor):
634
647
  except Exception as e:
635
648
  log_debug(f"Failed to extract indented code block: {e}")
636
649
 
637
- def _extract_inline_links(self, root_node: "tree_sitter.Node", links: list[MarkdownElement]) -> None:
650
+ def _extract_inline_links(
651
+ self, root_node: "tree_sitter.Node", links: list[MarkdownElement]
652
+ ) -> None:
638
653
  """Extract inline links"""
639
654
  import re
640
-
655
+
641
656
  # Extract links from text within inline nodes using regular expressions
642
657
  for node in self._traverse_nodes(root_node):
643
658
  if node.type == "inline":
@@ -645,27 +660,30 @@ class MarkdownElementExtractor(ElementExtractor):
645
660
  raw_text = self._get_node_text_optimized(node)
646
661
  if not raw_text:
647
662
  continue
648
-
663
+
649
664
  # Inline link pattern: [text](url "title") (excluding images)
650
665
  inline_pattern = r'(?<!\!)\[([^\]]*)\]\(([^)]*?)(?:\s+"([^"]*)")?\)'
651
666
  matches = re.finditer(inline_pattern, raw_text)
652
-
667
+
653
668
  for match in matches:
654
669
  text = match.group(1) or ""
655
670
  url = match.group(2) or ""
656
671
  title = match.group(3) or ""
657
-
672
+
658
673
  # Global duplicate check: process same text and URL combination only once
659
674
  link_signature = f"{text}|{url}"
660
- if hasattr(self, '_extracted_links') and link_signature in self._extracted_links:
675
+ if (
676
+ hasattr(self, "_extracted_links")
677
+ and link_signature in self._extracted_links
678
+ ):
661
679
  continue
662
-
663
- if hasattr(self, '_extracted_links'):
680
+
681
+ if hasattr(self, "_extracted_links"):
664
682
  self._extracted_links.add(link_signature)
665
-
683
+
666
684
  start_line = node.start_point[0] + 1
667
685
  end_line = node.end_point[0] + 1
668
-
686
+
669
687
  link = MarkdownElement(
670
688
  name=text or "Link",
671
689
  start_line=start_line,
@@ -673,72 +691,76 @@ class MarkdownElementExtractor(ElementExtractor):
673
691
  raw_text=match.group(0),
674
692
  element_type="link",
675
693
  url=url,
676
- title=title
694
+ title=title,
677
695
  )
678
696
  # Add additional attributes for formatter
679
697
  link.text = text or "Link"
680
698
  link.type = "link"
681
699
  links.append(link)
682
-
700
+
683
701
  except Exception as e:
684
702
  log_debug(f"Failed to extract inline link: {e}")
685
703
 
686
- def _extract_reference_links(self, root_node: "tree_sitter.Node", links: list[MarkdownElement]) -> None:
704
+ def _extract_reference_links(
705
+ self, root_node: "tree_sitter.Node", links: list[MarkdownElement]
706
+ ) -> None:
687
707
  """Extract reference links"""
688
708
  import re
689
-
709
+
690
710
  # Reference links also need to be extracted from inline nodes
691
711
  # Track already processed reference links to avoid duplicates
692
712
  processed_ref_links = set()
693
-
713
+
694
714
  for node in self._traverse_nodes(root_node):
695
715
  if node.type == "inline":
696
716
  try:
697
717
  raw_text = self._get_node_text_optimized(node)
698
718
  if not raw_text:
699
719
  continue
700
-
720
+
701
721
  # Reference link pattern: [text][ref]
702
- ref_pattern = r'\[([^\]]*)\]\[([^\]]*)\]'
722
+ ref_pattern = r"\[([^\]]*)\]\[([^\]]*)\]"
703
723
  matches = re.finditer(ref_pattern, raw_text)
704
-
724
+
705
725
  for match in matches:
706
726
  text = match.group(1) or ""
707
727
  ref = match.group(2) or ""
708
-
728
+
709
729
  # Skip image references (starting with !)
710
- if match.start() > 0 and raw_text[match.start()-1] == '!':
730
+ if match.start() > 0 and raw_text[match.start() - 1] == "!":
711
731
  continue
712
-
732
+
713
733
  # Duplicate check: process same text and reference combination only once
714
734
  start_line = node.start_point[0] + 1
715
735
  ref_link_key = (text, ref, start_line)
716
-
736
+
717
737
  if ref_link_key in processed_ref_links:
718
738
  continue
719
739
  processed_ref_links.add(ref_link_key)
720
-
740
+
721
741
  end_line = node.end_point[0] + 1
722
-
742
+
723
743
  link = MarkdownElement(
724
744
  name=text or "Reference Link",
725
745
  start_line=start_line,
726
746
  end_line=end_line,
727
747
  raw_text=match.group(0),
728
- element_type="reference_link"
748
+ element_type="reference_link",
729
749
  )
730
750
  # Add additional attributes for formatter
731
751
  link.text = text or "Reference Link"
732
752
  link.type = "reference_link"
733
753
  links.append(link)
734
-
754
+
735
755
  except Exception as e:
736
756
  log_debug(f"Failed to extract reference link: {e}")
737
757
 
738
- def _extract_autolinks(self, root_node: "tree_sitter.Node", links: list[MarkdownElement]) -> None:
758
+ def _extract_autolinks(
759
+ self, root_node: "tree_sitter.Node", links: list[MarkdownElement]
760
+ ) -> None:
739
761
  """Extract autolinks"""
740
762
  import re
741
-
763
+
742
764
  # Extract autolinks from text within inline nodes using regular expressions
743
765
  for node in self._traverse_nodes(root_node):
744
766
  if node.type == "inline":
@@ -746,46 +768,53 @@ class MarkdownElementExtractor(ElementExtractor):
746
768
  raw_text = self._get_node_text_optimized(node)
747
769
  if not raw_text:
748
770
  continue
749
-
771
+
750
772
  # Autolink pattern: <url> or <email>
751
- autolink_pattern = r'<(https?://[^>]+|mailto:[^>]+|[^@\s]+@[^@\s]+\.[^@\s]+)>'
773
+ autolink_pattern = (
774
+ r"<(https?://[^>]+|mailto:[^>]+|[^@\s]+@[^@\s]+\.[^@\s]+)>"
775
+ )
752
776
  matches = re.finditer(autolink_pattern, raw_text)
753
-
777
+
754
778
  for match in matches:
755
779
  url = match.group(1) or ""
756
780
  full_match = match.group(0)
757
-
781
+
758
782
  # Global duplicate check: process same URL for autolinks only once
759
783
  autolink_signature = f"autolink|{url}"
760
- if hasattr(self, '_extracted_links') and autolink_signature in self._extracted_links:
784
+ if (
785
+ hasattr(self, "_extracted_links")
786
+ and autolink_signature in self._extracted_links
787
+ ):
761
788
  continue
762
-
763
- if hasattr(self, '_extracted_links'):
789
+
790
+ if hasattr(self, "_extracted_links"):
764
791
  self._extracted_links.add(autolink_signature)
765
-
792
+
766
793
  start_line = node.start_point[0] + 1
767
794
  end_line = node.end_point[0] + 1
768
-
795
+
769
796
  link = MarkdownElement(
770
797
  name=url or "Autolink",
771
798
  start_line=start_line,
772
799
  end_line=end_line,
773
800
  raw_text=full_match,
774
801
  element_type="autolink",
775
- url=url
802
+ url=url,
776
803
  )
777
804
  # Add additional attributes for formatter
778
805
  link.text = url or "Autolink"
779
806
  link.type = "autolink"
780
807
  links.append(link)
781
-
808
+
782
809
  except Exception as e:
783
810
  log_debug(f"Failed to extract autolink: {e}")
784
811
 
785
- def _extract_inline_images(self, root_node: "tree_sitter.Node", images: list[MarkdownElement]) -> None:
812
+ def _extract_inline_images(
813
+ self, root_node: "tree_sitter.Node", images: list[MarkdownElement]
814
+ ) -> None:
786
815
  """Extract inline images"""
787
816
  import re
788
-
817
+
789
818
  # Extract images from text within inline nodes using regular expressions
790
819
  for node in self._traverse_nodes(root_node):
791
820
  if node.type == "inline":
@@ -793,20 +822,20 @@ class MarkdownElementExtractor(ElementExtractor):
793
822
  raw_text = self._get_node_text_optimized(node)
794
823
  if not raw_text:
795
824
  continue
796
-
825
+
797
826
  # Inline image pattern: ![alt](url "title")
798
827
  image_pattern = r'!\[([^\]]*)\]\(([^)]*?)(?:\s+"([^"]*)")?\)'
799
828
  matches = re.finditer(image_pattern, raw_text)
800
-
829
+
801
830
  for match in matches:
802
831
  alt_text = match.group(1) or ""
803
832
  url = match.group(2) or ""
804
833
  title = match.group(3) or ""
805
-
834
+
806
835
  # Calculate line number from matched position
807
836
  start_line = node.start_point[0] + 1
808
837
  end_line = node.end_point[0] + 1
809
-
838
+
810
839
  image = MarkdownElement(
811
840
  name=alt_text or "Image",
812
841
  start_line=start_line,
@@ -815,20 +844,22 @@ class MarkdownElementExtractor(ElementExtractor):
815
844
  element_type="image",
816
845
  url=url,
817
846
  alt_text=alt_text,
818
- title=title
847
+ title=title,
819
848
  )
820
849
  # Add additional attributes for formatter
821
850
  image.alt = alt_text or ""
822
851
  image.type = "image"
823
852
  images.append(image)
824
-
853
+
825
854
  except Exception as e:
826
855
  log_debug(f"Failed to extract inline image: {e}")
827
856
 
828
- def _extract_reference_images(self, root_node: "tree_sitter.Node", images: list[MarkdownElement]) -> None:
857
+ def _extract_reference_images(
858
+ self, root_node: "tree_sitter.Node", images: list[MarkdownElement]
859
+ ) -> None:
829
860
  """Extract reference images"""
830
861
  import re
831
-
862
+
832
863
  # Reference images also need to be extracted from inline nodes
833
864
  for node in self._traverse_nodes(root_node):
834
865
  if node.type == "inline":
@@ -836,40 +867,40 @@ class MarkdownElementExtractor(ElementExtractor):
836
867
  raw_text = self._get_node_text_optimized(node)
837
868
  if not raw_text:
838
869
  continue
839
-
870
+
840
871
  # Reference image pattern: ![alt][ref]
841
- ref_image_pattern = r'!\[([^\]]*)\]\[([^\]]*)\]'
872
+ ref_image_pattern = r"!\[([^\]]*)\]\[([^\]]*)\]"
842
873
  matches = re.finditer(ref_image_pattern, raw_text)
843
-
874
+
844
875
  for match in matches:
845
876
  alt_text = match.group(1) or ""
846
- ref = match.group(2) or ""
847
-
848
877
  start_line = node.start_point[0] + 1
849
878
  end_line = node.end_point[0] + 1
850
-
879
+
851
880
  image = MarkdownElement(
852
881
  name=alt_text or "Reference Image",
853
882
  start_line=start_line,
854
883
  end_line=end_line,
855
884
  raw_text=match.group(0),
856
- element_type="reference_image"
885
+ element_type="reference_image",
857
886
  )
858
887
  # Add additional attributes for formatter
859
888
  image.alt = alt_text or ""
860
889
  image.type = "reference_image"
861
890
  images.append(image)
862
-
891
+
863
892
  except Exception as e:
864
893
  log_debug(f"Failed to extract reference image: {e}")
865
894
 
866
- def _extract_image_reference_definitions(self, root_node: "tree_sitter.Node", images: list[MarkdownElement]) -> None:
895
+ def _extract_image_reference_definitions(
896
+ self, root_node: "tree_sitter.Node", images: list[MarkdownElement]
897
+ ) -> None:
867
898
  """Extract image reference definitions"""
868
899
  import re
869
-
900
+
870
901
  # Extract all reference definitions that could be used for images
871
902
  # We check if the URL points to an image file or if it's used by an image reference
872
-
903
+
873
904
  # First, collect all image references used in the document
874
905
  image_refs_used = set()
875
906
  for node in self._traverse_nodes(root_node):
@@ -878,19 +909,19 @@ class MarkdownElementExtractor(ElementExtractor):
878
909
  raw_text = self._get_node_text_optimized(node)
879
910
  if not raw_text:
880
911
  continue
881
-
912
+
882
913
  # Find image references: ![alt][ref]
883
- ref_image_pattern = r'!\[([^\]]*)\]\[([^\]]*)\]'
914
+ ref_image_pattern = r"!\[([^\]]*)\]\[([^\]]*)\]"
884
915
  matches = re.finditer(ref_image_pattern, raw_text)
885
-
916
+
886
917
  for match in matches:
887
918
  ref = match.group(2) or ""
888
919
  if ref:
889
920
  image_refs_used.add(ref.lower())
890
-
921
+
891
922
  except Exception as e:
892
923
  log_debug(f"Failed to scan for image references: {e}")
893
-
924
+
894
925
  # Now extract reference definitions that are used by images OR point to image files
895
926
  for node in self._traverse_nodes(root_node):
896
927
  if node.type == "link_reference_definition":
@@ -898,20 +929,31 @@ class MarkdownElementExtractor(ElementExtractor):
898
929
  start_line = node.start_point[0] + 1
899
930
  end_line = node.end_point[0] + 1
900
931
  raw_text = self._get_node_text_optimized(node)
901
-
932
+
902
933
  # Pattern: [label]: url "title"
903
934
  ref_pattern = r'^\[([^\]]+)\]:\s*([^\s]+)(?:\s+"([^"]*)")?'
904
935
  match = re.match(ref_pattern, raw_text.strip())
905
-
936
+
906
937
  if match:
907
938
  label = match.group(1) or ""
908
939
  url = match.group(2) or ""
909
940
  title = match.group(3) or ""
910
-
941
+
911
942
  # Include if this reference is used by an image OR if URL looks like an image
912
943
  is_used_by_image = label.lower() in image_refs_used
913
- is_image_url = any(url.lower().endswith(ext) for ext in ['.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.bmp'])
914
-
944
+ is_image_url = any(
945
+ url.lower().endswith(ext)
946
+ for ext in [
947
+ ".png",
948
+ ".jpg",
949
+ ".jpeg",
950
+ ".gif",
951
+ ".svg",
952
+ ".webp",
953
+ ".bmp",
954
+ ]
955
+ )
956
+
915
957
  if is_used_by_image or is_image_url:
916
958
  image_ref = MarkdownElement(
917
959
  name=f"Image Reference Definition: {label}",
@@ -921,17 +963,19 @@ class MarkdownElementExtractor(ElementExtractor):
921
963
  element_type="image_reference_definition",
922
964
  url=url,
923
965
  alt_text=label,
924
- title=title
966
+ title=title,
925
967
  )
926
968
  # Add additional attributes for formatter
927
969
  image_ref.alt = label
928
970
  image_ref.type = "image_reference_definition"
929
971
  images.append(image_ref)
930
-
972
+
931
973
  except Exception as e:
932
974
  log_debug(f"Failed to extract image reference definition: {e}")
933
975
 
934
- def _extract_link_reference_definitions(self, root_node: "tree_sitter.Node", references: list[MarkdownElement]) -> None:
976
+ def _extract_link_reference_definitions(
977
+ self, root_node: "tree_sitter.Node", references: list[MarkdownElement]
978
+ ) -> None:
935
979
  """Extract link reference definitions"""
936
980
  for node in self._traverse_nodes(root_node):
937
981
  if node.type == "link_reference_definition":
@@ -939,19 +983,21 @@ class MarkdownElementExtractor(ElementExtractor):
939
983
  start_line = node.start_point[0] + 1
940
984
  end_line = node.end_point[0] + 1
941
985
  raw_text = self._get_node_text_optimized(node)
942
-
986
+
943
987
  reference = MarkdownElement(
944
988
  name=raw_text or "Reference Definition",
945
989
  start_line=start_line,
946
990
  end_line=end_line,
947
991
  raw_text=raw_text,
948
- element_type="reference_definition"
992
+ element_type="reference_definition",
949
993
  )
950
994
  references.append(reference)
951
995
  except Exception as e:
952
996
  log_debug(f"Failed to extract reference definition: {e}")
953
997
 
954
- def _extract_list_items(self, root_node: "tree_sitter.Node", lists: list[MarkdownElement]) -> None:
998
+ def _extract_list_items(
999
+ self, root_node: "tree_sitter.Node", lists: list[MarkdownElement]
1000
+ ) -> None:
955
1001
  """Extract lists (not individual items)"""
956
1002
  for node in self._traverse_nodes(root_node):
957
1003
  if node.type == "list":
@@ -959,25 +1005,29 @@ class MarkdownElementExtractor(ElementExtractor):
959
1005
  start_line = node.start_point[0] + 1
960
1006
  end_line = node.end_point[0] + 1
961
1007
  raw_text = self._get_node_text_optimized(node)
962
-
1008
+
963
1009
  # Count list items in this list
964
1010
  item_count = 0
965
1011
  is_task_list = False
966
1012
  is_ordered = False
967
-
1013
+
968
1014
  for child in node.children:
969
1015
  if child.type == "list_item":
970
1016
  item_count += 1
971
1017
  item_text = self._get_node_text_optimized(child)
972
-
1018
+
973
1019
  # Check if it's a task list item
974
- if "[ ]" in item_text or "[x]" in item_text or "[X]" in item_text:
1020
+ if (
1021
+ "[ ]" in item_text
1022
+ or "[x]" in item_text
1023
+ or "[X]" in item_text
1024
+ ):
975
1025
  is_task_list = True
976
-
1026
+
977
1027
  # Check if it's an ordered list (starts with number)
978
1028
  if item_text.strip() and item_text.strip()[0].isdigit():
979
1029
  is_ordered = True
980
-
1030
+
981
1031
  # Determine list type
982
1032
  if is_task_list:
983
1033
  list_type = "task"
@@ -988,15 +1038,15 @@ class MarkdownElementExtractor(ElementExtractor):
988
1038
  else:
989
1039
  list_type = "unordered"
990
1040
  element_type = "list"
991
-
1041
+
992
1042
  name = f"{list_type.title()} List ({item_count} items)"
993
-
1043
+
994
1044
  list_element = MarkdownElement(
995
1045
  name=name,
996
1046
  start_line=start_line,
997
1047
  end_line=end_line,
998
1048
  raw_text=raw_text,
999
- element_type=element_type
1049
+ element_type=element_type,
1000
1050
  )
1001
1051
  # Add additional attributes for formatter
1002
1052
  list_element.list_type = list_type
@@ -1006,7 +1056,9 @@ class MarkdownElementExtractor(ElementExtractor):
1006
1056
  except Exception as e:
1007
1057
  log_debug(f"Failed to extract list: {e}")
1008
1058
 
1009
- def _extract_pipe_tables(self, root_node: "tree_sitter.Node", tables: list[MarkdownElement]) -> None:
1059
+ def _extract_pipe_tables(
1060
+ self, root_node: "tree_sitter.Node", tables: list[MarkdownElement]
1061
+ ) -> None:
1010
1062
  """Extract pipe tables"""
1011
1063
  for node in self._traverse_nodes(root_node):
1012
1064
  if node.type == "pipe_table":
@@ -1014,23 +1066,31 @@ class MarkdownElementExtractor(ElementExtractor):
1014
1066
  start_line = node.start_point[0] + 1
1015
1067
  end_line = node.end_point[0] + 1
1016
1068
  raw_text = self._get_node_text_optimized(node)
1017
-
1069
+
1018
1070
  # Count rows and columns
1019
1071
  lines = raw_text.strip().split("\n")
1020
- row_count = len([line for line in lines if line.strip() and not line.strip().startswith("|---")])
1021
-
1072
+ row_count = len(
1073
+ [
1074
+ line
1075
+ for line in lines
1076
+ if line.strip() and not line.strip().startswith("|---")
1077
+ ]
1078
+ )
1079
+
1022
1080
  # Count columns from first row
1023
1081
  column_count = 0
1024
1082
  if lines:
1025
1083
  first_row = lines[0]
1026
- column_count = len([col for col in first_row.split("|") if col.strip()])
1027
-
1084
+ column_count = len(
1085
+ [col for col in first_row.split("|") if col.strip()]
1086
+ )
1087
+
1028
1088
  table = MarkdownElement(
1029
1089
  name=f"Table ({row_count} rows, {column_count} columns)",
1030
1090
  start_line=start_line,
1031
1091
  end_line=end_line,
1032
1092
  raw_text=raw_text,
1033
- element_type="table"
1093
+ element_type="table",
1034
1094
  )
1035
1095
  # Add additional attributes for formatter
1036
1096
  table.row_count = row_count
@@ -1040,10 +1100,12 @@ class MarkdownElementExtractor(ElementExtractor):
1040
1100
  except Exception as e:
1041
1101
  log_debug(f"Failed to extract pipe table: {e}")
1042
1102
 
1043
- def _extract_block_quotes(self, root_node: "tree_sitter.Node", blockquotes: list[MarkdownElement]) -> None:
1103
+ def _extract_block_quotes(
1104
+ self, root_node: "tree_sitter.Node", blockquotes: list[MarkdownElement]
1105
+ ) -> None:
1044
1106
  """Extract blockquotes"""
1045
1107
  import re
1046
-
1108
+
1047
1109
  # Blockquotes are often represented as paragraphs starting with >
1048
1110
  for node in self._traverse_nodes(root_node):
1049
1111
  if node.type == "block_quote":
@@ -1051,22 +1113,24 @@ class MarkdownElementExtractor(ElementExtractor):
1051
1113
  start_line = node.start_point[0] + 1
1052
1114
  end_line = node.end_point[0] + 1
1053
1115
  raw_text = self._get_node_text_optimized(node)
1054
-
1116
+
1055
1117
  # Extract content without > markers
1056
1118
  lines = raw_text.strip().split("\n")
1057
1119
  content_lines = []
1058
1120
  for line in lines:
1059
1121
  # Remove > marker and optional space
1060
- cleaned = re.sub(r'^>\s?', '', line)
1122
+ cleaned = re.sub(r"^>\s?", "", line)
1061
1123
  content_lines.append(cleaned)
1062
1124
  content = "\n".join(content_lines).strip()
1063
-
1125
+
1064
1126
  blockquote = MarkdownElement(
1065
- name=f"Blockquote: {content[:50]}..." if len(content) > 50 else f"Blockquote: {content}",
1127
+ name=f"Blockquote: {content[:50]}..."
1128
+ if len(content) > 50
1129
+ else f"Blockquote: {content}",
1066
1130
  start_line=start_line,
1067
1131
  end_line=end_line,
1068
1132
  raw_text=raw_text,
1069
- element_type="blockquote"
1133
+ element_type="blockquote",
1070
1134
  )
1071
1135
  blockquote.type = "blockquote"
1072
1136
  blockquote.text = content
@@ -1074,7 +1138,9 @@ class MarkdownElementExtractor(ElementExtractor):
1074
1138
  except Exception as e:
1075
1139
  log_debug(f"Failed to extract blockquote: {e}")
1076
1140
 
1077
- def _extract_thematic_breaks(self, root_node: "tree_sitter.Node", horizontal_rules: list[MarkdownElement]) -> None:
1141
+ def _extract_thematic_breaks(
1142
+ self, root_node: "tree_sitter.Node", horizontal_rules: list[MarkdownElement]
1143
+ ) -> None:
1078
1144
  """Extract thematic breaks (horizontal rules)"""
1079
1145
  for node in self._traverse_nodes(root_node):
1080
1146
  if node.type == "thematic_break":
@@ -1082,20 +1148,22 @@ class MarkdownElementExtractor(ElementExtractor):
1082
1148
  start_line = node.start_point[0] + 1
1083
1149
  end_line = node.end_point[0] + 1
1084
1150
  raw_text = self._get_node_text_optimized(node)
1085
-
1151
+
1086
1152
  hr = MarkdownElement(
1087
1153
  name="Horizontal Rule",
1088
1154
  start_line=start_line,
1089
1155
  end_line=end_line,
1090
1156
  raw_text=raw_text,
1091
- element_type="horizontal_rule"
1157
+ element_type="horizontal_rule",
1092
1158
  )
1093
1159
  hr.type = "horizontal_rule"
1094
1160
  horizontal_rules.append(hr)
1095
1161
  except Exception as e:
1096
1162
  log_debug(f"Failed to extract horizontal rule: {e}")
1097
1163
 
1098
- def _extract_html_blocks(self, root_node: "tree_sitter.Node", html_elements: list[MarkdownElement]) -> None:
1164
+ def _extract_html_blocks(
1165
+ self, root_node: "tree_sitter.Node", html_elements: list[MarkdownElement]
1166
+ ) -> None:
1099
1167
  """Extract HTML block elements"""
1100
1168
  for node in self._traverse_nodes(root_node):
1101
1169
  if node.type == "html_block":
@@ -1103,28 +1171,31 @@ class MarkdownElementExtractor(ElementExtractor):
1103
1171
  start_line = node.start_point[0] + 1
1104
1172
  end_line = node.end_point[0] + 1
1105
1173
  raw_text = self._get_node_text_optimized(node)
1106
-
1174
+
1107
1175
  # Extract tag name if possible
1108
1176
  import re
1109
- tag_match = re.search(r'<(\w+)', raw_text)
1177
+
1178
+ tag_match = re.search(r"<(\w+)", raw_text)
1110
1179
  tag_name = tag_match.group(1) if tag_match else "HTML"
1111
-
1180
+
1112
1181
  html_element = MarkdownElement(
1113
1182
  name=f"HTML Block: {tag_name}",
1114
1183
  start_line=start_line,
1115
1184
  end_line=end_line,
1116
1185
  raw_text=raw_text,
1117
- element_type="html_block"
1186
+ element_type="html_block",
1118
1187
  )
1119
1188
  html_element.type = "html_block"
1120
1189
  html_elements.append(html_element)
1121
1190
  except Exception as e:
1122
1191
  log_debug(f"Failed to extract HTML block: {e}")
1123
1192
 
1124
- def _extract_inline_html(self, root_node: "tree_sitter.Node", html_elements: list[MarkdownElement]) -> None:
1193
+ def _extract_inline_html(
1194
+ self, root_node: "tree_sitter.Node", html_elements: list[MarkdownElement]
1195
+ ) -> None:
1125
1196
  """Extract inline HTML elements"""
1126
1197
  import re
1127
-
1198
+
1128
1199
  # Look for HTML tags in inline content
1129
1200
  for node in self._traverse_nodes(root_node):
1130
1201
  if node.type == "inline":
@@ -1132,220 +1203,230 @@ class MarkdownElementExtractor(ElementExtractor):
1132
1203
  raw_text = self._get_node_text_optimized(node)
1133
1204
  if not raw_text:
1134
1205
  continue
1135
-
1206
+
1136
1207
  # Pattern for HTML tags (excluding autolinks)
1137
1208
  # Exclude autolink patterns: <url> or <email>
1138
- html_pattern = r'<(?!(?:https?://|mailto:|[^@\s]+@[^@\s]+\.[^@\s]+)[^>]*>)[^>]+>'
1209
+ html_pattern = r"<(?!(?:https?://|mailto:|[^@\s]+@[^@\s]+\.[^@\s]+)[^>]*>)[^>]+>"
1139
1210
  matches = re.finditer(html_pattern, raw_text)
1140
-
1211
+
1141
1212
  for match in matches:
1142
1213
  tag_text = match.group(0)
1143
-
1214
+
1144
1215
  # Extract tag name
1145
- tag_match = re.search(r'<(\w+)', tag_text)
1216
+ tag_match = re.search(r"<(\w+)", tag_text)
1146
1217
  tag_name = tag_match.group(1) if tag_match else "HTML"
1147
-
1218
+
1148
1219
  start_line = node.start_point[0] + 1
1149
1220
  end_line = node.end_point[0] + 1
1150
-
1221
+
1151
1222
  html_element = MarkdownElement(
1152
1223
  name=f"HTML Tag: {tag_name}",
1153
1224
  start_line=start_line,
1154
1225
  end_line=end_line,
1155
1226
  raw_text=tag_text,
1156
- element_type="html_inline"
1227
+ element_type="html_inline",
1157
1228
  )
1158
1229
  html_element.type = "html_inline"
1159
1230
  html_element.name = tag_name # Set name attribute for formatter
1160
1231
  html_elements.append(html_element)
1161
-
1232
+
1162
1233
  except Exception as e:
1163
1234
  log_debug(f"Failed to extract inline HTML: {e}")
1164
1235
 
def _extract_emphasis_elements(
    self, root_node: "tree_sitter.Node", formatting_elements: list[MarkdownElement]
) -> None:
    """Extract strong-emphasis (bold) and emphasis (italic) spans.

    The text of every ``inline`` node reachable from ``root_node`` is scanned
    with regular expressions. For each hit a ``MarkdownElement`` is appended
    to ``formatting_elements``; all bold hits of a node are appended before
    its italic hits.

    Each element carries:
        * ``name``: ``"Bold: <content>"`` or ``"Italic: <content>"``
        * ``raw_text``: the matched text including its delimiters
        * ``element_type`` / ``type``: ``"strong_emphasis"`` or ``"emphasis"``
        * ``text``: the content between the delimiters
        * ``start_line`` / ``end_line``: taken from the enclosing inline node
          (``start_point[0] + 1`` / ``end_point[0] + 1``), not from the
          position of the individual match.

    Args:
        root_node: Node whose subtree is searched.
        formatting_elements: Output list, extended in place.

    An exception raised while handling a node is reported through
    ``log_debug`` and processing continues with the next node.
    """
    import re

    # Underscore delimiters are only honoured at word boundaries so that
    # identifiers such as snake_case_name or a__b__c are not reported as
    # emphasis (CommonMark does not allow intraword "_" emphasis).
    # Asterisk delimiters keep their original, unrestricted behaviour.
    bold_re = re.compile(r"\*\*([^*]+)\*\*|(?<!\w)__([^_]+)__(?!\w)")
    italic_re = re.compile(r"(?<!\*)\*([^*]+)\*(?!\*)|(?<!\w)_([^_]+)_(?!\w)")

    # (compiled pattern, name prefix, element type), in emission order.
    span_kinds = (
        (bold_re, "Bold", "strong_emphasis"),
        (italic_re, "Italic", "emphasis"),
    )

    for node in self._traverse_nodes(root_node):
        if node.type != "inline":
            continue
        try:
            raw_text = self._get_node_text_optimized(node)
            if not raw_text:
                continue

            for pattern, label, element_type in span_kinds:
                for match in pattern.finditer(raw_text):
                    # Group 1 is the asterisk form, group 2 the underscore form.
                    content = match.group(1) or match.group(2) or ""

                    element = MarkdownElement(
                        name=f"{label}: {content}",
                        start_line=node.start_point[0] + 1,
                        end_line=node.end_point[0] + 1,
                        raw_text=match.group(0),
                        element_type=element_type,
                    )
                    # Extra attributes consumed by the formatter.
                    element.type = element_type
                    element.text = content
                    formatting_elements.append(element)

        except Exception as e:
            log_debug(f"Failed to extract emphasis elements: {e}")
1218
1291
 
def _extract_inline_code_spans(
    self, root_node: "tree_sitter.Node", formatting_elements: list[MarkdownElement]
) -> None:
    """Extract inline code spans from every ``inline`` node.

    A span opens with a run of one or more backticks and closes at the next
    run of exactly the same length. This covers the common single-backtick
    form as well as multi-backtick spans whose content contains literal
    backticks. The content is kept verbatim (no whitespace is stripped).

    For each span a ``MarkdownElement`` is appended to
    ``formatting_elements`` with:
        * ``name``: ``"Inline Code: <content>"``
        * ``raw_text``: the matched text including the backtick delimiters
        * ``element_type`` / ``type``: ``"inline_code"``
        * ``text``: the content between the delimiters
        * ``start_line`` / ``end_line``: taken from the enclosing inline node
          (``start_point[0] + 1`` / ``end_point[0] + 1``).

    Args:
        root_node: Node whose subtree is searched.
        formatting_elements: Output list, extended in place.

    An exception raised while handling a node is reported through
    ``log_debug`` and processing continues with the next node.
    """
    import re

    # group 1: the complete opening backtick run (the lookarounds pin it to
    #          the whole run, never a part of a longer one)
    # group 2: the content, as short as possible
    # \1:      a closing run of exactly the same length
    # DOTALL keeps the previous ability to match content spanning a newline.
    code_span_re = re.compile(r"(?<!`)(`+)(?!`)(.+?)(?<!`)\1(?!`)", re.DOTALL)

    for node in self._traverse_nodes(root_node):
        if node.type != "inline":
            continue
        try:
            raw_text = self._get_node_text_optimized(node)
            if not raw_text:
                continue

            for match in code_span_re.finditer(raw_text):
                content = match.group(2) or ""

                code_element = MarkdownElement(
                    name=f"Inline Code: {content}",
                    start_line=node.start_point[0] + 1,
                    end_line=node.end_point[0] + 1,
                    raw_text=match.group(0),
                    element_type="inline_code",
                )
                # Extra attributes consumed by the formatter.
                code_element.type = "inline_code"
                code_element.text = content
                formatting_elements.append(code_element)

        except Exception as e:
            log_debug(f"Failed to extract inline code: {e}")
1252
1327
 
def _extract_strikethrough_elements(
    self, root_node: "tree_sitter.Node", formatting_elements: list[MarkdownElement]
) -> None:
    """Collect ``~~struck~~`` spans found in ``inline`` nodes.

    Every match of ``~~...~~`` (content free of ``~``) inside the text of an
    inline node becomes a ``MarkdownElement`` appended to
    ``formatting_elements``. The element is named
    ``"Strikethrough: <content>"``, keeps the full match as ``raw_text``,
    uses ``"strikethrough"`` for both ``element_type`` and ``type``, stores
    the content in ``text`` and takes its line range from the enclosing
    inline node.

    Args:
        root_node: Node whose subtree is searched.
        formatting_elements: Output list, extended in place.

    Errors raised while handling a node are logged with ``log_debug``; the
    remaining nodes are still processed.
    """
    import re

    # Pattern for strikethrough: ~~text~~
    strike_re = re.compile(r"~~([^~]+)~~")

    for inline_node in self._traverse_nodes(root_node):
        if inline_node.type != "inline":
            continue
        try:
            node_text = self._get_node_text_optimized(inline_node)
            if not node_text:
                continue

            for hit in strike_re.finditer(node_text):
                struck = hit.group(1) or ""
                first_line = inline_node.start_point[0] + 1
                last_line = inline_node.end_point[0] + 1

                element = MarkdownElement(
                    name=f"Strikethrough: {struck}",
                    start_line=first_line,
                    end_line=last_line,
                    raw_text=hit.group(0),
                    element_type="strikethrough",
                )
                # Extra attributes consumed by the formatter.
                element.type = "strikethrough"
                element.text = struck
                formatting_elements.append(element)

        except Exception as e:
            log_debug(f"Failed to extract strikethrough: {e}")
1286
1363
 
def _extract_footnote_elements(
    self, root_node: "tree_sitter.Node", footnotes: list[MarkdownElement]
) -> None:
    """Extract footnote references and footnote definitions.

    Two node types are inspected while walking ``root_node``:

    * ``inline`` nodes: every ``[^id]`` occurrence becomes a
      ``"footnote_reference"`` element named ``"Footnote Reference: <id>"``
      whose ``raw_text`` is the matched label and whose ``text`` is the id.
      If the inline text is itself a definition (``[^id]: ...``), its
      leading label is skipped so that a definition is not also counted as
      a reference.
    * ``paragraph`` nodes: when the stripped text starts with
      ``[^id]: content`` a ``"footnote_definition"`` element named
      ``"Footnote Definition: <id>"`` is created. Its ``raw_text`` is the
      whole paragraph text and its ``text`` is the captured content.

    Line numbers of every element come from the node being inspected
    (``start_point[0] + 1`` / ``end_point[0] + 1``).

    Args:
        root_node: Node whose subtree is searched.
        footnotes: Output list, extended in place.

    An exception raised while handling a node is reported through
    ``log_debug`` and processing continues with the next node.
    """
    import re

    # Pattern for footnote references: [^1]
    reference_re = re.compile(r"\[\^([^\]]+)\]")
    # Pattern for footnote definitions: [^1]: content
    # Used with .match(), so it only fires at the very start of the text.
    # MULTILINE lets "$" stop at a line end, so the captured content runs
    # only to the end of the line it starts on.
    definition_re = re.compile(r"^\[\^([^\]]+)\]:\s*(.+)$", re.MULTILINE)

    for node in self._traverse_nodes(root_node):
        if node.type == "inline":
            try:
                raw_text = self._get_node_text_optimized(node)
                if not raw_text:
                    continue

                # Offset of the label that heads a definition, or -1 when
                # this text is not a definition. Mirrors the check used for
                # paragraph nodes below.
                definition_label_at = -1
                if definition_re.match(raw_text.strip()):
                    definition_label_at = len(raw_text) - len(raw_text.lstrip())

                for match in reference_re.finditer(raw_text):
                    if match.start() == definition_label_at:
                        # "[^id]:" introduces a definition; not a reference.
                        continue

                    ref_id = match.group(1) or ""

                    footnote_element = MarkdownElement(
                        name=f"Footnote Reference: {ref_id}",
                        start_line=node.start_point[0] + 1,
                        end_line=node.end_point[0] + 1,
                        raw_text=match.group(0),
                        element_type="footnote_reference",
                    )
                    footnote_element.type = "footnote_reference"
                    footnote_element.text = ref_id
                    footnotes.append(footnote_element)

            except Exception as e:
                log_debug(f"Failed to extract footnote reference: {e}")

        # Look for footnote definitions
        elif node.type == "paragraph":
            try:
                raw_text = self._get_node_text_optimized(node)
                if not raw_text:
                    continue

                match = definition_re.match(raw_text.strip())
                if match:
                    ref_id = match.group(1) or ""
                    content = match.group(2) or ""

                    footnote_element = MarkdownElement(
                        name=f"Footnote Definition: {ref_id}",
                        start_line=node.start_point[0] + 1,
                        end_line=node.end_point[0] + 1,
                        raw_text=raw_text,
                        element_type="footnote_definition",
                    )
                    footnote_element.type = "footnote_definition"
                    footnote_element.text = content
                    footnotes.append(footnote_element)

            except Exception as e:
                log_debug(f"Failed to extract footnote definition: {e}")
1351
1432
 
@@ -1358,33 +1439,33 @@ class MarkdownElementExtractor(ElementExtractor):
1358
1439
  def _parse_link_components(self, raw_text: str) -> tuple[str, str, str]:
1359
1440
  """Parse link components from raw text"""
1360
1441
  import re
1361
-
1442
+
1362
1443
  # Pattern for [text](url "title")
1363
1444
  pattern = r'\[([^\]]*)\]\(([^)]*?)(?:\s+"([^"]*)")?\)'
1364
1445
  match = re.search(pattern, raw_text)
1365
-
1446
+
1366
1447
  if match:
1367
1448
  text = match.group(1) or ""
1368
1449
  url = match.group(2) or ""
1369
1450
  title = match.group(3) or ""
1370
1451
  return text, url, title
1371
-
1452
+
1372
1453
  return "", "", ""
1373
1454
 
1374
1455
  def _parse_image_components(self, raw_text: str) -> tuple[str, str, str]:
1375
1456
  """Parse image components from raw text"""
1376
1457
  import re
1377
-
1458
+
1378
1459
  # Pattern for ![alt](url "title")
1379
1460
  pattern = r'!\[([^\]]*)\]\(([^)]*?)(?:\s+"([^"]*)")?\)'
1380
1461
  match = re.search(pattern, raw_text)
1381
-
1462
+
1382
1463
  if match:
1383
1464
  alt_text = match.group(1) or ""
1384
1465
  url = match.group(2) or ""
1385
1466
  title = match.group(3) or ""
1386
1467
  return alt_text, url, title
1387
-
1468
+
1388
1469
  return "", "", ""
1389
1470
 
1390
1471
 
@@ -1396,7 +1477,7 @@ class MarkdownPlugin(LanguagePlugin):
1396
1477
  super().__init__()
1397
1478
  self._language_cache: tree_sitter.Language | None = None
1398
1479
  self._extractor: MarkdownElementExtractor = MarkdownElementExtractor()
1399
-
1480
+
1400
1481
  # Legacy compatibility attributes for tests
1401
1482
  self.language = "markdown"
1402
1483
  self.extractor = self._extractor
@@ -1421,22 +1502,30 @@ class MarkdownPlugin(LanguagePlugin):
1421
1502
  """Get the language name for Markdown (legacy compatibility)"""
1422
1503
  return "markdown"
1423
1504
 
def extract_functions(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[CodeElement]:
    """Legacy-compatibility entry point for function extraction.

    Forwards ``tree`` and ``source_code`` to ``extract_functions`` on the
    extractor returned by ``self.get_extractor()`` and returns its result
    unchanged.
    """
    return self.get_extractor().extract_functions(tree, source_code)
1428
1511
 
def extract_classes(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[CodeElement]:
    """Legacy-compatibility entry point for class extraction.

    Forwards ``tree`` and ``source_code`` to ``extract_classes`` on the
    extractor returned by ``self.get_extractor()`` and returns its result
    unchanged.
    """
    return self.get_extractor().extract_classes(tree, source_code)
1433
1518
 
def extract_variables(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[CodeElement]:
    """Legacy-compatibility entry point for variable extraction.

    Forwards ``tree`` and ``source_code`` to ``extract_variables`` on the
    extractor returned by ``self.get_extractor()`` and returns its result
    unchanged.
    """
    return self.get_extractor().extract_variables(tree, source_code)
1438
1525
 
def extract_imports(
    self, tree: "tree_sitter.Tree", source_code: str
) -> list[CodeElement]:
    """Legacy-compatibility entry point for import extraction.

    Forwards ``tree`` and ``source_code`` to ``extract_imports`` on the
    extractor returned by ``self.get_extractor()`` and returns its result
    unchanged.
    """
    return self.get_extractor().extract_imports(tree, source_code)
@@ -1564,7 +1653,7 @@ class MarkdownPlugin(LanguagePlugin):
1564
1653
  references = extractor.extract_references(tree, source_code)
1565
1654
  lists = extractor.extract_lists(tree, source_code)
1566
1655
  tables = extractor.extract_tables(tree, source_code)
1567
-
1656
+
1568
1657
  # Extract new element types
1569
1658
  blockquotes = extractor.extract_blockquotes(tree, source_code)
1570
1659
  horizontal_rules = extractor.extract_horizontal_rules(tree, source_code)
@@ -1611,8 +1700,6 @@ class MarkdownPlugin(LanguagePlugin):
1611
1700
  def execute_query(self, tree: "tree_sitter.Tree", query_name: str) -> dict:
1612
1701
  """Execute a specific query on the tree"""
1613
1702
  try:
1614
- import tree_sitter
1615
-
1616
1703
  language = self.get_tree_sitter_language()
1617
1704
  if not language:
1618
1705
  return {"error": "Language not available"}
@@ -1629,7 +1716,11 @@ class MarkdownPlugin(LanguagePlugin):
1629
1716
  captures = TreeSitterQueryCompat.safe_execute_query(
1630
1717
  language, query_string, tree.root_node, fallback_result=[]
1631
1718
  )
1632
- return {"captures": captures, "query": query_string, "matches": len(captures)}
1719
+ return {
1720
+ "captures": captures,
1721
+ "query": query_string,
1722
+ "matches": len(captures),
1723
+ }
1633
1724
 
1634
1725
  except Exception as e:
1635
1726
  log_error(f"Query execution failed: {e}")
@@ -1639,7 +1730,7 @@ class MarkdownPlugin(LanguagePlugin):
1639
1730
  """Extract elements from source code using tree-sitter AST"""
1640
1731
  extractor = self.get_extractor()
1641
1732
  elements = []
1642
-
1733
+
1643
1734
  try:
1644
1735
  elements.extend(extractor.extract_headers(tree, source_code))
1645
1736
  elements.extend(extractor.extract_code_blocks(tree, source_code))
@@ -1655,10 +1746,12 @@ class MarkdownPlugin(LanguagePlugin):
1655
1746
  elements.extend(extractor.extract_footnotes(tree, source_code))
1656
1747
  except Exception as e:
1657
1748
  log_error(f"Failed to extract elements: {e}")
1658
-
1749
+
1659
1750
  return elements
1660
1751
 
1661
- def execute_query_strategy(self, tree: "tree_sitter.Tree", source_code: str, query_key: str) -> list[CodeElement]:
1752
+ def execute_query_strategy(
1753
+ self, tree: "tree_sitter.Tree", source_code: str, query_key: str
1754
+ ) -> list[CodeElement]:
1662
1755
  """Execute Markdown-specific query strategy based on query_key"""
1663
1756
  if not tree or not source_code:
1664
1757
  return []
@@ -1674,54 +1767,82 @@ class MarkdownPlugin(LanguagePlugin):
1674
1767
  "function": lambda: self._extractor.extract_headers(tree, source_code),
1675
1768
  "headers": lambda: self._extractor.extract_headers(tree, source_code),
1676
1769
  "heading": lambda: self._extractor.extract_headers(tree, source_code),
1677
-
1678
1770
  # Code block-related queries (mapped to classes)
1679
1771
  "class": lambda: self._extractor.extract_code_blocks(tree, source_code),
1680
- "code_blocks": lambda: self._extractor.extract_code_blocks(tree, source_code),
1681
- "code_block": lambda: self._extractor.extract_code_blocks(tree, source_code),
1682
-
1772
+ "code_blocks": lambda: self._extractor.extract_code_blocks(
1773
+ tree, source_code
1774
+ ),
1775
+ "code_block": lambda: self._extractor.extract_code_blocks(
1776
+ tree, source_code
1777
+ ),
1683
1778
  # Link and image queries (mapped to variables)
1684
- "variable": lambda: self._extractor.extract_links(tree, source_code) + self._extractor.extract_images(tree, source_code),
1779
+ "variable": lambda: self._extractor.extract_links(tree, source_code)
1780
+ + self._extractor.extract_images(tree, source_code),
1685
1781
  "links": lambda: self._extractor.extract_links(tree, source_code),
1686
1782
  "link": lambda: self._extractor.extract_links(tree, source_code),
1687
1783
  "images": lambda: self._extractor.extract_images(tree, source_code),
1688
1784
  "image": lambda: self._extractor.extract_images(tree, source_code),
1689
-
1690
1785
  # Reference queries (mapped to imports)
1691
1786
  "import": lambda: self._extractor.extract_references(tree, source_code),
1692
1787
  "references": lambda: self._extractor.extract_references(tree, source_code),
1693
1788
  "reference": lambda: self._extractor.extract_references(tree, source_code),
1694
-
1695
1789
  # List and table queries
1696
1790
  "lists": lambda: self._extractor.extract_lists(tree, source_code),
1697
1791
  "list": lambda: self._extractor.extract_lists(tree, source_code),
1698
- "task_lists": lambda: [l for l in self._extractor.extract_lists(tree, source_code) if getattr(l, 'element_type', '') == 'task_list'],
1792
+ "task_lists": lambda: [
1793
+ lst
1794
+ for lst in self._extractor.extract_lists(tree, source_code)
1795
+ if getattr(lst, "element_type", "") == "task_list"
1796
+ ],
1699
1797
  "tables": lambda: self._extractor.extract_tables(tree, source_code),
1700
1798
  "table": lambda: self._extractor.extract_tables(tree, source_code),
1701
-
1702
1799
  # Content structure queries
1703
- "blockquotes": lambda: self._extractor.extract_blockquotes(tree, source_code),
1704
- "blockquote": lambda: self._extractor.extract_blockquotes(tree, source_code),
1705
- "horizontal_rules": lambda: self._extractor.extract_horizontal_rules(tree, source_code),
1706
- "horizontal_rule": lambda: self._extractor.extract_horizontal_rules(tree, source_code),
1707
-
1800
+ "blockquotes": lambda: self._extractor.extract_blockquotes(
1801
+ tree, source_code
1802
+ ),
1803
+ "blockquote": lambda: self._extractor.extract_blockquotes(
1804
+ tree, source_code
1805
+ ),
1806
+ "horizontal_rules": lambda: self._extractor.extract_horizontal_rules(
1807
+ tree, source_code
1808
+ ),
1809
+ "horizontal_rule": lambda: self._extractor.extract_horizontal_rules(
1810
+ tree, source_code
1811
+ ),
1708
1812
  # HTML and formatting queries
1709
- "html_blocks": lambda: self._extractor.extract_html_elements(tree, source_code),
1710
- "html_block": lambda: self._extractor.extract_html_elements(tree, source_code),
1813
+ "html_blocks": lambda: self._extractor.extract_html_elements(
1814
+ tree, source_code
1815
+ ),
1816
+ "html_block": lambda: self._extractor.extract_html_elements(
1817
+ tree, source_code
1818
+ ),
1711
1819
  "html": lambda: self._extractor.extract_html_elements(tree, source_code),
1712
- "emphasis": lambda: self._extractor.extract_text_formatting(tree, source_code),
1713
- "formatting": lambda: self._extractor.extract_text_formatting(tree, source_code),
1714
- "text_formatting": lambda: self._extractor.extract_text_formatting(tree, source_code),
1715
- "inline_code": lambda: [f for f in self._extractor.extract_text_formatting(tree, source_code) if getattr(f, 'element_type', '') == 'inline_code'],
1716
- "strikethrough": lambda: [f for f in self._extractor.extract_text_formatting(tree, source_code) if getattr(f, 'element_type', '') == 'strikethrough'],
1717
-
1820
+ "emphasis": lambda: self._extractor.extract_text_formatting(
1821
+ tree, source_code
1822
+ ),
1823
+ "formatting": lambda: self._extractor.extract_text_formatting(
1824
+ tree, source_code
1825
+ ),
1826
+ "text_formatting": lambda: self._extractor.extract_text_formatting(
1827
+ tree, source_code
1828
+ ),
1829
+ "inline_code": lambda: [
1830
+ f
1831
+ for f in self._extractor.extract_text_formatting(tree, source_code)
1832
+ if getattr(f, "element_type", "") == "inline_code"
1833
+ ],
1834
+ "strikethrough": lambda: [
1835
+ f
1836
+ for f in self._extractor.extract_text_formatting(tree, source_code)
1837
+ if getattr(f, "element_type", "") == "strikethrough"
1838
+ ],
1718
1839
  # Footnote queries
1719
1840
  "footnotes": lambda: self._extractor.extract_footnotes(tree, source_code),
1720
1841
  "footnote": lambda: self._extractor.extract_footnotes(tree, source_code),
1721
-
1722
1842
  # Comprehensive queries
1723
1843
  "all_elements": lambda: self.extract_elements(tree, source_code),
1724
- "text_content": lambda: self._extractor.extract_headers(tree, source_code) + self._extractor.extract_text_formatting(tree, source_code),
1844
+ "text_content": lambda: self._extractor.extract_headers(tree, source_code)
1845
+ + self._extractor.extract_text_formatting(tree, source_code),
1725
1846
  }
1726
1847
 
1727
1848
  # Execute the appropriate extraction method
@@ -1739,152 +1860,70 @@ class MarkdownPlugin(LanguagePlugin):
1739
1860
  """Get Markdown element categories mapping query_key to node_types"""
1740
1861
  return {
1741
1862
  # Header categories (function-like)
1742
- "function": [
1743
- "atx_heading",
1744
- "setext_heading"
1745
- ],
1746
- "headers": [
1747
- "atx_heading",
1748
- "setext_heading"
1749
- ],
1750
- "heading": [
1751
- "atx_heading",
1752
- "setext_heading"
1753
- ],
1754
-
1863
+ "function": ["atx_heading", "setext_heading"],
1864
+ "headers": ["atx_heading", "setext_heading"],
1865
+ "heading": ["atx_heading", "setext_heading"],
1755
1866
  # Code block categories (class-like)
1756
- "class": [
1757
- "fenced_code_block",
1758
- "indented_code_block"
1759
- ],
1760
- "code_blocks": [
1761
- "fenced_code_block",
1762
- "indented_code_block"
1763
- ],
1764
- "code_block": [
1765
- "fenced_code_block",
1766
- "indented_code_block"
1767
- ],
1768
-
1867
+ "class": ["fenced_code_block", "indented_code_block"],
1868
+ "code_blocks": ["fenced_code_block", "indented_code_block"],
1869
+ "code_block": ["fenced_code_block", "indented_code_block"],
1769
1870
  # Link and image categories (variable-like)
1770
1871
  "variable": [
1771
1872
  "inline", # Contains links and images
1772
1873
  "link",
1773
1874
  "autolink",
1774
1875
  "reference_link",
1775
- "image"
1876
+ "image",
1776
1877
  ],
1777
1878
  "links": [
1778
1879
  "inline", # Contains inline links
1779
1880
  "link",
1780
1881
  "autolink",
1781
- "reference_link"
1782
- ],
1783
- "link": [
1784
- "inline",
1785
- "link",
1786
- "autolink",
1787
- "reference_link"
1882
+ "reference_link",
1788
1883
  ],
1884
+ "link": ["inline", "link", "autolink", "reference_link"],
1789
1885
  "images": [
1790
1886
  "inline", # Contains inline images
1791
- "image"
1792
- ],
1793
- "image": [
1794
- "inline",
1795
- "image"
1887
+ "image",
1796
1888
  ],
1797
-
1889
+ "image": ["inline", "image"],
1798
1890
  # Reference categories (import-like)
1799
- "import": [
1800
- "link_reference_definition"
1801
- ],
1802
- "references": [
1803
- "link_reference_definition"
1804
- ],
1805
- "reference": [
1806
- "link_reference_definition"
1807
- ],
1808
-
1891
+ "import": ["link_reference_definition"],
1892
+ "references": ["link_reference_definition"],
1893
+ "reference": ["link_reference_definition"],
1809
1894
  # List categories
1810
- "lists": [
1811
- "list",
1812
- "list_item"
1813
- ],
1814
- "list": [
1815
- "list",
1816
- "list_item"
1817
- ],
1818
- "task_lists": [
1819
- "list",
1820
- "list_item"
1821
- ],
1822
-
1895
+ "lists": ["list", "list_item"],
1896
+ "list": ["list", "list_item"],
1897
+ "task_lists": ["list", "list_item"],
1823
1898
  # Table categories
1824
- "tables": [
1825
- "pipe_table",
1826
- "table"
1827
- ],
1828
- "table": [
1829
- "pipe_table",
1830
- "table"
1831
- ],
1832
-
1899
+ "tables": ["pipe_table", "table"],
1900
+ "table": ["pipe_table", "table"],
1833
1901
  # Content structure categories
1834
- "blockquotes": [
1835
- "block_quote"
1836
- ],
1837
- "blockquote": [
1838
- "block_quote"
1839
- ],
1840
- "horizontal_rules": [
1841
- "thematic_break"
1842
- ],
1843
- "horizontal_rule": [
1844
- "thematic_break"
1845
- ],
1846
-
1902
+ "blockquotes": ["block_quote"],
1903
+ "blockquote": ["block_quote"],
1904
+ "horizontal_rules": ["thematic_break"],
1905
+ "horizontal_rule": ["thematic_break"],
1847
1906
  # HTML categories
1848
1907
  "html_blocks": [
1849
1908
  "html_block",
1850
- "inline" # Contains inline HTML
1851
- ],
1852
- "html_block": [
1853
- "html_block",
1854
- "inline"
1909
+ "inline", # Contains inline HTML
1855
1910
  ],
1856
- "html": [
1857
- "html_block",
1858
- "inline"
1859
- ],
1860
-
1911
+ "html_block": ["html_block", "inline"],
1912
+ "html": ["html_block", "inline"],
1861
1913
  # Text formatting categories
1862
1914
  "emphasis": [
1863
1915
  "inline" # Contains emphasis elements
1864
1916
  ],
1865
- "formatting": [
1866
- "inline"
1867
- ],
1868
- "text_formatting": [
1869
- "inline"
1870
- ],
1871
- "inline_code": [
1872
- "inline"
1873
- ],
1874
- "strikethrough": [
1875
- "inline"
1876
- ],
1877
-
1917
+ "formatting": ["inline"],
1918
+ "text_formatting": ["inline"],
1919
+ "inline_code": ["inline"],
1920
+ "strikethrough": ["inline"],
1878
1921
  # Footnote categories
1879
1922
  "footnotes": [
1880
1923
  "inline", # Contains footnote references
1881
- "paragraph" # Contains footnote definitions
1924
+ "paragraph", # Contains footnote definitions
1882
1925
  ],
1883
- "footnote": [
1884
- "inline",
1885
- "paragraph"
1886
- ],
1887
-
1926
+ "footnote": ["inline", "paragraph"],
1888
1927
  # Comprehensive categories
1889
1928
  "all_elements": [
1890
1929
  "atx_heading",
@@ -1904,12 +1943,7 @@ class MarkdownPlugin(LanguagePlugin):
1904
1943
  "block_quote",
1905
1944
  "thematic_break",
1906
1945
  "html_block",
1907
- "paragraph"
1946
+ "paragraph",
1908
1947
  ],
1909
- "text_content": [
1910
- "atx_heading",
1911
- "setext_heading",
1912
- "inline",
1913
- "paragraph"
1914
- ]
1915
- }
1948
+ "text_content": ["atx_heading", "setext_heading", "inline", "paragraph"],
1949
+ }