tree-sitter-analyzer 1.7.1__py3-none-any.whl → 1.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tree-sitter-analyzer might be problematic. Click here for more details.
- tree_sitter_analyzer/__init__.py +1 -1
- tree_sitter_analyzer/cli/commands/advanced_command.py +52 -0
- tree_sitter_analyzer/cli/commands/structure_command.py +50 -1
- tree_sitter_analyzer/cli/commands/summary_command.py +49 -0
- tree_sitter_analyzer/cli/commands/table_command.py +48 -0
- tree_sitter_analyzer/core/query_service.py +145 -5
- tree_sitter_analyzer/formatters/base_formatter.py +29 -2
- tree_sitter_analyzer/formatters/language_formatter_factory.py +83 -0
- tree_sitter_analyzer/formatters/markdown_formatter.py +426 -0
- tree_sitter_analyzer/language_detector.py +30 -0
- tree_sitter_analyzer/language_loader.py +1 -0
- tree_sitter_analyzer/languages/markdown_plugin.py +1569 -0
- tree_sitter_analyzer/languages/python_plugin.py +75 -16
- tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +184 -11
- tree_sitter_analyzer/mcp/tools/list_files_tool.py +112 -2
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +210 -10
- tree_sitter_analyzer/queries/markdown.py +379 -0
- tree_sitter_analyzer/query_loader.py +1 -0
- {tree_sitter_analyzer-1.7.1.dist-info → tree_sitter_analyzer-1.7.3.dist-info}/METADATA +54 -18
- {tree_sitter_analyzer-1.7.1.dist-info → tree_sitter_analyzer-1.7.3.dist-info}/RECORD +22 -18
- {tree_sitter_analyzer-1.7.1.dist-info → tree_sitter_analyzer-1.7.3.dist-info}/entry_points.txt +1 -0
- {tree_sitter_analyzer-1.7.1.dist-info → tree_sitter_analyzer-1.7.3.dist-info}/WHEEL +0 -0
|
@@ -13,6 +13,7 @@ from pathlib import Path
|
|
|
13
13
|
from typing import Any
|
|
14
14
|
|
|
15
15
|
from ..utils.error_handler import handle_mcp_errors
|
|
16
|
+
from ..utils.file_output_manager import FileOutputManager
|
|
16
17
|
from ..utils.gitignore_detector import get_default_detector
|
|
17
18
|
from ..utils.search_cache import get_default_cache
|
|
18
19
|
from . import fd_rg_utils
|
|
@@ -36,6 +37,7 @@ class SearchContentTool(BaseMCPTool):
|
|
|
36
37
|
"""
|
|
37
38
|
super().__init__(project_root)
|
|
38
39
|
self.cache = get_default_cache() if enable_cache else None
|
|
40
|
+
self.file_output_manager = FileOutputManager(project_root)
|
|
39
41
|
|
|
40
42
|
def get_tool_definition(self) -> dict[str, Any]:
|
|
41
43
|
return {
|
|
@@ -153,6 +155,15 @@ class SearchContentTool(BaseMCPTool):
|
|
|
153
155
|
"default": False,
|
|
154
156
|
"description": "Return only the total match count as a number. Most token-efficient option for count queries. Takes priority over all other formats",
|
|
155
157
|
},
|
|
158
|
+
"output_file": {
|
|
159
|
+
"type": "string",
|
|
160
|
+
"description": "Optional filename to save output to file (extension auto-detected based on content)",
|
|
161
|
+
},
|
|
162
|
+
"suppress_output": {
|
|
163
|
+
"type": "boolean",
|
|
164
|
+
"description": "When true and output_file is specified, suppress detailed output in response to save tokens",
|
|
165
|
+
"default": False,
|
|
166
|
+
},
|
|
156
167
|
},
|
|
157
168
|
"required": ["query"],
|
|
158
169
|
"anyOf": [
|
|
@@ -312,12 +323,18 @@ class SearchContentTool(BaseMCPTool):
|
|
|
312
323
|
cached_result["cache_hit"] = True
|
|
313
324
|
return cached_result
|
|
314
325
|
|
|
315
|
-
#
|
|
316
|
-
max_count
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
326
|
+
# Handle max_count parameter properly
|
|
327
|
+
# If user specifies max_count, use it directly (with reasonable upper limit)
|
|
328
|
+
# If not specified, use None to let ripgrep return all matches (subject to hard cap later)
|
|
329
|
+
max_count = arguments.get("max_count")
|
|
330
|
+
if max_count is not None:
|
|
331
|
+
# Clamp user-specified max_count to reasonable limits
|
|
332
|
+
# Use 1 as minimum default, but respect user's small values
|
|
333
|
+
max_count = fd_rg_utils.clamp_int(
|
|
334
|
+
max_count,
|
|
335
|
+
1, # Minimum default value
|
|
336
|
+
fd_rg_utils.DEFAULT_RESULTS_LIMIT, # Upper limit for safety
|
|
337
|
+
)
|
|
321
338
|
timeout_ms = arguments.get("timeout_ms")
|
|
322
339
|
|
|
323
340
|
# Note: --files-from is not supported in this ripgrep version
|
|
@@ -461,9 +478,18 @@ class SearchContentTool(BaseMCPTool):
|
|
|
461
478
|
|
|
462
479
|
# Handle normal mode
|
|
463
480
|
matches = fd_rg_utils.parse_rg_json_lines_to_matches(out)
|
|
464
|
-
|
|
465
|
-
if
|
|
466
|
-
|
|
481
|
+
|
|
482
|
+
# Apply user-specified max_count limit if provided
|
|
483
|
+
# Note: ripgrep's -m option limits matches per file, not total matches
|
|
484
|
+
# So we need to apply the total limit here in post-processing
|
|
485
|
+
user_max_count = arguments.get("max_count")
|
|
486
|
+
if user_max_count is not None and len(matches) > user_max_count:
|
|
487
|
+
matches = matches[:user_max_count]
|
|
488
|
+
truncated = True
|
|
489
|
+
else:
|
|
490
|
+
truncated = len(matches) >= fd_rg_utils.MAX_RESULTS_HARD_CAP
|
|
491
|
+
if truncated:
|
|
492
|
+
matches = matches[: fd_rg_utils.MAX_RESULTS_HARD_CAP]
|
|
467
493
|
|
|
468
494
|
# Apply path optimization if requested
|
|
469
495
|
optimize_paths = arguments.get("optimize_paths", False)
|
|
@@ -475,6 +501,54 @@ class SearchContentTool(BaseMCPTool):
|
|
|
475
501
|
if group_by_file and matches:
|
|
476
502
|
result = fd_rg_utils.group_matches_by_file(matches)
|
|
477
503
|
|
|
504
|
+
# Handle output suppression and file output for grouped results
|
|
505
|
+
output_file = arguments.get("output_file")
|
|
506
|
+
suppress_output = arguments.get("suppress_output", False)
|
|
507
|
+
|
|
508
|
+
# Handle file output if requested
|
|
509
|
+
if output_file:
|
|
510
|
+
try:
|
|
511
|
+
# Save full result to file
|
|
512
|
+
import json
|
|
513
|
+
json_content = json.dumps(result, indent=2, ensure_ascii=False)
|
|
514
|
+
file_path = self.file_output_manager.save_to_file(
|
|
515
|
+
content=json_content,
|
|
516
|
+
base_name=output_file
|
|
517
|
+
)
|
|
518
|
+
|
|
519
|
+
# If suppress_output is True, return minimal response
|
|
520
|
+
if suppress_output:
|
|
521
|
+
minimal_result = {
|
|
522
|
+
"success": result.get("success", True),
|
|
523
|
+
"count": result.get("count", 0),
|
|
524
|
+
"output_file": output_file,
|
|
525
|
+
"file_saved": f"Results saved to {file_path}"
|
|
526
|
+
}
|
|
527
|
+
# Cache the full result, not the minimal one
|
|
528
|
+
if self.cache and cache_key:
|
|
529
|
+
self.cache.set(cache_key, result)
|
|
530
|
+
return minimal_result
|
|
531
|
+
else:
|
|
532
|
+
# Include file info in full response
|
|
533
|
+
result["output_file"] = output_file
|
|
534
|
+
result["file_saved"] = f"Results saved to {file_path}"
|
|
535
|
+
except Exception as e:
|
|
536
|
+
logger.error(f"Failed to save output to file: {e}")
|
|
537
|
+
result["file_save_error"] = str(e)
|
|
538
|
+
result["file_saved"] = False
|
|
539
|
+
elif suppress_output:
|
|
540
|
+
# If suppress_output is True but no output_file, remove detailed results
|
|
541
|
+
minimal_result = {
|
|
542
|
+
"success": result.get("success", True),
|
|
543
|
+
"count": result.get("count", 0),
|
|
544
|
+
"summary": result.get("summary", {}),
|
|
545
|
+
"meta": result.get("meta", {})
|
|
546
|
+
}
|
|
547
|
+
# Cache the full result, not the minimal one
|
|
548
|
+
if self.cache and cache_key:
|
|
549
|
+
self.cache.set(cache_key, result)
|
|
550
|
+
return minimal_result
|
|
551
|
+
|
|
478
552
|
# Cache the result
|
|
479
553
|
if self.cache and cache_key:
|
|
480
554
|
self.cache.set(cache_key, result)
|
|
@@ -492,6 +566,54 @@ class SearchContentTool(BaseMCPTool):
|
|
|
492
566
|
"summary": summary,
|
|
493
567
|
}
|
|
494
568
|
|
|
569
|
+
# Handle output suppression and file output for summary results
|
|
570
|
+
output_file = arguments.get("output_file")
|
|
571
|
+
suppress_output = arguments.get("suppress_output", False)
|
|
572
|
+
|
|
573
|
+
# Handle file output if requested
|
|
574
|
+
if output_file:
|
|
575
|
+
try:
|
|
576
|
+
# Save full result to file
|
|
577
|
+
import json
|
|
578
|
+
json_content = json.dumps(result, indent=2, ensure_ascii=False)
|
|
579
|
+
file_path = self.file_output_manager.save_to_file(
|
|
580
|
+
content=json_content,
|
|
581
|
+
base_name=output_file
|
|
582
|
+
)
|
|
583
|
+
|
|
584
|
+
# If suppress_output is True, return minimal response
|
|
585
|
+
if suppress_output:
|
|
586
|
+
minimal_result = {
|
|
587
|
+
"success": result.get("success", True),
|
|
588
|
+
"count": result.get("count", 0),
|
|
589
|
+
"output_file": output_file,
|
|
590
|
+
"file_saved": f"Results saved to {file_path}"
|
|
591
|
+
}
|
|
592
|
+
# Cache the full result, not the minimal one
|
|
593
|
+
if self.cache and cache_key:
|
|
594
|
+
self.cache.set(cache_key, result)
|
|
595
|
+
return minimal_result
|
|
596
|
+
else:
|
|
597
|
+
# Include file info in full response
|
|
598
|
+
result["output_file"] = output_file
|
|
599
|
+
result["file_saved"] = f"Results saved to {file_path}"
|
|
600
|
+
except Exception as e:
|
|
601
|
+
logger.error(f"Failed to save output to file: {e}")
|
|
602
|
+
result["file_save_error"] = str(e)
|
|
603
|
+
result["file_saved"] = False
|
|
604
|
+
elif suppress_output:
|
|
605
|
+
# If suppress_output is True but no output_file, remove detailed results
|
|
606
|
+
minimal_result = {
|
|
607
|
+
"success": result.get("success", True),
|
|
608
|
+
"count": result.get("count", 0),
|
|
609
|
+
"summary": result.get("summary", {}),
|
|
610
|
+
"elapsed_ms": result.get("elapsed_ms", 0)
|
|
611
|
+
}
|
|
612
|
+
# Cache the full result, not the minimal one
|
|
613
|
+
if self.cache and cache_key:
|
|
614
|
+
self.cache.set(cache_key, result)
|
|
615
|
+
return minimal_result
|
|
616
|
+
|
|
495
617
|
# Cache the result
|
|
496
618
|
if self.cache and cache_key:
|
|
497
619
|
self.cache.set(cache_key, result)
|
|
@@ -503,9 +625,87 @@ class SearchContentTool(BaseMCPTool):
|
|
|
503
625
|
"count": len(matches),
|
|
504
626
|
"truncated": truncated,
|
|
505
627
|
"elapsed_ms": elapsed_ms,
|
|
506
|
-
"results": matches,
|
|
507
628
|
}
|
|
508
629
|
|
|
630
|
+
# Handle output suppression and file output
|
|
631
|
+
output_file = arguments.get("output_file")
|
|
632
|
+
suppress_output = arguments.get("suppress_output", False)
|
|
633
|
+
|
|
634
|
+
# Always add results to the base result for file saving
|
|
635
|
+
result["results"] = matches
|
|
636
|
+
|
|
637
|
+
# Handle file output if requested
|
|
638
|
+
if output_file:
|
|
639
|
+
try:
|
|
640
|
+
# Create detailed output for file
|
|
641
|
+
file_content = {
|
|
642
|
+
"success": True,
|
|
643
|
+
"count": len(matches),
|
|
644
|
+
"truncated": truncated,
|
|
645
|
+
"elapsed_ms": elapsed_ms,
|
|
646
|
+
"results": matches,
|
|
647
|
+
"summary": fd_rg_utils.summarize_search_results(matches),
|
|
648
|
+
"grouped_by_file": fd_rg_utils.group_matches_by_file(matches)["files"] if matches else []
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
# Convert to JSON for file output
|
|
652
|
+
import json
|
|
653
|
+
json_content = json.dumps(file_content, indent=2, ensure_ascii=False)
|
|
654
|
+
|
|
655
|
+
# Save to file
|
|
656
|
+
saved_file_path = self.file_output_manager.save_to_file(
|
|
657
|
+
content=json_content,
|
|
658
|
+
base_name=output_file
|
|
659
|
+
)
|
|
660
|
+
|
|
661
|
+
result["output_file_path"] = saved_file_path
|
|
662
|
+
result["file_saved"] = True
|
|
663
|
+
|
|
664
|
+
logger.info(f"Search results saved to: {saved_file_path}")
|
|
665
|
+
|
|
666
|
+
except Exception as e:
|
|
667
|
+
logger.error(f"Failed to save output to file: {e}")
|
|
668
|
+
result["file_save_error"] = str(e)
|
|
669
|
+
result["file_saved"] = False
|
|
670
|
+
|
|
671
|
+
# Handle file output and suppression
|
|
672
|
+
output_file = arguments.get("output_file")
|
|
673
|
+
suppress_output = arguments.get("suppress_output", False)
|
|
674
|
+
|
|
675
|
+
if output_file:
|
|
676
|
+
# Save full result to file
|
|
677
|
+
import json
|
|
678
|
+
json_content = json.dumps(result, indent=2, ensure_ascii=False)
|
|
679
|
+
file_path = self.file_output_manager.save_to_file(
|
|
680
|
+
content=json_content,
|
|
681
|
+
base_name=output_file
|
|
682
|
+
)
|
|
683
|
+
|
|
684
|
+
# If suppress_output is True, return minimal response
|
|
685
|
+
if suppress_output:
|
|
686
|
+
minimal_result = {
|
|
687
|
+
"success": result.get("success", True),
|
|
688
|
+
"count": result.get("count", 0),
|
|
689
|
+
"output_file": output_file,
|
|
690
|
+
"file_saved": f"Results saved to {file_path}"
|
|
691
|
+
}
|
|
692
|
+
# Cache the full result, not the minimal one
|
|
693
|
+
if self.cache and cache_key:
|
|
694
|
+
self.cache.set(cache_key, result)
|
|
695
|
+
return minimal_result
|
|
696
|
+
else:
|
|
697
|
+
# Include file info in full response
|
|
698
|
+
result["output_file"] = output_file
|
|
699
|
+
result["file_saved"] = f"Results saved to {file_path}"
|
|
700
|
+
elif suppress_output:
|
|
701
|
+
# If suppress_output is True but no output_file, remove results from response
|
|
702
|
+
result_copy = result.copy()
|
|
703
|
+
result_copy.pop("results", None)
|
|
704
|
+
# Cache the full result, not the minimal one
|
|
705
|
+
if self.cache and cache_key:
|
|
706
|
+
self.cache.set(cache_key, result)
|
|
707
|
+
return result_copy
|
|
708
|
+
|
|
509
709
|
# Cache the result
|
|
510
710
|
if self.cache and cache_key:
|
|
511
711
|
self.cache.set(cache_key, result)
|
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
#!/usr/bin/env python3
"""
Markdown Query Definitions

Tree-sitter queries for extracting Markdown elements including headers,
links, code blocks, lists, and other structural elements.
"""

from typing import Dict, List

# Markdown element extraction queries.
# NOTE(review): node names (e.g. ``atx_heading``, ``pipe_table``) must match
# the tree-sitter-markdown grammar actually installed — verify against it.
MARKDOWN_QUERIES: Dict[str, str] = {
    # Headers (H1-H6)
    "headers": """
(atx_heading
  (atx_h1_marker) @h1.marker
  heading_content: (inline) @h1.content) @h1.heading

(atx_heading
  (atx_h2_marker) @h2.marker
  heading_content: (inline) @h2.content) @h2.heading

(atx_heading
  (atx_h3_marker) @h3.marker
  heading_content: (inline) @h3.content) @h3.heading

(atx_heading
  (atx_h4_marker) @h4.marker
  heading_content: (inline) @h4.content) @h4.heading

(atx_heading
  (atx_h5_marker) @h5.marker
  heading_content: (inline) @h5.content) @h5.heading

(atx_heading
  (atx_h6_marker) @h6.marker
  heading_content: (inline) @h6.content) @h6.heading

(setext_heading
  heading_content: (paragraph) @setext.content
  (setext_h1_underline) @setext.h1) @setext.h1.heading

(setext_heading
  heading_content: (paragraph) @setext.content
  (setext_h2_underline) @setext.h2) @setext.h2.heading
""",
    # Code blocks
    "code_blocks": """
(fenced_code_block
  (fenced_code_block_delimiter) @code.start
  (info_string)? @code.language
  (code_fence_content) @code.content
  (fenced_code_block_delimiter) @code.end) @code.block

(indented_code_block
  (code_fence_content) @indented_code.content) @indented_code.block
""",
    # Inline code
    "inline_code": """
(code_span
  (code_span_delimiter) @inline_code.start
  (code_span_content) @inline_code.content
  (code_span_delimiter) @inline_code.end) @inline_code.span
""",
    # Links
    "links": """
(link
  (link_text) @link.text
  (link_destination) @link.url
  (link_title)? @link.title) @link.element

(autolink
  (uri_autolink) @autolink.uri) @autolink.element

(autolink
  (email_autolink) @autolink.email) @autolink.element

(reference_link
  (link_text) @ref_link.text
  (link_label) @ref_link.label) @ref_link.element

(link_reference_definition
  (link_label) @link_def.label
  (link_destination) @link_def.url
  (link_title)? @link_def.title) @link_def.element
""",
    # Images
    "images": """
(image
  (image_description) @image.alt
  (link_destination) @image.url
  (link_title)? @image.title) @image.element

(reference_image
  (image_description) @ref_image.alt
  (link_label) @ref_image.label) @ref_image.element
""",
    # Lists
    "lists": """
(list
  (list_item
    (list_marker) @list_item.marker
    (paragraph)? @list_item.content) @list_item.element) @list.element

(tight_list
  (list_item
    (list_marker) @tight_list_item.marker
    (paragraph)? @tight_list_item.content) @tight_list_item.element) @tight_list.element
""",
    # Emphasis and strong
    "emphasis": """
(emphasis
  (emphasis_delimiter) @emphasis.start
  (inline) @emphasis.content
  (emphasis_delimiter) @emphasis.end) @emphasis.element

(strong_emphasis
  (strong_emphasis_delimiter) @strong.start
  (inline) @strong.content
  (strong_emphasis_delimiter) @strong.end) @strong.element
""",
    # Blockquotes
    "blockquotes": """
(block_quote
  (block_quote_marker) @blockquote.marker
  (paragraph) @blockquote.content) @blockquote.element
""",
    # Tables
    "tables": """
(pipe_table
  (pipe_table_header
    (pipe_table_cell) @table_header.cell) @table.header
  (pipe_table_delimiter_row) @table.delimiter
  (pipe_table_row
    (pipe_table_cell) @table_row.cell) @table.row) @table.element
""",
    # Horizontal rules
    "horizontal_rules": """
(thematic_break) @hr.element
""",
    # HTML blocks
    "html_blocks": """
(html_block) @html.block
""",
    # Inline HTML
    "inline_html": """
(html_tag) @html.inline
""",
    # Strikethrough (if supported)
    "strikethrough": """
(strikethrough
  (strikethrough_delimiter) @strike.start
  (inline) @strike.content
  (strikethrough_delimiter) @strike.end) @strike.element
""",
    # Task lists (if supported)
    "task_lists": """
(list_item
  (list_marker) @task.marker
  (task_list_marker_checked) @task.checked) @task.checked_item

(list_item
  (list_marker) @task.marker
  (task_list_marker_unchecked) @task.unchecked) @task.unchecked_item
""",
    # Footnotes
    "footnotes": """
(footnote_reference
  (footnote_label) @footnote.ref_label) @footnote.reference

(footnote_definition
  (footnote_label) @footnote.def_label
  (paragraph) @footnote.content) @footnote.definition
""",
    # All text content
    "text_content": """
(paragraph
  (inline) @text.content) @text.paragraph

(inline) @text.inline
""",
    # Document structure
    "document": """
(document) @document.root
""",
    # All elements (comprehensive)
    "all_elements": """
(atx_heading) @element.heading
(setext_heading) @element.heading
(fenced_code_block) @element.code_block
(indented_code_block) @element.code_block
(code_span) @element.inline_code
(link) @element.link
(autolink) @element.autolink
(reference_link) @element.ref_link
(image) @element.image
(reference_image) @element.ref_image
(list) @element.list
(tight_list) @element.list
(emphasis) @element.emphasis
(strong_emphasis) @element.strong
(strikethrough) @element.strikethrough
(block_quote) @element.blockquote
(pipe_table) @element.table
(thematic_break) @element.hr
(html_block) @element.html_block
(html_tag) @element.html_inline
(footnote_reference) @element.footnote_ref
(footnote_definition) @element.footnote_def
(paragraph) @element.paragraph
""",
}

# Query aliases for convenience.  Every alias maps to a canonical key of
# MARKDOWN_QUERIES; get_all_queries() guards against a dangling target anyway.
QUERY_ALIASES: Dict[str, str] = {
    "heading": "headers",
    "h1": "headers",
    "h2": "headers",
    "h3": "headers",
    "h4": "headers",
    "h5": "headers",
    "h6": "headers",
    "code": "code_blocks",
    "fenced_code": "code_blocks",
    "code_span": "inline_code",
    "link": "links",
    "url": "links",
    "image": "images",
    "img": "images",
    "list": "lists",
    "ul": "lists",
    "ol": "lists",
    "em": "emphasis",
    "strong": "emphasis",
    "bold": "emphasis",
    "italic": "emphasis",
    "quote": "blockquotes",
    "blockquote": "blockquotes",
    "table": "tables",
    "hr": "horizontal_rules",
    "html": "html_blocks",
    "strike": "strikethrough",
    "task": "task_lists",
    "todo": "task_lists",
    "footnote": "footnotes",
    "note": "footnotes",
    "text": "text_content",
    "paragraph": "text_content",
    "all": "all_elements",
    "everything": "all_elements",
}

# Human-readable descriptions keyed by canonical query name.
# Hoisted to module level so it is built once, not on every lookup.
_QUERY_DESCRIPTIONS: Dict[str, str] = {
    "headers": "Extract all heading elements (H1-H6, both ATX and Setext styles)",
    "code_blocks": "Extract fenced and indented code blocks",
    "inline_code": "Extract inline code spans",
    "links": "Extract all types of links (inline, reference, autolinks)",
    "images": "Extract image elements (inline and reference)",
    "lists": "Extract ordered and unordered lists",
    "emphasis": "Extract emphasis and strong emphasis elements",
    "blockquotes": "Extract blockquote elements",
    "tables": "Extract pipe table elements",
    "horizontal_rules": "Extract horizontal rule elements",
    "html_blocks": "Extract HTML block elements",
    "inline_html": "Extract inline HTML elements",
    "strikethrough": "Extract strikethrough elements",
    "task_lists": "Extract task list items (checkboxes)",
    "footnotes": "Extract footnote references and definitions",
    "text_content": "Extract all text content",
    "document": "Extract document root",
    "all_elements": "Extract all Markdown elements",
}


def get_query(query_name: str) -> str:
    """
    Get a query by name, supporting aliases.

    Args:
        query_name: Name of the query or alias

    Returns:
        Query string

    Raises:
        KeyError: If query name is not found
    """
    # Resolve an alias to its canonical name; unknown names pass through
    # unchanged so the membership check below yields the error message.
    resolved = QUERY_ALIASES.get(query_name, query_name)
    if resolved in MARKDOWN_QUERIES:
        return MARKDOWN_QUERIES[resolved]
    raise KeyError(f"Unknown query: {query_name}")


def get_available_queries() -> List[str]:
    """
    Get list of all available query names including aliases.

    Returns:
        List of query names, sorted alphabetically
    """
    return sorted([*MARKDOWN_QUERIES, *QUERY_ALIASES])


def get_query_info(query_name: str) -> Dict[str, str]:
    """
    Get information about a query.

    Args:
        query_name: Name of the query (canonical name or alias)

    Returns:
        Dictionary with query information, or an ``{"error": ...}`` dict
        when the query name is unknown.
    """
    try:
        query_string = get_query(query_name)
    except KeyError:
        return {"error": f"Query '{query_name}' not found"}

    is_alias = query_name in QUERY_ALIASES
    actual_name = QUERY_ALIASES.get(query_name, query_name)
    return {
        "name": query_name,
        "actual_name": actual_name,
        "is_alias": is_alias,
        "query": query_string,
        "description": _get_query_description(actual_name),
    }


def _get_query_description(query_name: str) -> str:
    """Get description for a canonical query name."""
    return _QUERY_DESCRIPTIONS.get(query_name, "No description available")


def get_all_queries() -> Dict[str, str]:
    """
    Get all queries for the query loader.

    Returns:
        Dictionary mapping query names (canonical names and aliases)
        to query strings
    """
    # Start from the canonical queries, then add aliases pointing at the
    # same query strings.  Aliases with a dangling target are skipped.
    all_queries = MARKDOWN_QUERIES.copy()
    for alias, target in QUERY_ALIASES.items():
        if target in MARKDOWN_QUERIES:
            all_queries[alias] = MARKDOWN_QUERIES[target]
    return all_queries


# Export main functions and constants
__all__ = [
    "MARKDOWN_QUERIES",
    "QUERY_ALIASES",
    "get_query",
    "get_available_queries",
    "get_query_info",
    "get_all_queries"
]
|