tree-sitter-analyzer 1.7.1__py3-none-any.whl → 1.7.3__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer has been flagged as potentially problematic. See the release's advisory details on the registry page for more information.

@@ -13,6 +13,7 @@ from pathlib import Path
13
13
  from typing import Any
14
14
 
15
15
  from ..utils.error_handler import handle_mcp_errors
16
+ from ..utils.file_output_manager import FileOutputManager
16
17
  from ..utils.gitignore_detector import get_default_detector
17
18
  from ..utils.search_cache import get_default_cache
18
19
  from . import fd_rg_utils
@@ -36,6 +37,7 @@ class SearchContentTool(BaseMCPTool):
36
37
  """
37
38
  super().__init__(project_root)
38
39
  self.cache = get_default_cache() if enable_cache else None
40
+ self.file_output_manager = FileOutputManager(project_root)
39
41
 
40
42
  def get_tool_definition(self) -> dict[str, Any]:
41
43
  return {
@@ -153,6 +155,15 @@ class SearchContentTool(BaseMCPTool):
153
155
  "default": False,
154
156
  "description": "Return only the total match count as a number. Most token-efficient option for count queries. Takes priority over all other formats",
155
157
  },
158
+ "output_file": {
159
+ "type": "string",
160
+ "description": "Optional filename to save output to file (extension auto-detected based on content)",
161
+ },
162
+ "suppress_output": {
163
+ "type": "boolean",
164
+ "description": "When true and output_file is specified, suppress detailed output in response to save tokens",
165
+ "default": False,
166
+ },
156
167
  },
157
168
  "required": ["query"],
158
169
  "anyOf": [
@@ -312,12 +323,18 @@ class SearchContentTool(BaseMCPTool):
312
323
  cached_result["cache_hit"] = True
313
324
  return cached_result
314
325
 
315
- # Clamp counts to safety limits
316
- max_count = fd_rg_utils.clamp_int(
317
- arguments.get("max_count"),
318
- fd_rg_utils.DEFAULT_RESULTS_LIMIT,
319
- fd_rg_utils.DEFAULT_RESULTS_LIMIT,
320
- )
326
+ # Handle max_count parameter properly
327
+ # If user specifies max_count, use it directly (with reasonable upper limit)
328
+ # If not specified, use None to let ripgrep return all matches (subject to hard cap later)
329
+ max_count = arguments.get("max_count")
330
+ if max_count is not None:
331
+ # Clamp user-specified max_count to reasonable limits
332
+ # Use 1 as minimum default, but respect user's small values
333
+ max_count = fd_rg_utils.clamp_int(
334
+ max_count,
335
+ 1, # Minimum default value
336
+ fd_rg_utils.DEFAULT_RESULTS_LIMIT, # Upper limit for safety
337
+ )
321
338
  timeout_ms = arguments.get("timeout_ms")
322
339
 
323
340
  # Note: --files-from is not supported in this ripgrep version
@@ -461,9 +478,18 @@ class SearchContentTool(BaseMCPTool):
461
478
 
462
479
  # Handle normal mode
463
480
  matches = fd_rg_utils.parse_rg_json_lines_to_matches(out)
464
- truncated = len(matches) >= fd_rg_utils.MAX_RESULTS_HARD_CAP
465
- if truncated:
466
- matches = matches[: fd_rg_utils.MAX_RESULTS_HARD_CAP]
481
+
482
+ # Apply user-specified max_count limit if provided
483
+ # Note: ripgrep's -m option limits matches per file, not total matches
484
+ # So we need to apply the total limit here in post-processing
485
+ user_max_count = arguments.get("max_count")
486
+ if user_max_count is not None and len(matches) > user_max_count:
487
+ matches = matches[:user_max_count]
488
+ truncated = True
489
+ else:
490
+ truncated = len(matches) >= fd_rg_utils.MAX_RESULTS_HARD_CAP
491
+ if truncated:
492
+ matches = matches[: fd_rg_utils.MAX_RESULTS_HARD_CAP]
467
493
 
468
494
  # Apply path optimization if requested
469
495
  optimize_paths = arguments.get("optimize_paths", False)
@@ -475,6 +501,54 @@ class SearchContentTool(BaseMCPTool):
475
501
  if group_by_file and matches:
476
502
  result = fd_rg_utils.group_matches_by_file(matches)
477
503
 
504
+ # Handle output suppression and file output for grouped results
505
+ output_file = arguments.get("output_file")
506
+ suppress_output = arguments.get("suppress_output", False)
507
+
508
+ # Handle file output if requested
509
+ if output_file:
510
+ try:
511
+ # Save full result to file
512
+ import json
513
+ json_content = json.dumps(result, indent=2, ensure_ascii=False)
514
+ file_path = self.file_output_manager.save_to_file(
515
+ content=json_content,
516
+ base_name=output_file
517
+ )
518
+
519
+ # If suppress_output is True, return minimal response
520
+ if suppress_output:
521
+ minimal_result = {
522
+ "success": result.get("success", True),
523
+ "count": result.get("count", 0),
524
+ "output_file": output_file,
525
+ "file_saved": f"Results saved to {file_path}"
526
+ }
527
+ # Cache the full result, not the minimal one
528
+ if self.cache and cache_key:
529
+ self.cache.set(cache_key, result)
530
+ return minimal_result
531
+ else:
532
+ # Include file info in full response
533
+ result["output_file"] = output_file
534
+ result["file_saved"] = f"Results saved to {file_path}"
535
+ except Exception as e:
536
+ logger.error(f"Failed to save output to file: {e}")
537
+ result["file_save_error"] = str(e)
538
+ result["file_saved"] = False
539
+ elif suppress_output:
540
+ # If suppress_output is True but no output_file, remove detailed results
541
+ minimal_result = {
542
+ "success": result.get("success", True),
543
+ "count": result.get("count", 0),
544
+ "summary": result.get("summary", {}),
545
+ "meta": result.get("meta", {})
546
+ }
547
+ # Cache the full result, not the minimal one
548
+ if self.cache and cache_key:
549
+ self.cache.set(cache_key, result)
550
+ return minimal_result
551
+
478
552
  # Cache the result
479
553
  if self.cache and cache_key:
480
554
  self.cache.set(cache_key, result)
@@ -492,6 +566,54 @@ class SearchContentTool(BaseMCPTool):
492
566
  "summary": summary,
493
567
  }
494
568
 
569
+ # Handle output suppression and file output for summary results
570
+ output_file = arguments.get("output_file")
571
+ suppress_output = arguments.get("suppress_output", False)
572
+
573
+ # Handle file output if requested
574
+ if output_file:
575
+ try:
576
+ # Save full result to file
577
+ import json
578
+ json_content = json.dumps(result, indent=2, ensure_ascii=False)
579
+ file_path = self.file_output_manager.save_to_file(
580
+ content=json_content,
581
+ base_name=output_file
582
+ )
583
+
584
+ # If suppress_output is True, return minimal response
585
+ if suppress_output:
586
+ minimal_result = {
587
+ "success": result.get("success", True),
588
+ "count": result.get("count", 0),
589
+ "output_file": output_file,
590
+ "file_saved": f"Results saved to {file_path}"
591
+ }
592
+ # Cache the full result, not the minimal one
593
+ if self.cache and cache_key:
594
+ self.cache.set(cache_key, result)
595
+ return minimal_result
596
+ else:
597
+ # Include file info in full response
598
+ result["output_file"] = output_file
599
+ result["file_saved"] = f"Results saved to {file_path}"
600
+ except Exception as e:
601
+ logger.error(f"Failed to save output to file: {e}")
602
+ result["file_save_error"] = str(e)
603
+ result["file_saved"] = False
604
+ elif suppress_output:
605
+ # If suppress_output is True but no output_file, remove detailed results
606
+ minimal_result = {
607
+ "success": result.get("success", True),
608
+ "count": result.get("count", 0),
609
+ "summary": result.get("summary", {}),
610
+ "elapsed_ms": result.get("elapsed_ms", 0)
611
+ }
612
+ # Cache the full result, not the minimal one
613
+ if self.cache and cache_key:
614
+ self.cache.set(cache_key, result)
615
+ return minimal_result
616
+
495
617
  # Cache the result
496
618
  if self.cache and cache_key:
497
619
  self.cache.set(cache_key, result)
@@ -503,9 +625,87 @@ class SearchContentTool(BaseMCPTool):
503
625
  "count": len(matches),
504
626
  "truncated": truncated,
505
627
  "elapsed_ms": elapsed_ms,
506
- "results": matches,
507
628
  }
508
629
 
630
+ # Handle output suppression and file output
631
+ output_file = arguments.get("output_file")
632
+ suppress_output = arguments.get("suppress_output", False)
633
+
634
+ # Always add results to the base result for file saving
635
+ result["results"] = matches
636
+
637
+ # Handle file output if requested
638
+ if output_file:
639
+ try:
640
+ # Create detailed output for file
641
+ file_content = {
642
+ "success": True,
643
+ "count": len(matches),
644
+ "truncated": truncated,
645
+ "elapsed_ms": elapsed_ms,
646
+ "results": matches,
647
+ "summary": fd_rg_utils.summarize_search_results(matches),
648
+ "grouped_by_file": fd_rg_utils.group_matches_by_file(matches)["files"] if matches else []
649
+ }
650
+
651
+ # Convert to JSON for file output
652
+ import json
653
+ json_content = json.dumps(file_content, indent=2, ensure_ascii=False)
654
+
655
+ # Save to file
656
+ saved_file_path = self.file_output_manager.save_to_file(
657
+ content=json_content,
658
+ base_name=output_file
659
+ )
660
+
661
+ result["output_file_path"] = saved_file_path
662
+ result["file_saved"] = True
663
+
664
+ logger.info(f"Search results saved to: {saved_file_path}")
665
+
666
+ except Exception as e:
667
+ logger.error(f"Failed to save output to file: {e}")
668
+ result["file_save_error"] = str(e)
669
+ result["file_saved"] = False
670
+
671
+ # Handle file output and suppression
672
+ output_file = arguments.get("output_file")
673
+ suppress_output = arguments.get("suppress_output", False)
674
+
675
+ if output_file:
676
+ # Save full result to file
677
+ import json
678
+ json_content = json.dumps(result, indent=2, ensure_ascii=False)
679
+ file_path = self.file_output_manager.save_to_file(
680
+ content=json_content,
681
+ base_name=output_file
682
+ )
683
+
684
+ # If suppress_output is True, return minimal response
685
+ if suppress_output:
686
+ minimal_result = {
687
+ "success": result.get("success", True),
688
+ "count": result.get("count", 0),
689
+ "output_file": output_file,
690
+ "file_saved": f"Results saved to {file_path}"
691
+ }
692
+ # Cache the full result, not the minimal one
693
+ if self.cache and cache_key:
694
+ self.cache.set(cache_key, result)
695
+ return minimal_result
696
+ else:
697
+ # Include file info in full response
698
+ result["output_file"] = output_file
699
+ result["file_saved"] = f"Results saved to {file_path}"
700
+ elif suppress_output:
701
+ # If suppress_output is True but no output_file, remove results from response
702
+ result_copy = result.copy()
703
+ result_copy.pop("results", None)
704
+ # Cache the full result, not the minimal one
705
+ if self.cache and cache_key:
706
+ self.cache.set(cache_key, result)
707
+ return result_copy
708
+
509
709
  # Cache the result
510
710
  if self.cache and cache_key:
511
711
  self.cache.set(cache_key, result)
@@ -0,0 +1,379 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Markdown Query Definitions
4
+
5
+ Tree-sitter queries for extracting Markdown elements including headers,
6
+ links, code blocks, lists, and other structural elements.
7
+ """
8
+
9
+ from typing import Dict, List
10
+
11
+ # Markdown element extraction queries
12
+ MARKDOWN_QUERIES: Dict[str, str] = {
13
+ # Headers (H1-H6)
14
+ "headers": """
15
+ (atx_heading
16
+ (atx_h1_marker) @h1.marker
17
+ heading_content: (inline) @h1.content) @h1.heading
18
+
19
+ (atx_heading
20
+ (atx_h2_marker) @h2.marker
21
+ heading_content: (inline) @h2.content) @h2.heading
22
+
23
+ (atx_heading
24
+ (atx_h3_marker) @h3.marker
25
+ heading_content: (inline) @h3.content) @h3.heading
26
+
27
+ (atx_heading
28
+ (atx_h4_marker) @h4.marker
29
+ heading_content: (inline) @h4.content) @h4.heading
30
+
31
+ (atx_heading
32
+ (atx_h5_marker) @h5.marker
33
+ heading_content: (inline) @h5.content) @h5.heading
34
+
35
+ (atx_heading
36
+ (atx_h6_marker) @h6.marker
37
+ heading_content: (inline) @h6.content) @h6.heading
38
+
39
+ (setext_heading
40
+ heading_content: (paragraph) @setext.content
41
+ (setext_h1_underline) @setext.h1) @setext.h1.heading
42
+
43
+ (setext_heading
44
+ heading_content: (paragraph) @setext.content
45
+ (setext_h2_underline) @setext.h2) @setext.h2.heading
46
+ """,
47
+
48
+ # Code blocks
49
+ "code_blocks": """
50
+ (fenced_code_block
51
+ (fenced_code_block_delimiter) @code.start
52
+ (info_string)? @code.language
53
+ (code_fence_content) @code.content
54
+ (fenced_code_block_delimiter) @code.end) @code.block
55
+
56
+ (indented_code_block
57
+ (code_fence_content) @indented_code.content) @indented_code.block
58
+ """,
59
+
60
+ # Inline code
61
+ "inline_code": """
62
+ (code_span
63
+ (code_span_delimiter) @inline_code.start
64
+ (code_span_content) @inline_code.content
65
+ (code_span_delimiter) @inline_code.end) @inline_code.span
66
+ """,
67
+
68
+ # Links
69
+ "links": """
70
+ (link
71
+ (link_text) @link.text
72
+ (link_destination) @link.url
73
+ (link_title)? @link.title) @link.element
74
+
75
+ (autolink
76
+ (uri_autolink) @autolink.uri) @autolink.element
77
+
78
+ (autolink
79
+ (email_autolink) @autolink.email) @autolink.element
80
+
81
+ (reference_link
82
+ (link_text) @ref_link.text
83
+ (link_label) @ref_link.label) @ref_link.element
84
+
85
+ (link_reference_definition
86
+ (link_label) @link_def.label
87
+ (link_destination) @link_def.url
88
+ (link_title)? @link_def.title) @link_def.element
89
+ """,
90
+
91
+ # Images
92
+ "images": """
93
+ (image
94
+ (image_description) @image.alt
95
+ (link_destination) @image.url
96
+ (link_title)? @image.title) @image.element
97
+
98
+ (reference_image
99
+ (image_description) @ref_image.alt
100
+ (link_label) @ref_image.label) @ref_image.element
101
+ """,
102
+
103
+ # Lists
104
+ "lists": """
105
+ (list
106
+ (list_item
107
+ (list_marker) @list_item.marker
108
+ (paragraph)? @list_item.content) @list_item.element) @list.element
109
+
110
+ (tight_list
111
+ (list_item
112
+ (list_marker) @tight_list_item.marker
113
+ (paragraph)? @tight_list_item.content) @tight_list_item.element) @tight_list.element
114
+ """,
115
+
116
+ # Emphasis and strong
117
+ "emphasis": """
118
+ (emphasis
119
+ (emphasis_delimiter) @emphasis.start
120
+ (inline) @emphasis.content
121
+ (emphasis_delimiter) @emphasis.end) @emphasis.element
122
+
123
+ (strong_emphasis
124
+ (strong_emphasis_delimiter) @strong.start
125
+ (inline) @strong.content
126
+ (strong_emphasis_delimiter) @strong.end) @strong.element
127
+ """,
128
+
129
+ # Blockquotes
130
+ "blockquotes": """
131
+ (block_quote
132
+ (block_quote_marker) @blockquote.marker
133
+ (paragraph) @blockquote.content) @blockquote.element
134
+ """,
135
+
136
+ # Tables
137
+ "tables": """
138
+ (pipe_table
139
+ (pipe_table_header
140
+ (pipe_table_cell) @table_header.cell) @table.header
141
+ (pipe_table_delimiter_row) @table.delimiter
142
+ (pipe_table_row
143
+ (pipe_table_cell) @table_row.cell) @table.row) @table.element
144
+ """,
145
+
146
+ # Horizontal rules
147
+ "horizontal_rules": """
148
+ (thematic_break) @hr.element
149
+ """,
150
+
151
+ # HTML blocks
152
+ "html_blocks": """
153
+ (html_block) @html.block
154
+ """,
155
+
156
+ # Inline HTML
157
+ "inline_html": """
158
+ (html_tag) @html.inline
159
+ """,
160
+
161
+ # Strikethrough (if supported)
162
+ "strikethrough": """
163
+ (strikethrough
164
+ (strikethrough_delimiter) @strike.start
165
+ (inline) @strike.content
166
+ (strikethrough_delimiter) @strike.end) @strike.element
167
+ """,
168
+
169
+ # Task lists (if supported)
170
+ "task_lists": """
171
+ (list_item
172
+ (list_marker) @task.marker
173
+ (task_list_marker_checked) @task.checked) @task.checked_item
174
+
175
+ (list_item
176
+ (list_marker) @task.marker
177
+ (task_list_marker_unchecked) @task.unchecked) @task.unchecked_item
178
+ """,
179
+
180
+ # Footnotes
181
+ "footnotes": """
182
+ (footnote_reference
183
+ (footnote_label) @footnote.ref_label) @footnote.reference
184
+
185
+ (footnote_definition
186
+ (footnote_label) @footnote.def_label
187
+ (paragraph) @footnote.content) @footnote.definition
188
+ """,
189
+
190
+ # All text content
191
+ "text_content": """
192
+ (paragraph
193
+ (inline) @text.content) @text.paragraph
194
+
195
+ (inline) @text.inline
196
+ """,
197
+
198
+ # Document structure
199
+ "document": """
200
+ (document) @document.root
201
+ """,
202
+
203
+ # All elements (comprehensive)
204
+ "all_elements": """
205
+ (atx_heading) @element.heading
206
+ (setext_heading) @element.heading
207
+ (fenced_code_block) @element.code_block
208
+ (indented_code_block) @element.code_block
209
+ (code_span) @element.inline_code
210
+ (link) @element.link
211
+ (autolink) @element.autolink
212
+ (reference_link) @element.ref_link
213
+ (image) @element.image
214
+ (reference_image) @element.ref_image
215
+ (list) @element.list
216
+ (tight_list) @element.list
217
+ (emphasis) @element.emphasis
218
+ (strong_emphasis) @element.strong
219
+ (strikethrough) @element.strikethrough
220
+ (block_quote) @element.blockquote
221
+ (pipe_table) @element.table
222
+ (thematic_break) @element.hr
223
+ (html_block) @element.html_block
224
+ (html_tag) @element.html_inline
225
+ (footnote_reference) @element.footnote_ref
226
+ (footnote_definition) @element.footnote_def
227
+ (paragraph) @element.paragraph
228
+ """,
229
+ }
230
+
231
+ # Query aliases for convenience
232
+ QUERY_ALIASES: Dict[str, str] = {
233
+ "heading": "headers",
234
+ "h1": "headers",
235
+ "h2": "headers",
236
+ "h3": "headers",
237
+ "h4": "headers",
238
+ "h5": "headers",
239
+ "h6": "headers",
240
+ "code": "code_blocks",
241
+ "fenced_code": "code_blocks",
242
+ "code_span": "inline_code",
243
+ "link": "links",
244
+ "url": "links",
245
+ "image": "images",
246
+ "img": "images",
247
+ "list": "lists",
248
+ "ul": "lists",
249
+ "ol": "lists",
250
+ "em": "emphasis",
251
+ "strong": "emphasis",
252
+ "bold": "emphasis",
253
+ "italic": "emphasis",
254
+ "quote": "blockquotes",
255
+ "blockquote": "blockquotes",
256
+ "table": "tables",
257
+ "hr": "horizontal_rules",
258
+ "html": "html_blocks",
259
+ "strike": "strikethrough",
260
+ "task": "task_lists",
261
+ "todo": "task_lists",
262
+ "footnote": "footnotes",
263
+ "note": "footnotes",
264
+ "text": "text_content",
265
+ "paragraph": "text_content",
266
+ "all": "all_elements",
267
+ "everything": "all_elements",
268
+ }
269
+
270
+ def get_query(query_name: str) -> str:
271
+ """
272
+ Get a query by name, supporting aliases
273
+
274
+ Args:
275
+ query_name: Name of the query or alias
276
+
277
+ Returns:
278
+ Query string
279
+
280
+ Raises:
281
+ KeyError: If query name is not found
282
+ """
283
+ # Check direct queries first
284
+ if query_name in MARKDOWN_QUERIES:
285
+ return MARKDOWN_QUERIES[query_name]
286
+
287
+ # Check aliases
288
+ if query_name in QUERY_ALIASES:
289
+ actual_query = QUERY_ALIASES[query_name]
290
+ return MARKDOWN_QUERIES[actual_query]
291
+
292
+ raise KeyError(f"Unknown query: {query_name}")
293
+
294
+ def get_available_queries() -> List[str]:
295
+ """
296
+ Get list of all available query names including aliases
297
+
298
+ Returns:
299
+ List of query names
300
+ """
301
+ queries = list(MARKDOWN_QUERIES.keys())
302
+ aliases = list(QUERY_ALIASES.keys())
303
+ return sorted(queries + aliases)
304
+
305
+ def get_query_info(query_name: str) -> Dict[str, str]:
306
+ """
307
+ Get information about a query
308
+
309
+ Args:
310
+ query_name: Name of the query
311
+
312
+ Returns:
313
+ Dictionary with query information
314
+ """
315
+ try:
316
+ query_string = get_query(query_name)
317
+ is_alias = query_name in QUERY_ALIASES
318
+ actual_name = QUERY_ALIASES.get(query_name, query_name) if is_alias else query_name
319
+
320
+ return {
321
+ "name": query_name,
322
+ "actual_name": actual_name,
323
+ "is_alias": is_alias,
324
+ "query": query_string,
325
+ "description": _get_query_description(actual_name)
326
+ }
327
+ except KeyError:
328
+ return {"error": f"Query '{query_name}' not found"}
329
+
330
+ def _get_query_description(query_name: str) -> str:
331
+ """Get description for a query"""
332
+ descriptions = {
333
+ "headers": "Extract all heading elements (H1-H6, both ATX and Setext styles)",
334
+ "code_blocks": "Extract fenced and indented code blocks",
335
+ "inline_code": "Extract inline code spans",
336
+ "links": "Extract all types of links (inline, reference, autolinks)",
337
+ "images": "Extract image elements (inline and reference)",
338
+ "lists": "Extract ordered and unordered lists",
339
+ "emphasis": "Extract emphasis and strong emphasis elements",
340
+ "blockquotes": "Extract blockquote elements",
341
+ "tables": "Extract pipe table elements",
342
+ "horizontal_rules": "Extract horizontal rule elements",
343
+ "html_blocks": "Extract HTML block elements",
344
+ "inline_html": "Extract inline HTML elements",
345
+ "strikethrough": "Extract strikethrough elements",
346
+ "task_lists": "Extract task list items (checkboxes)",
347
+ "footnotes": "Extract footnote references and definitions",
348
+ "text_content": "Extract all text content",
349
+ "document": "Extract document root",
350
+ "all_elements": "Extract all Markdown elements"
351
+ }
352
+ return descriptions.get(query_name, "No description available")
353
+
354
+ def get_all_queries() -> dict[str, str]:
355
+ """
356
+ Get all queries for the query loader
357
+
358
+ Returns:
359
+ Dictionary mapping query names to query strings
360
+ """
361
+ # Combine direct queries and aliases
362
+ all_queries = MARKDOWN_QUERIES.copy()
363
+
364
+ # Add aliases that point to actual queries
365
+ for alias, target in QUERY_ALIASES.items():
366
+ if target in MARKDOWN_QUERIES:
367
+ all_queries[alias] = MARKDOWN_QUERIES[target]
368
+
369
+ return all_queries
370
+
371
+ # Export main functions and constants
372
+ __all__ = [
373
+ "MARKDOWN_QUERIES",
374
+ "QUERY_ALIASES",
375
+ "get_query",
376
+ "get_available_queries",
377
+ "get_query_info",
378
+ "get_all_queries"
379
+ ]
@@ -156,6 +156,7 @@ class QueryLoader:
156
156
  "cpp",
157
157
  "rust",
158
158
  "go",
159
+ "markdown",
159
160
  ]
160
161
 
161
162
  for language in known_languages: