tree-sitter-analyzer 1.2.2__py3-none-any.whl → 1.2.4__py3-none-any.whl

This diff shows the changes between publicly released versions of this package as they appear in their respective public registries; it is provided for informational purposes only.

Potentially problematic release. This version of tree-sitter-analyzer might be problematic; see the registry's advisory page for more details.

@@ -150,7 +150,7 @@ class JavaElementExtractor(ElementExtractor):
  def extract_imports(
  self, tree: "tree_sitter.Tree", source_code: str
  ) -> list[Import]:
- """Extract Java import statements"""
+ """Extract Java import statements with enhanced robustness"""
  self.source_code = source_code
  self.content_lines = source_code.split("\n")

@@ -172,9 +172,79 @@ class JavaElementExtractor(ElementExtractor):
  # After package and imports come class declarations, so stop
  break

+ # Fallback: if no imports found via tree-sitter, try regex-based extraction
+ if not imports and "import" in source_code:
+ log_debug("No imports found via tree-sitter, trying regex fallback")
+ fallback_imports = self._extract_imports_fallback(source_code)
+ imports.extend(fallback_imports)
+
  log_debug(f"Extracted {len(imports)} imports")
  return imports

+ def _extract_imports_fallback(self, source_code: str) -> list[Import]:
+ """Fallback import extraction using regex when tree-sitter fails"""
+ imports = []
+ lines = source_code.split("\n")
+
+ for line_num, line in enumerate(lines, 1):
+ line = line.strip()
+ if line.startswith("import ") and line.endswith(";"):
+ # Extract import statement
+ import_content = line[:-1] # Remove semicolon
+
+ if "static" in import_content:
+ # Static import
+ static_match = re.search(
+ r"import\s+static\s+([\w.]+)", import_content
+ )
+ if static_match:
+ import_name = static_match.group(1)
+ if import_content.endswith(".*"):
+ import_name = import_name.replace(".*", "")
+ parts = import_name.split(".")
+ if len(parts) > 1:
+ import_name = ".".join(parts[:-1])
+
+ imports.append(
+ Import(
+ name=import_name,
+ start_line=line_num,
+ end_line=line_num,
+ raw_text=line,
+ language="java",
+ module_name=import_name,
+ is_static=True,
+ is_wildcard=import_content.endswith(".*"),
+ import_statement=import_content,
+ )
+ )
+ else:
+ # Normal import
+ normal_match = re.search(r"import\s+([\w.]+)", import_content)
+ if normal_match:
+ import_name = normal_match.group(1)
+ if import_content.endswith(".*"):
+ if import_name.endswith(".*"):
+ import_name = import_name[:-2]
+ elif import_name.endswith("."):
+ import_name = import_name[:-1]
+
+ imports.append(
+ Import(
+ name=import_name,
+ start_line=line_num,
+ end_line=line_num,
+ raw_text=line,
+ language="java",
+ module_name=import_name,
+ is_static=False,
+ is_wildcard=import_content.endswith(".*"),
+ import_statement=import_content,
+ )
+ )
+
+ return imports
+
  def extract_packages(
  self, tree: "tree_sitter.Tree", source_code: str
  ) -> list[Package]:
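
The regex fallback above only recognizes single-line "import ...;" statements, which covers the common case where tree-sitter produces no import nodes even though the import block itself is well formed. A rough usage sketch (illustrative only; the module path and the no-argument construction of JavaElementExtractor are assumptions, not taken from this diff):

    # Illustrative sketch - the import path below is an assumption.
    from tree_sitter_analyzer.languages.java_plugin import JavaElementExtractor

    source = (
        "import java.util.List;\n"
        "import static org.junit.Assert.assertEquals;\n"
        "import com.example.util.*;\n"
    )

    extractor = JavaElementExtractor()
    # extract_imports() only falls back to this helper when tree-sitter
    # returned no imports for a source that contains the word "import".
    for imp in extractor._extract_imports_fallback(source):
        print(imp.name, imp.is_static, imp.is_wildcard)
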
@@ -60,8 +60,11 @@ from ..utils import setup_logger
  from . import MCP_INFO
  from .resources import CodeFileResource, ProjectStatsResource
  from .tools.analyze_scale_tool import AnalyzeScaleTool
+ from .tools.find_and_grep_tool import FindAndGrepTool
+ from .tools.list_files_tool import ListFilesTool
  from .tools.query_tool import QueryTool
  from .tools.read_partial_tool import ReadPartialTool
+ from .tools.search_content_tool import SearchContentTool
  from .tools.table_format_tool import TableFormatTool

  # Set up logging
@@ -87,11 +90,15 @@ class TreeSitterAnalyzerMCPServer:
  self.security_validator = SecurityValidator(project_root)
  # Use unified analysis engine instead of deprecated AdvancedAnalyzer

- # Initialize MCP tools with security validation (four core tools)
+ # Initialize MCP tools with security validation (core tools + fd/rg tools)
  self.query_tool = QueryTool(project_root) # query_code
  self.read_partial_tool = ReadPartialTool(project_root) # extract_code_section
  self.table_format_tool = TableFormatTool(project_root) # analyze_code_structure
  self.analyze_scale_tool = AnalyzeScaleTool(project_root) # check_code_scale
+ # New fd/rg tools
+ self.list_files_tool = ListFilesTool(project_root) # list_files
+ self.search_content_tool = SearchContentTool(project_root) # search_content
+ self.find_and_grep_tool = FindAndGrepTool(project_root) # find_and_grep

  # Optional universal tool to satisfy initialization tests
  try:
@@ -466,6 +473,9 @@ class TreeSitterAnalyzerMCPServer:
  },
  ),
  Tool(**self.query_tool.get_tool_definition()),
+ Tool(**self.list_files_tool.get_tool_definition()),
+ Tool(**self.search_content_tool.get_tool_definition()),
+ Tool(**self.find_and_grep_tool.get_tool_definition()),
  ]

  logger.info(f"Returning {len(tools)} tools: {[t.name for t in tools]}")
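
Registration relies on each tool exposing a get_tool_definition() mapping that unpacks cleanly into the MCP SDK's Tool model, i.e. at least a name, a description, and a JSON inputSchema. A minimal sketch of the shape implied by the Tool(**...) calls above (the schema contents are hypothetical, not taken from this diff):

    # Hypothetical get_tool_definition() for the new list_files tool; only the
    # keys consumed by mcp.types.Tool are shown.
    def get_tool_definition(self) -> dict:
        return {
            "name": "list_files",
            "description": "List files under the project root using fd.",
            "inputSchema": {
                "type": "object",
                "properties": {
                    "roots": {"type": "array", "items": {"type": "string"}},
                    "pattern": {"type": "string"},
                },
                "required": ["roots"],
            },
        }
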
@@ -545,6 +555,15 @@ class TreeSitterAnalyzerMCPServer:
  elif name == "query_code":
  result = await self.query_tool.execute(arguments)

+ elif name == "list_files":
+ result = await self.list_files_tool.execute(arguments)
+
+ elif name == "search_content":
+ result = await self.search_content_tool.execute(arguments)
+
+ elif name == "find_and_grep":
+ result = await self.find_and_grep_tool.execute(arguments)
+
  else:
  raise ValueError(f"Unknown tool: {name}")

@@ -653,6 +672,9 @@ class TreeSitterAnalyzerMCPServer:
  self.read_partial_tool.set_project_path(project_path)
  self.table_format_tool.set_project_path(project_path)
  self.analyze_scale_tool.set_project_path(project_path)
+ self.list_files_tool.set_project_path(project_path)
+ self.search_content_tool.set_project_path(project_path)
+ self.find_and_grep_tool.set_project_path(project_path)

  # Update universal tool if available
  if hasattr(self, "universal_analyze_tool") and self.universal_analyze_tool:
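
The new call_tool branches above forward the raw arguments mapping straight to each tool's execute() coroutine. As an illustration, a search_content request handled by that dispatcher might carry arguments like the following; the key names are assumptions inferred from the keyword parameters of build_rg_command in the new fd/rg utilities module, not a documented schema:

    # Hypothetical arguments for the search_content branch, as seen inside
    # the async call_tool handler.
    arguments = {
        "query": "TODO|FIXME",
        "roots": ["src"],
        "case": "smart",
        "include_globs": ["*.py"],
        "max_count": 100,
    }
    result = await self.search_content_tool.execute(arguments)
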
@@ -0,0 +1,550 @@
+ #!/usr/bin/env python3
+ """
+ Shared utilities for fd/ripgrep based MCP tools.
+
+ This module centralizes subprocess execution, command building, result caps,
+ and JSON line parsing for ripgrep.
+ """
+
+ from __future__ import annotations
+
+ import asyncio
+ import json
+ import os
+ import tempfile
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any
+
+ # Safety caps (hard limits)
+ MAX_RESULTS_HARD_CAP = 10000
+ DEFAULT_RESULTS_LIMIT = 2000
+
+ DEFAULT_RG_MAX_FILESIZE = "10M"
+ RG_MAX_FILESIZE_HARD_CAP_BYTES = 200 * 1024 * 1024 # 200M
+
+ DEFAULT_RG_TIMEOUT_MS = 4000
+ RG_TIMEOUT_HARD_CAP_MS = 30000
+
+
+ def clamp_int(value: int | None, default_value: int, hard_cap: int) -> int:
+ if value is None:
+ return default_value
+ try:
+ v = int(value)
+ except (TypeError, ValueError):
+ return default_value
+ return max(0, min(v, hard_cap))
+
+
+ def parse_size_to_bytes(size_str: str) -> int | None:
+ """Parse ripgrep --max-filesize strings like '10M', '200K' to bytes."""
+ if not size_str:
+ return None
+ s = size_str.strip().upper()
+ try:
+ if s.endswith("K"):
+ return int(float(s[:-1]) * 1024)
+ if s.endswith("M"):
+ return int(float(s[:-1]) * 1024 * 1024)
+ if s.endswith("G"):
+ return int(float(s[:-1]) * 1024 * 1024 * 1024)
+ return int(s)
+ except ValueError:
+ return None
+
+
+ async def run_command_capture(
+ cmd: list[str],
+ input_data: bytes | None = None,
+ timeout_ms: int | None = None,
+ ) -> tuple[int, bytes, bytes]:
+ """Run a subprocess and capture output.
+
+ Returns (returncode, stdout, stderr). On timeout, kills process and returns 124.
+ Separated into a util for easy monkeypatching in tests.
+ """
+ # Create process
+ proc = await asyncio.create_subprocess_exec(
+ *cmd,
+ stdin=asyncio.subprocess.PIPE if input_data is not None else None,
+ stdout=asyncio.subprocess.PIPE,
+ stderr=asyncio.subprocess.PIPE,
+ )
+
+ # Compute timeout seconds
+ timeout_s: float | None = None
+ if timeout_ms and timeout_ms > 0:
+ timeout_s = timeout_ms / 1000.0
+
+ try:
+ stdout, stderr = await asyncio.wait_for(
+ proc.communicate(input=input_data), timeout=timeout_s
+ )
+ return proc.returncode, stdout, stderr
+ except asyncio.TimeoutError:
+ try:
+ proc.kill()
+ finally:
+ with contextlib.suppress(Exception):
+ await proc.wait()
+ return 124, b"", f"Timeout after {timeout_ms} ms".encode()
+
+
+ def build_fd_command(
+ *,
+ pattern: str | None,
+ glob: bool,
+ types: list[str] | None,
+ extensions: list[str] | None,
+ exclude: list[str] | None,
+ depth: int | None,
+ follow_symlinks: bool,
+ hidden: bool,
+ no_ignore: bool,
+ size: list[str] | None,
+ changed_within: str | None,
+ changed_before: str | None,
+ full_path_match: bool,
+ absolute: bool,
+ limit: int | None,
+ roots: list[str],
+ ) -> list[str]:
+ """Build an fd command with appropriate flags."""
+ cmd: list[str] = ["fd", "--color", "never"]
+ if glob:
+ cmd.append("--glob")
+ if full_path_match:
+ cmd.append("-p")
+ if absolute:
+ cmd.append("-a")
+ if follow_symlinks:
+ cmd.append("-L")
+ if hidden:
+ cmd.append("-H")
+ if no_ignore:
+ cmd.append("-I")
+ if depth is not None:
+ cmd += ["-d", str(depth)]
+ if types:
+ for t in types:
+ cmd += ["-t", str(t)]
+ if extensions:
+ for ext in extensions:
+ if ext.startswith("."):
+ ext = ext[1:]
+ cmd += ["-e", ext]
+ if exclude:
+ for ex in exclude:
+ cmd += ["-E", ex]
+ if size:
+ for s in size:
+ cmd += ["-S", s]
+ if changed_within:
+ cmd += ["--changed-within", str(changed_within)]
+ if changed_before:
+ cmd += ["--changed-before", str(changed_before)]
+ if limit is not None:
+ cmd += ["--max-results", str(limit)]
+
+ # Pattern goes before roots if present
+ # If no pattern is specified, use '.' to match all files
+ if pattern:
+ cmd.append(pattern)
+ else:
+ cmd.append(".")
+
+ # Append roots - these are search directories, not patterns
+ if roots:
+ cmd += roots
+
+ return cmd
+
+
+ def normalize_max_filesize(user_value: str | None) -> str:
+ if not user_value:
+ return DEFAULT_RG_MAX_FILESIZE
+ bytes_val = parse_size_to_bytes(user_value)
+ if bytes_val is None:
+ return DEFAULT_RG_MAX_FILESIZE
+ if bytes_val > RG_MAX_FILESIZE_HARD_CAP_BYTES:
+ return "200M"
+ return user_value
+
+
+ def build_rg_command(
+ *,
+ query: str,
+ case: str | None,
+ fixed_strings: bool,
+ word: bool,
+ multiline: bool,
+ include_globs: list[str] | None,
+ exclude_globs: list[str] | None,
+ follow_symlinks: bool,
+ hidden: bool,
+ no_ignore: bool,
+ max_filesize: str | None,
+ context_before: int | None,
+ context_after: int | None,
+ encoding: str | None,
+ max_count: int | None,
+ timeout_ms: int | None,
+ roots: list[str] | None,
+ files_from: str | None,
+ count_only_matches: bool = False,
+ ) -> list[str]:
+ """Build ripgrep command with JSON output and options."""
+ if count_only_matches:
+ # Use --count-matches for count-only mode (no JSON output)
+ cmd: list[str] = [
+ "rg",
+ "--count-matches",
+ "--no-heading",
+ "--color",
+ "never",
+ ]
+ else:
+ # Use --json for full match details
+ cmd: list[str] = [
+ "rg",
+ "--json",
+ "--no-heading",
+ "--color",
+ "never",
+ ]
+
+ # Case sensitivity
+ if case == "smart":
+ cmd.append("-S")
+ elif case == "insensitive":
+ cmd.append("-i")
+ elif case == "sensitive":
+ cmd.append("-s")
+
+ if fixed_strings:
+ cmd.append("-F")
+ if word:
+ cmd.append("-w")
+ if multiline:
+ # Prefer --multiline (does not imply binary)
+ cmd.append("--multiline")
+
+ if follow_symlinks:
+ cmd.append("-L")
+ if hidden:
+ cmd.append("-H")
+ if no_ignore:
+ # Use -u (respect ignore but include hidden); do not escalate to -uu automatically
+ cmd.append("-u")
+
+ if include_globs:
+ for g in include_globs:
+ cmd += ["-g", g]
+ if exclude_globs:
+ for g in exclude_globs:
+ # ripgrep exclusion via !pattern
+ if not g.startswith("!"):
+ cmd += ["-g", f"!{g}"]
+ else:
+ cmd += ["-g", g]
+
+ if context_before is not None:
+ cmd += ["-B", str(context_before)]
+ if context_after is not None:
+ cmd += ["-A", str(context_after)]
+ if encoding:
+ cmd += ["--encoding", encoding]
+ if max_count is not None:
+ cmd += ["-m", str(max_count)]
+
+ # Normalize filesize
+ cmd += ["--max-filesize", normalize_max_filesize(max_filesize)]
+
+ # Only add timeout if supported (check if timeout_ms is provided and > 0)
+ # Note: --timeout flag may not be available in all ripgrep versions
+ # For now, we'll skip the timeout flag to ensure compatibility
+ # effective_timeout = clamp_int(timeout_ms, DEFAULT_RG_TIMEOUT_MS, RG_TIMEOUT_HARD_CAP_MS)
+ # cmd += ["--timeout", str(effective_timeout)]
+
+ # Query must be last before roots/files
+ cmd.append(query)
+
+ # Skip --files-from flag as it's not supported in this ripgrep version
+ # Use roots instead for compatibility
+ if roots:
+ cmd += roots
+ # Note: files_from functionality is disabled for compatibility
+
+ return cmd
+
+
+ def parse_rg_json_lines_to_matches(stdout_bytes: bytes) -> list[dict[str, Any]]:
+ """Parse ripgrep JSON event stream and keep only match events."""
+ results: list[dict[str, Any]] = []
+ for raw_line in stdout_bytes.splitlines():
+ if not raw_line.strip():
+ continue
+ try:
+ evt = json.loads(raw_line.decode("utf-8", errors="replace"))
+ except (json.JSONDecodeError, UnicodeDecodeError): # nosec B112
+ continue
+ if evt.get("type") != "match":
+ continue
+ data = evt.get("data", {})
+ path_text = (data.get("path", {}) or {}).get("text")
+ line_number = data.get("line_number")
+ line_text = (data.get("lines", {}) or {}).get("text")
+ submatches_raw = data.get("submatches", []) or []
+ # Normalize line content to reduce token usage
+ normalized_line = " ".join(line_text.split()) if line_text else ""
+
+ # Simplify submatches - remove redundant match text, keep only positions
+ simplified_matches = []
+ for sm in submatches_raw:
+ start = sm.get("start")
+ end = sm.get("end")
+ if start is not None and end is not None:
+ simplified_matches.append([start, end])
+
+ results.append(
+ {
+ "file": path_text,
+ "line": line_number, # Shortened field name
+ "text": normalized_line, # Normalized content
+ "matches": simplified_matches, # Simplified match positions
+ }
+ )
+ return results
+
+
+ def group_matches_by_file(matches: list[dict[str, Any]]) -> dict[str, Any]:
+ """Group matches by file to eliminate file path duplication."""
+ if not matches:
+ return {"success": True, "count": 0, "files": []}
+
+ # Group matches by file
+ file_groups: dict[str, list[dict[str, Any]]] = {}
+ total_matches = 0
+
+ for match in matches:
+ file_path = match.get("file", "unknown")
+ if file_path not in file_groups:
+ file_groups[file_path] = []
+
+ # Create match entry without file path
+ match_entry = {
+ "line": match.get("line", match.get("line_number", "?")),
+ "text": match.get("text", match.get("line", "")),
+ "positions": match.get("matches", match.get("submatches", [])),
+ }
+ file_groups[file_path].append(match_entry)
+ total_matches += 1
+
+ # Convert to grouped structure
+ files = []
+ for file_path, file_matches in file_groups.items():
+ files.append({"file": file_path, "matches": file_matches})
+
+ return {"success": True, "count": total_matches, "files": files}
+
+
+ def optimize_match_paths(matches: list[dict[str, Any]]) -> list[dict[str, Any]]:
+ """Optimize file paths in match results to reduce token consumption."""
+ if not matches:
+ return matches
+
+ # Find common prefix among all file paths
+ file_paths = [match.get("file", "") for match in matches if match.get("file")]
+ common_prefix = ""
+ if len(file_paths) > 1:
+ import os
+
+ try:
+ common_prefix = os.path.commonpath(file_paths)
+ except (ValueError, TypeError):
+ common_prefix = ""
+
+ # Optimize each match
+ optimized_matches = []
+ for match in matches:
+ optimized_match = match.copy()
+ file_path = match.get("file")
+ if file_path:
+ optimized_match["file"] = _optimize_file_path(file_path, common_prefix)
+ optimized_matches.append(optimized_match)
+
+ return optimized_matches
+
+
+ def _optimize_file_path(file_path: str, common_prefix: str = "") -> str:
+ """Optimize file path for token efficiency by removing common prefixes and shortening."""
+ if not file_path:
+ return file_path
+
+ # Remove common prefix if provided
+ if common_prefix and file_path.startswith(common_prefix):
+ optimized = file_path[len(common_prefix) :].lstrip("/\\")
+ if optimized:
+ return optimized
+
+ # For very long paths, show only the last few components
+ from pathlib import Path
+
+ path_obj = Path(file_path)
+ parts = path_obj.parts
+
+ if len(parts) > 4:
+ # Show first part + ... + last 3 parts
+ return str(Path(parts[0]) / "..." / Path(*parts[-3:]))
+
+ return file_path
+
+
+ def summarize_search_results(
+ matches: list[dict[str, Any]], max_files: int = 10, max_total_lines: int = 50
+ ) -> dict[str, Any]:
+ """Summarize search results to reduce context size while preserving key information."""
+ if not matches:
+ return {
+ "total_matches": 0,
+ "total_files": 0,
+ "summary": "No matches found",
+ "top_files": [],
+ }
+
+ # Group matches by file and find common prefix for optimization
+ file_groups: dict[str, list[dict[str, Any]]] = {}
+ all_file_paths = []
+ for match in matches:
+ file_path = match.get("file", "unknown")
+ all_file_paths.append(file_path)
+ if file_path not in file_groups:
+ file_groups[file_path] = []
+ file_groups[file_path].append(match)
+
+ # Find common prefix to optimize paths
+ common_prefix = ""
+ if len(all_file_paths) > 1:
+ import os
+
+ common_prefix = os.path.commonpath(all_file_paths) if all_file_paths else ""
+
+ # Sort files by match count (descending)
+ sorted_files = sorted(file_groups.items(), key=lambda x: len(x[1]), reverse=True)
+
+ # Create summary
+ total_matches = len(matches)
+ total_files = len(file_groups)
+
+ # Top files with match counts
+ top_files = []
+ remaining_lines = max_total_lines
+
+ for file_path, file_matches in sorted_files[:max_files]:
+ match_count = len(file_matches)
+
+ # Include a few sample lines from this file
+ sample_lines = []
+ lines_to_include = min(3, remaining_lines, len(file_matches))
+
+ for _i, match in enumerate(file_matches[:lines_to_include]):
+ line_num = match.get(
+ "line", match.get("line_number", "?")
+ ) # Support both old and new format
+ line_text = match.get(
+ "text", match.get("line", "")
+ ).strip() # Support both old and new format
+ if line_text:
+ # Truncate long lines and remove extra whitespace to save tokens
+ truncated_line = " ".join(line_text.split())[:60]
+ if len(line_text) > 60:
+ truncated_line += "..."
+ sample_lines.append(f"L{line_num}: {truncated_line}")
+ remaining_lines -= 1
+
+ # Optimize file path for token efficiency
+ optimized_path = _optimize_file_path(file_path, common_prefix)
+
+ top_files.append(
+ {
+ "file": optimized_path,
+ "match_count": match_count,
+ "sample_lines": sample_lines,
+ }
+ )
+
+ if remaining_lines <= 0:
+ break
+
+ # Create summary text
+ if total_files <= max_files:
+ summary = f"Found {total_matches} matches in {total_files} files"
+ else:
+ summary = f"Found {total_matches} matches in {total_files} files (showing top {len(top_files)})"
+
+ return {
+ "total_matches": total_matches,
+ "total_files": total_files,
+ "summary": summary,
+ "top_files": top_files,
+ "truncated": total_files > max_files,
+ }
+
+
+ def parse_rg_count_output(stdout_bytes: bytes) -> dict[str, int]:
+ """Parse ripgrep --count-matches output and return file->count mapping."""
+ results: dict[str, int] = {}
+ total_matches = 0
+
+ for line in stdout_bytes.decode("utf-8", errors="replace").splitlines():
+ line = line.strip()
+ if not line:
+ continue
+
+ # Format: "file_path:count"
+ if ":" in line:
+ file_path, count_str = line.rsplit(":", 1)
+ try:
+ count = int(count_str)
+ results[file_path] = count
+ total_matches += count
+ except ValueError:
+ # Skip lines that don't have valid count format
+ continue
+
+ # Add total count as special key
+ results["__total__"] = total_matches
+ return results
+
+
+ @dataclass
+ class TempFileList:
+ path: str
+
+ def __enter__(self) -> TempFileList:
+ return self
+
+ def __exit__(self, exc_type, exc, tb) -> None:
+ with contextlib.suppress(Exception):
+ Path(self.path).unlink(missing_ok=True)
+
+
+ class contextlib: # minimal shim for suppress without importing globally
+ class suppress:
+ def __init__(self, *exceptions: type[BaseException]) -> None:
+ self.exceptions = exceptions
+
+ def __enter__(self) -> None: # noqa: D401
+ return None
+
+ def __exit__(self, exc_type, exc, tb) -> bool:
+ return exc_type is not None and issubclass(exc_type, self.exceptions)
+
+
+ def write_files_to_temp(files: list[str]) -> TempFileList:
+ fd, temp_path = tempfile.mkstemp(prefix="rg-files-", suffix=".lst")
+ os.close(fd)
+ content = "\n".join(files)
+ Path(temp_path).write_text(content, encoding="utf-8")
+ return TempFileList(path=temp_path)
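
Taken together, the helpers are meant to be composed: build a command, run it with a timeout, then post-process the JSON event stream. A minimal end-to-end sketch using only functions defined in this file (the module path in the import is an assumption, the flag values are examples, and ripgrep's exit code 1 simply means no matches):

    import asyncio

    # The package-internal path of this module is an assumption.
    from tree_sitter_analyzer.mcp.tools.fd_rg_utils import (
        build_rg_command,
        group_matches_by_file,
        parse_rg_json_lines_to_matches,
        run_command_capture,
    )

    async def demo() -> None:
        cmd = build_rg_command(
            query="TODO",
            case="smart",
            fixed_strings=False,
            word=False,
            multiline=False,
            include_globs=["*.py"],
            exclude_globs=None,
            follow_symlinks=False,
            hidden=False,
            no_ignore=False,
            max_filesize=None,  # falls back to the 10M default
            context_before=None,
            context_after=None,
            encoding=None,
            max_count=50,
            timeout_ms=None,
            roots=["src"],
            files_from=None,
        )
        code, out, _err = await run_command_capture(cmd, timeout_ms=4000)
        if code in (0, 1):  # rg exits 1 when nothing matched
            matches = parse_rg_json_lines_to_matches(out)
            print(group_matches_by_file(matches))

    asyncio.run(demo())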