tree-sitter-analyzer 1.8.3__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic; see the registry's advisory page for more details.

Files changed (64)
  1. tree_sitter_analyzer/__init__.py +1 -1
  2. tree_sitter_analyzer/api.py +4 -4
  3. tree_sitter_analyzer/cli/argument_validator.py +29 -17
  4. tree_sitter_analyzer/cli/commands/advanced_command.py +7 -5
  5. tree_sitter_analyzer/cli/commands/structure_command.py +7 -5
  6. tree_sitter_analyzer/cli/commands/summary_command.py +10 -6
  7. tree_sitter_analyzer/cli/commands/table_command.py +8 -7
  8. tree_sitter_analyzer/cli/info_commands.py +1 -1
  9. tree_sitter_analyzer/cli_main.py +3 -2
  10. tree_sitter_analyzer/core/analysis_engine.py +5 -5
  11. tree_sitter_analyzer/core/cache_service.py +3 -1
  12. tree_sitter_analyzer/core/query.py +17 -5
  13. tree_sitter_analyzer/core/query_service.py +1 -1
  14. tree_sitter_analyzer/encoding_utils.py +3 -3
  15. tree_sitter_analyzer/exceptions.py +61 -50
  16. tree_sitter_analyzer/file_handler.py +3 -0
  17. tree_sitter_analyzer/formatters/base_formatter.py +10 -5
  18. tree_sitter_analyzer/formatters/formatter_registry.py +83 -68
  19. tree_sitter_analyzer/formatters/html_formatter.py +90 -54
  20. tree_sitter_analyzer/formatters/javascript_formatter.py +21 -16
  21. tree_sitter_analyzer/formatters/language_formatter_factory.py +7 -6
  22. tree_sitter_analyzer/formatters/markdown_formatter.py +247 -124
  23. tree_sitter_analyzer/formatters/python_formatter.py +61 -38
  24. tree_sitter_analyzer/formatters/typescript_formatter.py +113 -45
  25. tree_sitter_analyzer/interfaces/mcp_server.py +2 -2
  26. tree_sitter_analyzer/language_detector.py +6 -6
  27. tree_sitter_analyzer/language_loader.py +3 -1
  28. tree_sitter_analyzer/languages/css_plugin.py +120 -61
  29. tree_sitter_analyzer/languages/html_plugin.py +159 -62
  30. tree_sitter_analyzer/languages/java_plugin.py +42 -34
  31. tree_sitter_analyzer/languages/javascript_plugin.py +59 -30
  32. tree_sitter_analyzer/languages/markdown_plugin.py +402 -368
  33. tree_sitter_analyzer/languages/python_plugin.py +111 -64
  34. tree_sitter_analyzer/languages/typescript_plugin.py +241 -132
  35. tree_sitter_analyzer/mcp/server.py +22 -18
  36. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +13 -8
  37. tree_sitter_analyzer/mcp/tools/base_tool.py +2 -2
  38. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +232 -26
  39. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +31 -23
  40. tree_sitter_analyzer/mcp/tools/list_files_tool.py +21 -19
  41. tree_sitter_analyzer/mcp/tools/query_tool.py +17 -18
  42. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +30 -31
  43. tree_sitter_analyzer/mcp/tools/search_content_tool.py +131 -77
  44. tree_sitter_analyzer/mcp/tools/table_format_tool.py +29 -16
  45. tree_sitter_analyzer/mcp/utils/file_output_factory.py +64 -51
  46. tree_sitter_analyzer/mcp/utils/file_output_manager.py +34 -24
  47. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +8 -4
  48. tree_sitter_analyzer/models.py +7 -5
  49. tree_sitter_analyzer/plugins/base.py +9 -7
  50. tree_sitter_analyzer/plugins/manager.py +1 -0
  51. tree_sitter_analyzer/queries/css.py +2 -21
  52. tree_sitter_analyzer/queries/html.py +2 -15
  53. tree_sitter_analyzer/queries/markdown.py +30 -41
  54. tree_sitter_analyzer/queries/python.py +20 -5
  55. tree_sitter_analyzer/query_loader.py +5 -5
  56. tree_sitter_analyzer/security/validator.py +114 -86
  57. tree_sitter_analyzer/utils/__init__.py +58 -28
  58. tree_sitter_analyzer/utils/tree_sitter_compat.py +72 -65
  59. tree_sitter_analyzer/utils.py +83 -25
  60. {tree_sitter_analyzer-1.8.3.dist-info → tree_sitter_analyzer-1.9.0.dist-info}/METADATA +19 -5
  61. tree_sitter_analyzer-1.9.0.dist-info/RECORD +109 -0
  62. tree_sitter_analyzer-1.8.3.dist-info/RECORD +0 -109
  63. {tree_sitter_analyzer-1.8.3.dist-info → tree_sitter_analyzer-1.9.0.dist-info}/WHEEL +0 -0
  64. {tree_sitter_analyzer-1.8.3.dist-info → tree_sitter_analyzer-1.9.0.dist-info}/entry_points.txt +0 -0
@@ -175,6 +175,11 @@ class SearchContentTool(BaseMCPTool):
175
175
  "description": "When true and output_file is specified, suppress detailed output in response to save tokens",
176
176
  "default": False,
177
177
  },
178
+ "enable_parallel": {
179
+ "type": "boolean",
180
+ "description": "Enable parallel processing for multiple root directories to improve performance. Default: True",
181
+ "default": True,
182
+ },
178
183
  },
179
184
  "required": ["query"],
180
185
  "anyOf": [
@@ -236,6 +241,7 @@ class SearchContentTool(BaseMCPTool):
236
241
  "no_ignore",
237
242
  "count_only_matches",
238
243
  "summary_only",
244
+ "enable_parallel",
239
245
  ]:
240
246
  if key in arguments and not isinstance(arguments[key], bool):
241
247
  raise ValueError(f"{key} must be a boolean")
@@ -306,9 +312,9 @@ class SearchContentTool(BaseMCPTool):
306
312
  "success": False,
307
313
  "error": "rg (ripgrep) command not found. Please install ripgrep (https://github.com/BurntSushi/ripgrep) to use this tool.",
308
314
  "count": 0,
309
- "results": []
315
+ "results": [],
310
316
  }
311
-
317
+
312
318
  self.validate_arguments(arguments)
313
319
 
314
320
  roots = arguments.get("roots")
@@ -318,30 +324,38 @@ class SearchContentTool(BaseMCPTool):
318
324
  if files:
319
325
  files = self._validate_files(files)
320
326
 
321
- # Check cache if enabled
327
+ # Check cache if enabled (simplified for performance)
322
328
  cache_key = None
323
329
  if self.cache:
324
- # Create cache key with relevant parameters (excluding 'query' and 'roots' from kwargs)
330
+ # Create simplified cache key for better performance
325
331
  cache_params = {
326
332
  k: v
327
333
  for k, v in arguments.items()
328
- if k not in ["query", "roots", "files"]
334
+ if k
335
+ not in ["query", "roots", "files", "output_file", "suppress_output"]
329
336
  }
330
337
  cache_key = self.cache.create_cache_key(
331
338
  query=arguments["query"], roots=roots or [], **cache_params
332
339
  )
333
340
 
334
- # Try smart cross-format caching first
335
- requested_format = self._determine_requested_format(arguments)
336
- cached_result = self.cache.get_compatible_result(
337
- cache_key, requested_format
338
- )
341
+ # Simple cache lookup without complex cross-format logic for performance
342
+ cached_result = self.cache.get(cache_key)
339
343
  if cached_result is not None:
340
344
  # Add cache hit indicator to result
341
345
  if isinstance(cached_result, dict):
342
346
  cached_result = cached_result.copy()
343
347
  cached_result["cache_hit"] = True
344
- return cached_result
348
+ return cached_result
349
+ elif isinstance(cached_result, int):
350
+ # For integer results (like total_only mode), return as-is
351
+ return cached_result
352
+ else:
353
+ # For other types, convert to dict format
354
+ return {
355
+ "success": True,
356
+ "cached_result": cached_result,
357
+ "cache_hit": True,
358
+ }
345
359
 
346
360
  # Handle max_count parameter properly
347
361
  # If user specifies max_count, use it directly (with reasonable upper limit)
@@ -411,30 +425,82 @@ class SearchContentTool(BaseMCPTool):
411
425
  )
412
426
 
413
427
  # Roots mode
414
- cmd = fd_rg_utils.build_rg_command(
415
- query=arguments["query"],
416
- case=arguments.get("case", "smart"),
417
- fixed_strings=bool(arguments.get("fixed_strings", False)),
418
- word=bool(arguments.get("word", False)),
419
- multiline=bool(arguments.get("multiline", False)),
420
- include_globs=arguments.get("include_globs"),
421
- exclude_globs=arguments.get("exclude_globs"),
422
- follow_symlinks=bool(arguments.get("follow_symlinks", False)),
423
- hidden=bool(arguments.get("hidden", False)),
424
- no_ignore=no_ignore, # Use the potentially auto-detected value
425
- max_filesize=arguments.get("max_filesize"),
426
- context_before=arguments.get("context_before"),
427
- context_after=arguments.get("context_after"),
428
- encoding=arguments.get("encoding"),
429
- max_count=max_count,
430
- timeout_ms=timeout_ms,
431
- roots=roots,
432
- files_from=None,
433
- count_only_matches=count_only_matches,
428
+ # Determine if we should use parallel processing
429
+ use_parallel = (
430
+ roots is not None
431
+ and len(roots) > 1
432
+ and arguments.get("enable_parallel", True)
434
433
  )
435
434
 
436
435
  started = time.time()
437
- rc, out, err = await fd_rg_utils.run_command_capture(cmd, timeout_ms=timeout_ms)
436
+
437
+ if use_parallel and roots is not None:
438
+ # Split roots for parallel processing
439
+ root_chunks = fd_rg_utils.split_roots_for_parallel_processing(
440
+ roots, max_chunks=4
441
+ )
442
+
443
+ # Build commands for each chunk
444
+ commands = []
445
+ for chunk in root_chunks:
446
+ cmd = fd_rg_utils.build_rg_command(
447
+ query=arguments["query"],
448
+ case=arguments.get("case", "smart"),
449
+ fixed_strings=bool(arguments.get("fixed_strings", False)),
450
+ word=bool(arguments.get("word", False)),
451
+ multiline=bool(arguments.get("multiline", False)),
452
+ include_globs=arguments.get("include_globs"),
453
+ exclude_globs=arguments.get("exclude_globs"),
454
+ follow_symlinks=bool(arguments.get("follow_symlinks", False)),
455
+ hidden=bool(arguments.get("hidden", False)),
456
+ no_ignore=no_ignore,
457
+ max_filesize=arguments.get("max_filesize"),
458
+ context_before=arguments.get("context_before"),
459
+ context_after=arguments.get("context_after"),
460
+ encoding=arguments.get("encoding"),
461
+ max_count=max_count,
462
+ timeout_ms=timeout_ms,
463
+ roots=chunk,
464
+ files_from=None,
465
+ count_only_matches=count_only_matches,
466
+ )
467
+ commands.append(cmd)
468
+
469
+ # Execute commands in parallel
470
+ results = await fd_rg_utils.run_parallel_rg_searches(
471
+ commands, timeout_ms=timeout_ms, max_concurrent=4
472
+ )
473
+
474
+ # Merge results
475
+ rc, out, err = fd_rg_utils.merge_rg_results(results, count_only_matches)
476
+ else:
477
+ # Single command execution (original behavior)
478
+ cmd = fd_rg_utils.build_rg_command(
479
+ query=arguments["query"],
480
+ case=arguments.get("case", "smart"),
481
+ fixed_strings=bool(arguments.get("fixed_strings", False)),
482
+ word=bool(arguments.get("word", False)),
483
+ multiline=bool(arguments.get("multiline", False)),
484
+ include_globs=arguments.get("include_globs"),
485
+ exclude_globs=arguments.get("exclude_globs"),
486
+ follow_symlinks=bool(arguments.get("follow_symlinks", False)),
487
+ hidden=bool(arguments.get("hidden", False)),
488
+ no_ignore=no_ignore,
489
+ max_filesize=arguments.get("max_filesize"),
490
+ context_before=arguments.get("context_before"),
491
+ context_after=arguments.get("context_after"),
492
+ encoding=arguments.get("encoding"),
493
+ max_count=max_count,
494
+ timeout_ms=timeout_ms,
495
+ roots=roots,
496
+ files_from=None,
497
+ count_only_matches=count_only_matches,
498
+ )
499
+
500
+ rc, out, err = await fd_rg_utils.run_command_capture(
501
+ cmd, timeout_ms=timeout_ms
502
+ )
503
+
438
504
  elapsed_ms = int((time.time() - started) * 1000)
439
505
 
440
506
  if rc not in (0, 1):
@@ -498,7 +564,7 @@ class SearchContentTool(BaseMCPTool):
498
564
 
499
565
  # Handle normal mode
500
566
  matches = fd_rg_utils.parse_rg_json_lines_to_matches(out)
501
-
567
+
502
568
  # Apply user-specified max_count limit if provided
503
569
  # Note: ripgrep's -m option limits matches per file, not total matches
504
570
  # So we need to apply the total limit here in post-processing
@@ -530,19 +596,19 @@ class SearchContentTool(BaseMCPTool):
530
596
  try:
531
597
  # Save full result to file
532
598
  import json
599
+
533
600
  json_content = json.dumps(result, indent=2, ensure_ascii=False)
534
601
  file_path = self.file_output_manager.save_to_file(
535
- content=json_content,
536
- base_name=output_file
602
+ content=json_content, base_name=output_file
537
603
  )
538
-
604
+
539
605
  # If suppress_output is True, return minimal response
540
606
  if suppress_output:
541
607
  minimal_result = {
542
608
  "success": result.get("success", True),
543
609
  "count": result.get("count", 0),
544
610
  "output_file": output_file,
545
- "file_saved": f"Results saved to {file_path}"
611
+ "file_saved": f"Results saved to {file_path}",
546
612
  }
547
613
  # Cache the full result, not the minimal one
548
614
  if self.cache and cache_key:
@@ -562,7 +628,7 @@ class SearchContentTool(BaseMCPTool):
562
628
  "success": result.get("success", True),
563
629
  "count": result.get("count", 0),
564
630
  "summary": result.get("summary", {}),
565
- "meta": result.get("meta", {})
631
+ "meta": result.get("meta", {}),
566
632
  }
567
633
  # Cache the full result, not the minimal one
568
634
  if self.cache and cache_key:
@@ -595,19 +661,19 @@ class SearchContentTool(BaseMCPTool):
595
661
  try:
596
662
  # Save full result to file
597
663
  import json
664
+
598
665
  json_content = json.dumps(result, indent=2, ensure_ascii=False)
599
666
  file_path = self.file_output_manager.save_to_file(
600
- content=json_content,
601
- base_name=output_file
667
+ content=json_content, base_name=output_file
602
668
  )
603
-
669
+
604
670
  # If suppress_output is True, return minimal response
605
671
  if suppress_output:
606
672
  minimal_result = {
607
673
  "success": result.get("success", True),
608
674
  "count": result.get("count", 0),
609
675
  "output_file": output_file,
610
- "file_saved": f"Results saved to {file_path}"
676
+ "file_saved": f"Results saved to {file_path}",
611
677
  }
612
678
  # Cache the full result, not the minimal one
613
679
  if self.cache and cache_key:
@@ -627,7 +693,7 @@ class SearchContentTool(BaseMCPTool):
627
693
  "success": result.get("success", True),
628
694
  "count": result.get("count", 0),
629
695
  "summary": result.get("summary", {}),
630
- "elapsed_ms": result.get("elapsed_ms", 0)
696
+ "elapsed_ms": result.get("elapsed_ms", 0),
631
697
  }
632
698
  # Cache the full result, not the minimal one
633
699
  if self.cache and cache_key:
@@ -651,7 +717,7 @@ class SearchContentTool(BaseMCPTool):
651
717
  output_file = arguments.get("output_file")
652
718
  suppress_output = arguments.get("suppress_output", False)
653
719
 
654
- # Always add results to the base result for file saving
720
+ # Always add results to the base result for caching
655
721
  result["results"] = matches
656
722
 
657
723
  # Handle file output if requested
@@ -665,58 +731,46 @@ class SearchContentTool(BaseMCPTool):
665
731
  "elapsed_ms": elapsed_ms,
666
732
  "results": matches,
667
733
  "summary": fd_rg_utils.summarize_search_results(matches),
668
- "grouped_by_file": fd_rg_utils.group_matches_by_file(matches)["files"] if matches else []
734
+ "grouped_by_file": fd_rg_utils.group_matches_by_file(matches)[
735
+ "files"
736
+ ]
737
+ if matches
738
+ else [],
669
739
  }
670
740
 
671
741
  # Convert to JSON for file output
672
742
  import json
743
+
673
744
  json_content = json.dumps(file_content, indent=2, ensure_ascii=False)
674
745
 
675
746
  # Save to file
676
747
  saved_file_path = self.file_output_manager.save_to_file(
677
- content=json_content,
678
- base_name=output_file
748
+ content=json_content, base_name=output_file
679
749
  )
680
750
 
751
+ result["output_file"] = output_file
681
752
  result["output_file_path"] = saved_file_path
682
753
  result["file_saved"] = True
683
754
 
684
755
  logger.info(f"Search results saved to: {saved_file_path}")
685
756
 
757
+ # If suppress_output is True, return minimal response
758
+ if suppress_output:
759
+ minimal_result = {
760
+ "success": result.get("success", True),
761
+ "count": result.get("count", 0),
762
+ "output_file": output_file,
763
+ "file_saved": f"Results saved to {saved_file_path}",
764
+ }
765
+ # Cache the full result, not the minimal one
766
+ if self.cache and cache_key:
767
+ self.cache.set(cache_key, result)
768
+ return minimal_result
769
+
686
770
  except Exception as e:
687
771
  logger.error(f"Failed to save output to file: {e}")
688
772
  result["file_save_error"] = str(e)
689
773
  result["file_saved"] = False
690
-
691
- # Handle file output and suppression
692
- output_file = arguments.get("output_file")
693
- suppress_output = arguments.get("suppress_output", False)
694
-
695
- if output_file:
696
- # Save full result to file
697
- import json
698
- json_content = json.dumps(result, indent=2, ensure_ascii=False)
699
- file_path = self.file_output_manager.save_to_file(
700
- content=json_content,
701
- base_name=output_file
702
- )
703
-
704
- # If suppress_output is True, return minimal response
705
- if suppress_output:
706
- minimal_result = {
707
- "success": result.get("success", True),
708
- "count": result.get("count", 0),
709
- "output_file": output_file,
710
- "file_saved": f"Results saved to {file_path}"
711
- }
712
- # Cache the full result, not the minimal one
713
- if self.cache and cache_key:
714
- self.cache.set(cache_key, result)
715
- return minimal_result
716
- else:
717
- # Include file info in full response
718
- result["output_file"] = output_file
719
- result["file_saved"] = f"Results saved to {file_path}"
720
774
  elif suppress_output:
721
775
  # If suppress_output is True but no output_file, remove results from response
722
776
  result_copy = result.copy()
@@ -38,7 +38,7 @@ class TableFormatTool(BaseMCPTool):
38
38
  structured table output through the MCP protocol.
39
39
  """
40
40
 
41
- def __init__(self, project_root: str = None) -> None:
41
+ def __init__(self, project_root: str | None = None) -> None:
42
42
  """Initialize the table format tool."""
43
43
  super().__init__(project_root)
44
44
  self.analysis_engine = get_analysis_engine(project_root)
@@ -74,7 +74,12 @@ class TableFormatTool(BaseMCPTool):
74
74
  "format_type": {
75
75
  "type": "string",
76
76
  "description": "Table format type",
77
- "enum": list(set(FormatterRegistry.get_available_formats() + ["full", "compact", "csv", "json"])),
77
+ "enum": list(
78
+ set(
79
+ FormatterRegistry.get_available_formats()
80
+ + ["full", "compact", "csv", "json"]
81
+ )
82
+ ),
78
83
  "default": "full",
79
84
  },
80
85
  "language": {
@@ -124,11 +129,18 @@ class TableFormatTool(BaseMCPTool):
124
129
  format_type = arguments["format_type"]
125
130
  if not isinstance(format_type, str):
126
131
  raise ValueError("format_type must be a string")
127
-
132
+
128
133
  # Check both new FormatterRegistry formats and legacy formats
129
- available_formats = list(set(FormatterRegistry.get_available_formats() + ["full", "compact", "csv", "json"]))
134
+ available_formats = list(
135
+ set(
136
+ FormatterRegistry.get_available_formats()
137
+ + ["full", "compact", "csv", "json"]
138
+ )
139
+ )
130
140
  if format_type not in available_formats:
131
- raise ValueError(f"format_type must be one of: {', '.join(sorted(available_formats))}")
141
+ raise ValueError(
142
+ f"format_type must be one of: {', '.join(sorted(available_formats))}"
143
+ )
132
144
 
133
145
  # Validate language if provided
134
146
  if "language" in arguments:
@@ -454,7 +466,7 @@ class TableFormatTool(BaseMCPTool):
454
466
 
455
467
  # Always convert analysis result to dict for metadata extraction
456
468
  structure_dict = self._convert_analysis_result_to_dict(structure_result)
457
-
469
+
458
470
  # Try to use new FormatterRegistry first, fallback to legacy TableFormatter
459
471
  try:
460
472
  if FormatterRegistry.is_format_supported(format_type):
@@ -463,13 +475,15 @@ class TableFormatTool(BaseMCPTool):
463
475
  table_output = formatter.format(structure_result.elements)
464
476
  else:
465
477
  # Fallback to legacy TableFormatter for backward compatibility
466
- formatter = TableFormatter(format_type)
467
- table_output = formatter.format_structure(structure_dict)
478
+ formatter: Any = TableFormatter(format_type)
479
+ table_output = formatter.format_structure(structure_dict) # type: ignore[attr-defined]
468
480
  except Exception as e:
469
481
  # If FormatterRegistry fails, fallback to legacy TableFormatter
470
- logger.warning(f"FormatterRegistry failed, using legacy formatter: {e}")
471
- formatter = TableFormatter(format_type)
472
- table_output = formatter.format_structure(structure_dict)
482
+ logger.warning(
483
+ f"FormatterRegistry failed, using legacy formatter: {e}"
484
+ )
485
+ formatter: Any = TableFormatter(format_type)
486
+ table_output = formatter.format_structure(structure_dict) # type: ignore[attr-defined]
473
487
 
474
488
  # Ensure output format matches CLI exactly
475
489
  # Fix line ending differences: normalize to Unix-style LF (\n)
@@ -515,15 +529,14 @@ class TableFormatTool(BaseMCPTool):
515
529
 
516
530
  # Save to file with automatic extension detection
517
531
  saved_file_path = self.file_output_manager.save_to_file(
518
- content=table_output,
519
- base_name=base_name
532
+ content=table_output, base_name=base_name
520
533
  )
521
-
534
+
522
535
  result["output_file_path"] = saved_file_path
523
536
  result["file_saved"] = True
524
-
537
+
525
538
  self.logger.info(f"Analysis output saved to: {saved_file_path}")
526
-
539
+
527
540
  except Exception as e:
528
541
  self.logger.error(f"Failed to save output to file: {e}")
529
542
  result["file_save_error"] = str(e)