tree-sitter-analyzer 1.7.4__py3-none-any.whl → 1.7.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tree-sitter-analyzer might be problematic. Click here for more details.
- tree_sitter_analyzer/__init__.py +1 -1
- tree_sitter_analyzer/api.py +3 -2
- tree_sitter_analyzer/exceptions.py +334 -0
- tree_sitter_analyzer/file_handler.py +16 -1
- tree_sitter_analyzer/interfaces/mcp_server.py +3 -1
- tree_sitter_analyzer/language_detector.py +12 -1
- tree_sitter_analyzer/languages/markdown_plugin.py +22 -0
- tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +68 -3
- tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +32 -7
- tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +10 -0
- tree_sitter_analyzer/mcp/tools/list_files_tool.py +9 -0
- tree_sitter_analyzer/mcp/tools/query_tool.py +9 -2
- tree_sitter_analyzer/mcp/tools/read_partial_tool.py +98 -14
- tree_sitter_analyzer/mcp/tools/search_content_tool.py +9 -0
- tree_sitter_analyzer/mcp/tools/table_format_tool.py +13 -3
- tree_sitter_analyzer/security/validator.py +168 -9
- {tree_sitter_analyzer-1.7.4.dist-info → tree_sitter_analyzer-1.7.7.dist-info}/METADATA +44 -35
- {tree_sitter_analyzer-1.7.4.dist-info → tree_sitter_analyzer-1.7.7.dist-info}/RECORD +20 -20
- {tree_sitter_analyzer-1.7.4.dist-info → tree_sitter_analyzer-1.7.7.dist-info}/WHEEL +0 -0
- {tree_sitter_analyzer-1.7.4.dist-info → tree_sitter_analyzer-1.7.7.dist-info}/entry_points.txt +0 -0
tree_sitter_analyzer/__init__.py
CHANGED
tree_sitter_analyzer/api.py
CHANGED
|
@@ -11,6 +11,7 @@ import logging
|
|
|
11
11
|
from pathlib import Path
|
|
12
12
|
from typing import Any
|
|
13
13
|
|
|
14
|
+
from . import __version__
|
|
14
15
|
from .core.engine import AnalysisEngine
|
|
15
16
|
from .utils import log_error
|
|
16
17
|
|
|
@@ -500,7 +501,7 @@ def get_framework_info() -> dict[str, Any]:
|
|
|
500
501
|
|
|
501
502
|
return {
|
|
502
503
|
"name": "tree-sitter-analyzer",
|
|
503
|
-
"version":
|
|
504
|
+
"version": __version__,
|
|
504
505
|
"supported_languages": engine.get_supported_languages(),
|
|
505
506
|
"total_languages": len(engine.get_supported_languages()),
|
|
506
507
|
"plugin_info": {
|
|
@@ -521,7 +522,7 @@ def get_framework_info() -> dict[str, Any]:
|
|
|
521
522
|
}
|
|
522
523
|
except Exception as e:
|
|
523
524
|
log_error(f"Failed to get framework info: {e}")
|
|
524
|
-
return {"name": "tree-sitter-analyzer", "version":
|
|
525
|
+
return {"name": "tree-sitter-analyzer", "version": __version__, "error": str(e)}
|
|
525
526
|
|
|
526
527
|
|
|
527
528
|
def execute_query(
|
|
@@ -398,3 +398,337 @@ class RegexSecurityError(SecurityError):
|
|
|
398
398
|
)
|
|
399
399
|
self.pattern = pattern
|
|
400
400
|
self.dangerous_construct = dangerous_construct
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
# MCP-specific exceptions for enhanced error handling
|
|
404
|
+
class MCPToolError(MCPError):
    """Raised when MCP tool execution fails.

    Attributes:
        input_params: The parameters exactly as passed in (not sanitized).
        execution_stage: The stage label passed in by the caller, if any.
    """

    def __init__(
        self,
        message: str,
        tool_name: str | None = None,
        input_params: dict[str, Any] | None = None,
        execution_stage: str | None = None,
        **kwargs: Any,
    ) -> None:
        """Build the error and record sanitized parameters in its context.

        Args:
            message: Human-readable error description.
            tool_name: Name of the failing tool, forwarded to the parent class.
            input_params: Tool arguments; a sanitized copy is stored in the
                context under ``"input_params"``.
            execution_stage: Stage label stored in the context under
                ``"execution_stage"``.
            **kwargs: Extra keyword arguments forwarded to the parent class.
                An optional ``context`` mapping is merged into the context.
        """
        # Pop (rather than get) so ``context`` is not passed twice to the
        # parent initializer, and copy so the caller's mapping is not mutated.
        context = dict(kwargs.pop("context", None) or {})
        if input_params:
            # Sanitize sensitive information from input params
            context["input_params"] = self._sanitize_params(input_params)
        if execution_stage:
            context["execution_stage"] = execution_stage

        super().__init__(message, tool_name=tool_name, context=context, **kwargs)
        self.input_params = input_params
        self.execution_stage = execution_stage

    @staticmethod
    def _sanitize_params(params: dict[str, Any]) -> dict[str, Any]:
        """Return a copy of ``params`` that is safe to log.

        Values whose key contains a sensitive word are replaced by a
        redaction marker; string values longer than 100 characters are
        truncated. Only the top level of ``params`` is inspected.
        """
        sanitized = {}
        sensitive_keys = {"password", "token", "key", "secret", "auth", "credential"}

        for key, value in params.items():
            if any(sensitive in key.lower() for sensitive in sensitive_keys):
                sanitized[key] = "***REDACTED***"
            elif isinstance(value, str) and len(value) > 100:
                sanitized[key] = value[:100] + "...[TRUNCATED]"
            else:
                sanitized[key] = value

        return sanitized
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
class MCPResourceError(MCPError):
    """Raised when MCP resource access fails.

    Attributes:
        resource_type: The resource type passed in by the caller, if any.
        access_mode: The access mode passed in by the caller, if any.
    """

    def __init__(
        self,
        message: str,
        resource_uri: str | None = None,
        resource_type: str | None = None,
        access_mode: str | None = None,
        **kwargs: Any,
    ) -> None:
        """Build the error and record resource details in its context.

        Args:
            message: Human-readable error description.
            resource_uri: URI of the resource, forwarded to the parent class.
            resource_type: Stored in the context under ``"resource_type"``.
            access_mode: Stored in the context under ``"access_mode"``.
            **kwargs: Extra keyword arguments forwarded to the parent class.
                An optional ``context`` mapping is merged into the context.
        """
        # Pop (rather than get) so ``context`` is not passed twice to the
        # parent initializer, and copy so the caller's mapping is not mutated.
        context = dict(kwargs.pop("context", None) or {})
        if resource_type:
            context["resource_type"] = resource_type
        if access_mode:
            context["access_mode"] = access_mode

        super().__init__(message, resource_uri=resource_uri, context=context, **kwargs)
        self.resource_type = resource_type
        self.access_mode = access_mode
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
class MCPTimeoutError(MCPError):
    """Raised when MCP operation times out.

    Attributes:
        timeout_seconds: The timeout value passed in by the caller, if any.
        operation_type: The operation label passed in by the caller, if any.
    """

    def __init__(
        self,
        message: str,
        timeout_seconds: float | None = None,
        operation_type: str | None = None,
        **kwargs: Any,
    ) -> None:
        """Build the error and record timeout details in its context.

        Args:
            message: Human-readable error description.
            timeout_seconds: Stored in the context under ``"timeout_seconds"``.
            operation_type: Stored in the context under ``"operation_type"``.
            **kwargs: Extra keyword arguments forwarded to the parent class.
                An optional ``context`` mapping is merged into the context.
        """
        # Pop (rather than get) so ``context`` is not passed twice to the
        # parent initializer, and copy so the caller's mapping is not mutated.
        context = dict(kwargs.pop("context", None) or {})
        # Compare against None so an explicit timeout of 0 is still recorded.
        if timeout_seconds is not None:
            context["timeout_seconds"] = timeout_seconds
        if operation_type:
            context["operation_type"] = operation_type

        super().__init__(message, context=context, **kwargs)
        self.timeout_seconds = timeout_seconds
        self.operation_type = operation_type
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
class MCPValidationError(ValidationError):
    """Raised when MCP input validation fails.

    Attributes:
        tool_name: Name of the tool whose input was rejected, if given.
        parameter_name: Name of the offending parameter, if given.
        validation_rule: Label of the rule that failed, if given.
    """

    def __init__(
        self,
        message: str,
        tool_name: str | None = None,
        parameter_name: str | None = None,
        parameter_value: Any | None = None,
        validation_rule: str | None = None,
        **kwargs: Any,
    ) -> None:
        """Build the error and record validation details in its context.

        Args:
            message: Human-readable error description.
            tool_name: Stored in the context under ``"tool_name"``.
            parameter_name: Stored in the context under ``"parameter_name"``.
            parameter_value: Stored in the context under ``"parameter_value"``;
                strings longer than 200 characters are truncated first.
            validation_rule: Stored in the context under ``"validation_rule"``.
            **kwargs: Extra keyword arguments forwarded to the parent class.
                An optional ``context`` mapping is merged into the context.
        """
        # Pop (rather than get) so ``context`` is not passed twice to the
        # parent initializer, and copy so the caller's mapping is not mutated.
        context = dict(kwargs.pop("context", None) or {})
        if tool_name:
            context["tool_name"] = tool_name
        if parameter_name:
            context["parameter_name"] = parameter_name
        if validation_rule:
            context["validation_rule"] = validation_rule

        # Sanitize parameter value for logging
        if parameter_value is not None:
            if isinstance(parameter_value, str) and len(parameter_value) > 200:
                context["parameter_value"] = parameter_value[:200] + "...[TRUNCATED]"
            else:
                context["parameter_value"] = parameter_value

        super().__init__(message, validation_type="mcp_parameter", context=context, **kwargs)
        self.tool_name = tool_name
        self.parameter_name = parameter_name
        self.validation_rule = validation_rule
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
class FileRestrictionError(SecurityError):
    """Raised when file access is restricted by mode or security policy.

    Attributes:
        current_mode: The mode label passed in by the caller, if any.
        allowed_patterns: The allowed patterns passed in by the caller, if any.
    """

    def __init__(
        self,
        message: str,
        file_path: str | Path | None = None,
        current_mode: str | None = None,
        allowed_patterns: list[str] | None = None,
        **kwargs: Any,
    ) -> None:
        """Build the error and record restriction details in its context.

        Args:
            message: Human-readable error description.
            file_path: Path that was refused, forwarded to the parent class.
            current_mode: Stored in the context under ``"current_mode"``.
            allowed_patterns: Stored in the context under
                ``"allowed_patterns"``.
            **kwargs: Extra keyword arguments forwarded to the parent class.
                An optional ``context`` mapping is merged into the context.
        """
        # Pop (rather than get) so ``context`` is not passed twice to the
        # parent initializer, and copy so the caller's mapping is not mutated.
        context = dict(kwargs.pop("context", None) or {})
        if current_mode:
            context["current_mode"] = current_mode
        if allowed_patterns:
            context["allowed_patterns"] = allowed_patterns

        super().__init__(
            message,
            security_type="file_restriction",
            file_path=file_path,
            context=context,
            **kwargs,
        )
        self.current_mode = current_mode
        self.allowed_patterns = allowed_patterns
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
# Enhanced error response utilities for MCP
|
|
549
|
+
def create_mcp_error_response(
    exception: Exception,
    tool_name: str | None = None,
    include_debug_info: bool = False,
    sanitize_sensitive: bool = True,
) -> dict[str, Any]:
    """
    Create standardized MCP error response dictionary.

    Args:
        exception: The exception to convert
        tool_name: Name of the MCP tool that failed
        include_debug_info: Whether to include debug information
        sanitize_sensitive: Whether to sanitize sensitive information

    Returns:
        A dictionary with ``"success": False`` and an ``"error"`` entry
        holding the exception type, message, a UTC timestamp and any
        optional details that are available.
    """
    import traceback
    from datetime import datetime, timezone

    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # tzinfo is dropped again so the text keeps the original "...Z" form.
    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)

    error: dict[str, Any] = {
        "type": exception.__class__.__name__,
        "message": str(exception),
        "timestamp": utc_now.isoformat() + "Z",
    }
    response: dict[str, Any] = {"success": False, "error": error}

    # Add tool name if provided
    if tool_name:
        error["tool"] = tool_name

    # Add context if available (copied so the exception's own dict is untouched)
    context = getattr(exception, "context", None)
    if context:
        context = context.copy()
        if sanitize_sensitive:
            context = _sanitize_error_context(context)
        error["context"] = context

    # Add error code if available
    if hasattr(exception, "error_code"):
        error["code"] = exception.error_code

    # Add debug information if requested
    if include_debug_info:
        # Format the traceback of *this* exception. format_exc() would report
        # whichever exception is currently being handled, which is nothing at
        # all when this function is called outside an ``except`` block.
        formatted = traceback.format_exception(
            type(exception), exception, exception.__traceback__
        )
        error["debug"] = {
            "traceback": "".join(formatted),
            "exception_args": list(exception.args),
        }

    # Add specific error details for known exception types
    if isinstance(exception, MCPToolError):
        error["execution_stage"] = exception.execution_stage
    elif isinstance(exception, MCPTimeoutError):
        error["timeout_seconds"] = exception.timeout_seconds
    elif isinstance(exception, FileRestrictionError):
        error["current_mode"] = exception.current_mode
        error["allowed_patterns"] = exception.allowed_patterns

    return response
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def _sanitize_error_context(context: dict[str, Any]) -> dict[str, Any]:
|
|
616
|
+
"""Sanitize sensitive information from error context."""
|
|
617
|
+
sanitized = {}
|
|
618
|
+
sensitive_keys = {
|
|
619
|
+
"password", "token", "key", "secret", "auth", "credential",
|
|
620
|
+
"api_key", "access_token", "private_key", "session_id"
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
for key, value in context.items():
|
|
624
|
+
if any(sensitive in key.lower() for sensitive in sensitive_keys):
|
|
625
|
+
sanitized[key] = "***REDACTED***"
|
|
626
|
+
elif isinstance(value, str) and len(value) > 500:
|
|
627
|
+
sanitized[key] = value[:500] + "...[TRUNCATED]"
|
|
628
|
+
elif isinstance(value, (list, tuple)) and len(value) > 10:
|
|
629
|
+
sanitized[key] = list(value[:10]) + ["...[TRUNCATED]"]
|
|
630
|
+
elif isinstance(value, dict) and len(value) > 20:
|
|
631
|
+
# Recursively sanitize nested dictionaries
|
|
632
|
+
truncated_dict = dict(list(value.items())[:20])
|
|
633
|
+
sanitized[key] = _sanitize_error_context(truncated_dict)
|
|
634
|
+
sanitized[key]["__truncated__"] = True
|
|
635
|
+
else:
|
|
636
|
+
sanitized[key] = value
|
|
637
|
+
|
|
638
|
+
return sanitized
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
# Async exception handling utilities for MCP tools
|
|
642
|
+
async def safe_execute_async(
|
|
643
|
+
coro: Any,
|
|
644
|
+
default_return: Any = None,
|
|
645
|
+
exception_types: tuple[type[Exception], ...] = (Exception,),
|
|
646
|
+
log_errors: bool = True,
|
|
647
|
+
tool_name: str | None = None,
|
|
648
|
+
) -> Any:
|
|
649
|
+
"""
|
|
650
|
+
Safely execute an async function with exception handling.
|
|
651
|
+
|
|
652
|
+
Args:
|
|
653
|
+
coro: Coroutine to execute
|
|
654
|
+
default_return: Value to return on exception
|
|
655
|
+
exception_types: Exception types to catch
|
|
656
|
+
log_errors: Whether to log errors
|
|
657
|
+
tool_name: Name of the tool for error context
|
|
658
|
+
|
|
659
|
+
Returns:
|
|
660
|
+
Coroutine result or default_return on exception
|
|
661
|
+
"""
|
|
662
|
+
try:
|
|
663
|
+
return await coro
|
|
664
|
+
except exception_types as e:
|
|
665
|
+
if log_errors:
|
|
666
|
+
from .utils import log_error
|
|
667
|
+
|
|
668
|
+
error_context = {"tool_name": tool_name} if tool_name else {}
|
|
669
|
+
log_error(f"Async execution failed: {e}", extra=error_context)
|
|
670
|
+
|
|
671
|
+
return default_return
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
def mcp_exception_handler(
    tool_name: str,
    include_debug: bool = False,
    sanitize_sensitive: bool = True,
) -> Any:
    """
    Decorator for MCP tool exception handling.

    The decorated callable (sync or async) keeps its name, docstring and
    signature metadata. Any ``Exception`` it raises is logged and converted
    into the dictionary produced by ``create_mcp_error_response`` instead
    of propagating.

    Args:
        tool_name: Name of the MCP tool
        include_debug: Whether to include debug information
        sanitize_sensitive: Whether to sanitize sensitive information

    Returns:
        A decorator that wraps the target function.
    """
    import functools
    import inspect

    def _to_error_response(exc: Exception) -> dict[str, Any]:
        """Log ``exc`` with tool context and build the error response."""
        from .utils import log_error

        # Log the error with tool context
        log_error(
            f"MCP tool '{tool_name}' failed: {exc}",
            extra={"tool_name": tool_name, "exception_type": type(exc).__name__},
        )

        # Return standardized error response
        return create_mcp_error_response(
            exc,
            tool_name=tool_name,
            include_debug_info=include_debug,
            sanitize_sensitive=sanitize_sensitive,
        )

    def decorator(func: Any) -> Any:
        # Return appropriate wrapper based on function type
        if inspect.iscoroutinefunction(func):

            @functools.wraps(func)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                try:
                    return await func(*args, **kwargs)
                except Exception as e:
                    return _to_error_response(e)

            return async_wrapper

        @functools.wraps(func)
        def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
            try:
                return func(*args, **kwargs)
            except Exception as e:
                return _to_error_response(e)

        return sync_wrapper

    return decorator
|
|
@@ -8,7 +8,22 @@ This module provides file reading functionality with encoding detection and fall
|
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
|
|
10
10
|
from .encoding_utils import read_file_safe
|
|
11
|
-
from .utils import
|
|
11
|
+
from .utils import setup_logger
|
|
12
|
+
|
|
13
|
+
# Set up logger for this module
|
|
14
|
+
logger = setup_logger(__name__)
|
|
15
|
+
|
|
16
|
+
def log_error(message: str, *args, **kwargs) -> None:
    """Log ``message`` at ERROR level through this module's ``logger``.

    Extra positional and keyword arguments are forwarded unchanged to
    ``logger.error``.
    """
    logger.error(message, *args, **kwargs)
|
|
19
|
+
|
|
20
|
+
def log_info(message: str, *args, **kwargs) -> None:
    """Log ``message`` at INFO level through this module's ``logger``.

    Extra positional and keyword arguments are forwarded unchanged to
    ``logger.info``.
    """
    logger.info(message, *args, **kwargs)
|
|
23
|
+
|
|
24
|
+
def log_warning(message: str, *args, **kwargs) -> None:
    """Log ``message`` at WARNING level through this module's ``logger``.

    Extra positional and keyword arguments are forwarded unchanged to
    ``logger.warning``.
    """
    logger.warning(message, *args, **kwargs)
|
|
12
27
|
|
|
13
28
|
|
|
14
29
|
def detect_language_from_extension(file_path: str) -> str:
|
|
@@ -12,6 +12,8 @@ import logging
|
|
|
12
12
|
import sys
|
|
13
13
|
from typing import Any
|
|
14
14
|
|
|
15
|
+
from .. import __version__
|
|
16
|
+
|
|
15
17
|
try:
|
|
16
18
|
from mcp.server import Server
|
|
17
19
|
from mcp.server.models import InitializationOptions
|
|
@@ -68,7 +70,7 @@ class TreeSitterAnalyzerMCPServer:
|
|
|
68
70
|
|
|
69
71
|
self.server: Server | None = None
|
|
70
72
|
self.name = "tree-sitter-analyzer"
|
|
71
|
-
self.version =
|
|
73
|
+
self.version = __version__
|
|
72
74
|
|
|
73
75
|
log_info(f"Initializing {self.name} v{self.version}")
|
|
74
76
|
|
|
@@ -66,6 +66,10 @@ class LanguageDetector:
|
|
|
66
66
|
".mkd": "markdown",
|
|
67
67
|
".mkdn": "markdown",
|
|
68
68
|
".mdx": "markdown",
|
|
69
|
+
# JSON系
|
|
70
|
+
".json": "json",
|
|
71
|
+
".jsonc": "json",
|
|
72
|
+
".json5": "json",
|
|
69
73
|
}
|
|
70
74
|
|
|
71
75
|
# Ambiguous extensions (map to multiple languages)
|
|
@@ -100,6 +104,7 @@ class LanguageDetector:
|
|
|
100
104
|
"rust",
|
|
101
105
|
"go",
|
|
102
106
|
"markdown",
|
|
107
|
+
"json",
|
|
103
108
|
}
|
|
104
109
|
|
|
105
110
|
def __init__(self) -> None:
|
|
@@ -143,6 +148,10 @@ class LanguageDetector:
|
|
|
143
148
|
".mkd": ("markdown", 0.8),
|
|
144
149
|
".mkdn": ("markdown", 0.8),
|
|
145
150
|
".mdx": ("markdown", 0.7), # MDX might be mixed with JSX
|
|
151
|
+
# JSON extensions
|
|
152
|
+
".json": ("json", 0.9),
|
|
153
|
+
".jsonc": ("json", 0.8), # JSON with comments
|
|
154
|
+
".json5": ("json", 0.8), # JSON5 format
|
|
146
155
|
}
|
|
147
156
|
|
|
148
157
|
# Content-based detection patterns
|
|
@@ -412,7 +421,9 @@ def detect_language_from_file(file_path: str) -> str:
|
|
|
412
421
|
Returns:
|
|
413
422
|
Detected language name
|
|
414
423
|
"""
|
|
415
|
-
|
|
424
|
+
# Create a fresh instance to ensure latest configuration
|
|
425
|
+
fresh_detector = LanguageDetector()
|
|
426
|
+
return fresh_detector.detect_from_extension(file_path)
|
|
416
427
|
|
|
417
428
|
|
|
418
429
|
def is_language_supported(language: str) -> bool:
|
|
@@ -184,6 +184,17 @@ class MarkdownElementExtractor(ElementExtractor):
|
|
|
184
184
|
log_debug(f"Error during link extraction: {e}")
|
|
185
185
|
return []
|
|
186
186
|
|
|
187
|
+
# 重複除去: 同じtextとurlを持つ要素を除去
|
|
188
|
+
seen = set()
|
|
189
|
+
unique_links = []
|
|
190
|
+
for link in links:
|
|
191
|
+
key = (getattr(link, 'text', '') or "", getattr(link, 'url', '') or "")
|
|
192
|
+
if key not in seen:
|
|
193
|
+
seen.add(key)
|
|
194
|
+
unique_links.append(link)
|
|
195
|
+
|
|
196
|
+
links = unique_links
|
|
197
|
+
|
|
187
198
|
log_debug(f"Extracted {len(links)} Markdown links")
|
|
188
199
|
return links
|
|
189
200
|
|
|
@@ -209,6 +220,17 @@ class MarkdownElementExtractor(ElementExtractor):
|
|
|
209
220
|
log_debug(f"Error during image extraction: {e}")
|
|
210
221
|
return []
|
|
211
222
|
|
|
223
|
+
# 重複除去: 同じalt_textとurlを持つ要素を除去
|
|
224
|
+
seen = set()
|
|
225
|
+
unique_images = []
|
|
226
|
+
for img in images:
|
|
227
|
+
key = (img.alt_text or "", img.url or "")
|
|
228
|
+
if key not in seen:
|
|
229
|
+
seen.add(key)
|
|
230
|
+
unique_images.append(img)
|
|
231
|
+
|
|
232
|
+
images = unique_images
|
|
233
|
+
|
|
212
234
|
log_debug(f"Extracted {len(images)} Markdown images")
|
|
213
235
|
return images
|
|
214
236
|
|
|
@@ -379,19 +379,27 @@ class AnalyzeScaleTool(BaseMCPTool):
|
|
|
379
379
|
include_details = arguments.get("include_details", False)
|
|
380
380
|
include_guidance = arguments.get("include_guidance", True)
|
|
381
381
|
|
|
382
|
+
# Security validation BEFORE path resolution to catch symlinks
|
|
383
|
+
is_valid, error_msg = self.security_validator.validate_file_path(file_path)
|
|
384
|
+
if not is_valid:
|
|
385
|
+
logger.warning(
|
|
386
|
+
f"Security validation failed for file path: {file_path} - {error_msg}"
|
|
387
|
+
)
|
|
388
|
+
raise ValueError(f"Invalid file path: {error_msg}")
|
|
389
|
+
|
|
382
390
|
# Resolve file path to absolute path
|
|
383
391
|
resolved_file_path = self.path_resolver.resolve(file_path)
|
|
384
392
|
logger.info(f"Analyzing file: {file_path} (resolved to: {resolved_file_path})")
|
|
385
393
|
|
|
386
|
-
#
|
|
394
|
+
# Additional security validation on resolved path
|
|
387
395
|
is_valid, error_msg = self.security_validator.validate_file_path(
|
|
388
396
|
resolved_file_path
|
|
389
397
|
)
|
|
390
398
|
if not is_valid:
|
|
391
399
|
logger.warning(
|
|
392
|
-
f"Security validation failed for
|
|
400
|
+
f"Security validation failed for resolved path: {resolved_file_path} - {error_msg}"
|
|
393
401
|
)
|
|
394
|
-
raise ValueError(f"Invalid
|
|
402
|
+
raise ValueError(f"Invalid resolved path: {error_msg}")
|
|
395
403
|
|
|
396
404
|
# Sanitize inputs
|
|
397
405
|
if language:
|
|
@@ -423,6 +431,12 @@ class AnalyzeScaleTool(BaseMCPTool):
|
|
|
423
431
|
# Calculate basic file metrics
|
|
424
432
|
file_metrics = self._calculate_file_metrics(resolved_file_path)
|
|
425
433
|
|
|
434
|
+
# Handle JSON files specially - they don't need structural analysis
|
|
435
|
+
if language == "json":
|
|
436
|
+
return self._create_json_file_analysis(
|
|
437
|
+
resolved_file_path, file_metrics, include_guidance
|
|
438
|
+
)
|
|
439
|
+
|
|
426
440
|
# Use appropriate analyzer based on language
|
|
427
441
|
if language == "java":
|
|
428
442
|
# Use AdvancedAnalyzer for comprehensive analysis
|
|
@@ -472,6 +486,7 @@ class AnalyzeScaleTool(BaseMCPTool):
|
|
|
472
486
|
|
|
473
487
|
# Build enhanced result structure
|
|
474
488
|
result = {
|
|
489
|
+
"success": True,
|
|
475
490
|
"file_path": file_path,
|
|
476
491
|
"language": language,
|
|
477
492
|
"file_metrics": file_metrics,
|
|
@@ -688,6 +703,56 @@ class AnalyzeScaleTool(BaseMCPTool):
|
|
|
688
703
|
|
|
689
704
|
return True
|
|
690
705
|
|
|
706
|
+
def _create_json_file_analysis(
|
|
707
|
+
self, file_path: str, file_metrics: dict[str, Any], include_guidance: bool
|
|
708
|
+
) -> dict[str, Any]:
|
|
709
|
+
"""
|
|
710
|
+
Create analysis result for JSON files.
|
|
711
|
+
|
|
712
|
+
Args:
|
|
713
|
+
file_path: Path to the JSON file
|
|
714
|
+
file_metrics: Basic file metrics
|
|
715
|
+
include_guidance: Whether to include guidance
|
|
716
|
+
|
|
717
|
+
Returns:
|
|
718
|
+
Analysis result for JSON file
|
|
719
|
+
"""
|
|
720
|
+
result = {
|
|
721
|
+
"success": True,
|
|
722
|
+
"file_path": file_path,
|
|
723
|
+
"language": "json",
|
|
724
|
+
"file_size_bytes": file_metrics["file_size_bytes"],
|
|
725
|
+
"total_lines": file_metrics["total_lines"],
|
|
726
|
+
"non_empty_lines": file_metrics["total_lines"] - file_metrics["blank_lines"],
|
|
727
|
+
"estimated_tokens": file_metrics["estimated_tokens"],
|
|
728
|
+
"complexity_metrics": {
|
|
729
|
+
"total_elements": 0,
|
|
730
|
+
"max_depth": 0,
|
|
731
|
+
"avg_complexity": 0.0,
|
|
732
|
+
},
|
|
733
|
+
"structural_overview": {
|
|
734
|
+
"classes": [],
|
|
735
|
+
"methods": [],
|
|
736
|
+
"fields": [],
|
|
737
|
+
},
|
|
738
|
+
"scale_category": "small" if file_metrics["total_lines"] < 100 else "medium" if file_metrics["total_lines"] < 1000 else "large",
|
|
739
|
+
"analysis_recommendations": {
|
|
740
|
+
"suitable_for_full_analysis": file_metrics["total_lines"] < 1000,
|
|
741
|
+
"recommended_approach": "JSON files are configuration/data files - structural analysis not applicable",
|
|
742
|
+
"token_efficiency_notes": "JSON files can be read directly without tree-sitter parsing",
|
|
743
|
+
},
|
|
744
|
+
}
|
|
745
|
+
|
|
746
|
+
if include_guidance:
|
|
747
|
+
result["llm_analysis_guidance"] = {
|
|
748
|
+
"file_characteristics": "JSON configuration/data file",
|
|
749
|
+
"recommended_workflow": "Direct file reading for content analysis",
|
|
750
|
+
"token_optimization": "Use simple file reading tools for JSON content",
|
|
751
|
+
"analysis_focus": "Data structure and configuration values",
|
|
752
|
+
}
|
|
753
|
+
|
|
754
|
+
return result
|
|
755
|
+
|
|
691
756
|
def get_tool_definition(self) -> dict[str, Any]:
|
|
692
757
|
"""
|
|
693
758
|
Get the MCP tool definition for check_code_scale.
|
|
@@ -11,6 +11,7 @@ from __future__ import annotations
|
|
|
11
11
|
import asyncio
|
|
12
12
|
import json
|
|
13
13
|
import os
|
|
14
|
+
import shutil
|
|
14
15
|
import tempfile
|
|
15
16
|
from dataclasses import dataclass
|
|
16
17
|
from pathlib import Path
|
|
@@ -27,6 +28,21 @@ DEFAULT_RG_TIMEOUT_MS = 4000
|
|
|
27
28
|
RG_TIMEOUT_HARD_CAP_MS = 30000
|
|
28
29
|
|
|
29
30
|
|
|
31
|
+
def check_external_command(command: str) -> bool:
    """Report whether ``shutil.which`` can locate ``command``."""
    resolved_path = shutil.which(command)
    return resolved_path is not None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_missing_commands() -> list[str]:
    """Return which of ``fd`` and ``rg`` are unavailable, in that order.

    Availability is decided by ``check_external_command``; the list is
    empty when both commands are found.
    """
    required = ("fd", "rg")
    return [name for name in required if not check_external_command(name)]
|
|
44
|
+
|
|
45
|
+
|
|
30
46
|
def clamp_int(value: int | None, default_value: int, hard_cap: int) -> int:
|
|
31
47
|
if value is None:
|
|
32
48
|
return default_value
|
|
@@ -64,13 +80,22 @@ async def run_command_capture(
|
|
|
64
80
|
Returns (returncode, stdout, stderr). On timeout, kills process and returns 124.
|
|
65
81
|
Separated into a util for easy monkeypatching in tests.
|
|
66
82
|
"""
|
|
67
|
-
#
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
83
|
+
# Check if command exists before attempting to run
|
|
84
|
+
if cmd and not check_external_command(cmd[0]):
|
|
85
|
+
error_msg = f"Command '{cmd[0]}' not found in PATH. Please install {cmd[0]} to use this functionality."
|
|
86
|
+
return 127, b"", error_msg.encode()
|
|
87
|
+
|
|
88
|
+
try:
|
|
89
|
+
# Create process
|
|
90
|
+
proc = await asyncio.create_subprocess_exec(
|
|
91
|
+
*cmd,
|
|
92
|
+
stdin=asyncio.subprocess.PIPE if input_data is not None else None,
|
|
93
|
+
stdout=asyncio.subprocess.PIPE,
|
|
94
|
+
stderr=asyncio.subprocess.PIPE,
|
|
95
|
+
)
|
|
96
|
+
except FileNotFoundError as e:
|
|
97
|
+
error_msg = f"Command '{cmd[0]}' not found: {e}"
|
|
98
|
+
return 127, b"", error_msg.encode()
|
|
74
99
|
|
|
75
100
|
# Compute timeout seconds
|
|
76
101
|
timeout_s: float | None = None
|
|
@@ -239,6 +239,16 @@ class FindAndGrepTool(BaseMCPTool):
|
|
|
239
239
|
|
|
240
240
|
@handle_mcp_errors("find_and_grep")
|
|
241
241
|
async def execute(self, arguments: dict[str, Any]) -> dict[str, Any]:
|
|
242
|
+
# Check if both fd and rg commands are available
|
|
243
|
+
missing_commands = fd_rg_utils.get_missing_commands()
|
|
244
|
+
if missing_commands:
|
|
245
|
+
return {
|
|
246
|
+
"success": False,
|
|
247
|
+
"error": f"Required commands not found: {', '.join(missing_commands)}. Please install fd (https://github.com/sharkdp/fd) and ripgrep (https://github.com/BurntSushi/ripgrep) to use this tool.",
|
|
248
|
+
"count": 0,
|
|
249
|
+
"results": []
|
|
250
|
+
}
|
|
251
|
+
|
|
242
252
|
self.validate_arguments(arguments)
|
|
243
253
|
roots = self._validate_roots(arguments["roots"]) # absolute validated
|
|
244
254
|
|