spidershield 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. spidershield-0.3.0/.dockerignore +16 -0
  2. spidershield-0.3.0/.github/workflows/ci.yml +47 -0
  3. spidershield-0.3.0/.github/workflows/publish.yml +29 -0
  4. spidershield-0.3.0/.github/workflows/scan.yml +55 -0
  5. spidershield-0.3.0/.github/workflows/weekly-scan.yml +66 -0
  6. spidershield-0.3.0/.gitignore +36 -0
  7. spidershield-0.3.0/CLAUDE.md +123 -0
  8. spidershield-0.3.0/CURATION-REPORT.md +68 -0
  9. spidershield-0.3.0/Dockerfile +18 -0
  10. spidershield-0.3.0/LICENSE +21 -0
  11. spidershield-0.3.0/MCP-SECURITY-REPORT.md +132 -0
  12. spidershield-0.3.0/PKG-INFO +289 -0
  13. spidershield-0.3.0/README.md +258 -0
  14. spidershield-0.3.0/ROADMAP.md +39 -0
  15. spidershield-0.3.0/action.yml +65 -0
  16. spidershield-0.3.0/batch-rewrites/rewrite-fetch.json +7 -0
  17. spidershield-0.3.0/batch-rewrites/rewrite-git.json +62 -0
  18. spidershield-0.3.0/batch-rewrites/rewrite-memory.json +47 -0
  19. spidershield-0.3.0/batch-scan-results.json +970 -0
  20. spidershield-0.3.0/examples/insecure-server/server.py +35 -0
  21. spidershield-0.3.0/examples/secure-server/server.py +46 -0
  22. spidershield-0.3.0/pyproject.toml +58 -0
  23. spidershield-0.3.0/rewrite-llm-filesystem.json +72 -0
  24. spidershield-0.3.0/rewrites-filesystem.json +72 -0
  25. spidershield-0.3.0/scripts/teeshield_to_spiderrating.py +103 -0
  26. spidershield-0.3.0/src/spidershield/__init__.py +166 -0
  27. spidershield-0.3.0/src/spidershield/__main__.py +5 -0
  28. spidershield-0.3.0/src/spidershield/adapters/__init__.py +17 -0
  29. spidershield-0.3.0/src/spidershield/adapters/base.py +144 -0
  30. spidershield-0.3.0/src/spidershield/adapters/mcp_proxy.py +221 -0
  31. spidershield-0.3.0/src/spidershield/adapters/standalone.py +203 -0
  32. spidershield-0.3.0/src/spidershield/agent/__init__.py +23 -0
  33. spidershield-0.3.0/src/spidershield/agent/allowlist.py +56 -0
  34. spidershield-0.3.0/src/spidershield/agent/fixer.py +263 -0
  35. spidershield-0.3.0/src/spidershield/agent/issue_codes.py +268 -0
  36. spidershield-0.3.0/src/spidershield/agent/models.py +115 -0
  37. spidershield-0.3.0/src/spidershield/agent/pinning.py +247 -0
  38. spidershield-0.3.0/src/spidershield/agent/report.py +173 -0
  39. spidershield-0.3.0/src/spidershield/agent/sarif.py +248 -0
  40. spidershield-0.3.0/src/spidershield/agent/scanner.py +408 -0
  41. spidershield-0.3.0/src/spidershield/agent/skill_scanner.py +416 -0
  42. spidershield-0.3.0/src/spidershield/agent/toxic_flow.py +454 -0
  43. spidershield-0.3.0/src/spidershield/audit/__init__.py +6 -0
  44. spidershield-0.3.0/src/spidershield/audit/logger.py +108 -0
  45. spidershield-0.3.0/src/spidershield/audit/storage.py +143 -0
  46. spidershield-0.3.0/src/spidershield/cli.py +1117 -0
  47. spidershield-0.3.0/src/spidershield/dataset/__init__.py +1 -0
  48. spidershield-0.3.0/src/spidershield/dataset/collector.py +344 -0
  49. spidershield-0.3.0/src/spidershield/dataset/db.py +288 -0
  50. spidershield-0.3.0/src/spidershield/dlp/__init__.py +27 -0
  51. spidershield-0.3.0/src/spidershield/dlp/engine.py +275 -0
  52. spidershield-0.3.0/src/spidershield/dlp/pii.py +193 -0
  53. spidershield-0.3.0/src/spidershield/dlp/prompt_injection.py +180 -0
  54. spidershield-0.3.0/src/spidershield/dlp/secrets.py +227 -0
  55. spidershield-0.3.0/src/spidershield/evaluator/__init__.py +0 -0
  56. spidershield-0.3.0/src/spidershield/evaluator/runner.py +386 -0
  57. spidershield-0.3.0/src/spidershield/guard/__init__.py +22 -0
  58. spidershield-0.3.0/src/spidershield/guard/context.py +22 -0
  59. spidershield-0.3.0/src/spidershield/guard/core.py +113 -0
  60. spidershield-0.3.0/src/spidershield/guard/decision.py +41 -0
  61. spidershield-0.3.0/src/spidershield/guard/policy.py +159 -0
  62. spidershield-0.3.0/src/spidershield/guard/presets/balanced.yaml +73 -0
  63. spidershield-0.3.0/src/spidershield/guard/presets/permissive.yaml +34 -0
  64. spidershield-0.3.0/src/spidershield/guard/presets/strict.yaml +84 -0
  65. spidershield-0.3.0/src/spidershield/hardener/__init__.py +0 -0
  66. spidershield-0.3.0/src/spidershield/hardener/prompt.py +67 -0
  67. spidershield-0.3.0/src/spidershield/hardener/quality_gate.py +150 -0
  68. spidershield-0.3.0/src/spidershield/hardener/runner.py +343 -0
  69. spidershield-0.3.0/src/spidershield/models.py +94 -0
  70. spidershield-0.3.0/src/spidershield/rewriter/__init__.py +0 -0
  71. spidershield-0.3.0/src/spidershield/rewriter/cache.py +53 -0
  72. spidershield-0.3.0/src/spidershield/rewriter/prompt.py +82 -0
  73. spidershield-0.3.0/src/spidershield/rewriter/providers.py +104 -0
  74. spidershield-0.3.0/src/spidershield/rewriter/quality_gate.py +260 -0
  75. spidershield-0.3.0/src/spidershield/rewriter/runner.py +513 -0
  76. spidershield-0.3.0/src/spidershield/scanner/__init__.py +0 -0
  77. spidershield-0.3.0/src/spidershield/scanner/architecture_check.py +229 -0
  78. spidershield-0.3.0/src/spidershield/scanner/description_quality.py +416 -0
  79. spidershield-0.3.0/src/spidershield/scanner/license_check.py +92 -0
  80. spidershield-0.3.0/src/spidershield/scanner/runner.py +278 -0
  81. spidershield-0.3.0/src/spidershield/scanner/security_scan.py +273 -0
  82. spidershield-0.3.0/src/spidershield/server.py +173 -0
  83. spidershield-0.3.0/src/spidershield/spiderrating.py +573 -0
  84. spidershield-0.3.0/src/spidershield/utils/__init__.py +0 -0
  85. spidershield-0.3.0/src/spidershield/utils/jsonrpc.py +98 -0
  86. spidershield-0.3.0/tests/__init__.py +0 -0
  87. spidershield-0.3.0/tests/test_adapters.py +346 -0
  88. spidershield-0.3.0/tests/test_agent_pinning.py +235 -0
  89. spidershield-0.3.0/tests/test_agent_sarif.py +279 -0
  90. spidershield-0.3.0/tests/test_allowlist.py +104 -0
  91. spidershield-0.3.0/tests/test_audit.py +405 -0
  92. spidershield-0.3.0/tests/test_cli.py +95 -0
  93. spidershield-0.3.0/tests/test_cli_integration.py +130 -0
  94. spidershield-0.3.0/tests/test_dataset.py +496 -0
  95. spidershield-0.3.0/tests/test_dlp.py +313 -0
  96. spidershield-0.3.0/tests/test_evaluator_v2.py +104 -0
  97. spidershield-0.3.0/tests/test_firewall.py +135 -0
  98. spidershield-0.3.0/tests/test_golden_descriptions.py +258 -0
  99. spidershield-0.3.0/tests/test_guard_core.py +156 -0
  100. spidershield-0.3.0/tests/test_hardener_v2.py +279 -0
  101. spidershield-0.3.0/tests/test_issue_codes.py +198 -0
  102. spidershield-0.3.0/tests/test_mcp_proxy.py +220 -0
  103. spidershield-0.3.0/tests/test_presets.py +173 -0
  104. spidershield-0.3.0/tests/test_prompt_injection.py +334 -0
  105. spidershield-0.3.0/tests/test_public_api.py +299 -0
  106. spidershield-0.3.0/tests/test_rewrite_cache.py +45 -0
  107. spidershield-0.3.0/tests/test_rewriter.py +145 -0
  108. spidershield-0.3.0/tests/test_rewriter_v2.py +477 -0
  109. spidershield-0.3.0/tests/test_scan_runner.py +102 -0
  110. spidershield-0.3.0/tests/test_scanner.py +251 -0
  111. spidershield-0.3.0/tests/test_spiderrating.py +190 -0
  112. spidershield-0.3.0/tests/test_toxic_flow.py +457 -0
@@ -0,0 +1,16 @@
1
+ .git
2
+ .github
3
+ tests
4
+ docs
5
+ examples
6
+ *.md
7
+ !README.md
8
+ __pycache__
9
+ *.pyc
10
+ .ruff_cache
11
+ .pytest_cache
12
+ test-targets
13
+ tmp_scan
14
+ dist
15
+ build
16
+ *.egg-info
@@ -0,0 +1,47 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [master, main]
6
+ pull_request:
7
+ branches: [master, main]
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - name: Set up Python
16
+ uses: actions/setup-python@v5
17
+ with:
18
+ python-version: '3.12'
19
+
20
+ - name: Install ruff
21
+ run: pip install "ruff>=0.4"
22
+
23
+ - name: Lint
24
+ run: ruff check src/ tests/
25
+
26
+ test:
27
+ runs-on: ubuntu-latest
28
+ strategy:
29
+ matrix:
30
+ python-version: ['3.11', '3.12', '3.13']
31
+
32
+ steps:
33
+ - uses: actions/checkout@v4
34
+
35
+ - name: Set up Python ${{ matrix.python-version }}
36
+ uses: actions/setup-python@v5
37
+ with:
38
+ python-version: ${{ matrix.python-version }}
39
+
40
+ - name: Install dependencies
41
+ run: pip install -e ".[dev]" pytest-cov
42
+
43
+ - name: Run tests with coverage
44
+ run: pytest tests/ -v --cov=spidershield --cov-report=term-missing --cov-fail-under=60
45
+
46
+ - name: Verify CLI
47
+ run: spidershield --version
@@ -0,0 +1,29 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ id-token: write
9
+
10
+ jobs:
11
+ publish:
12
+ runs-on: ubuntu-latest
13
+ environment: pypi
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - name: Set up Python
18
+ uses: actions/setup-python@v5
19
+ with:
20
+ python-version: '3.12'
21
+
22
+ - name: Install build tools
23
+ run: pip install build
24
+
25
+ - name: Build package
26
+ run: python -m build
27
+
28
+ - name: Publish to PyPI
29
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,55 @@
1
+ name: SpiderShield Scan
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ inputs:
6
+ target:
7
+ description: 'MCP server path or GitHub URL to scan'
8
+ required: true
9
+ type: string
10
+ format:
11
+ description: 'Output format'
12
+ required: false
13
+ default: 'table'
14
+ type: choice
15
+ options:
16
+ - table
17
+ - json
18
+
19
+ # Can be called from other repos
20
+ workflow_call:
21
+ inputs:
22
+ target:
23
+ description: 'MCP server path to scan'
24
+ required: true
25
+ type: string
26
+
27
+ jobs:
28
+ scan:
29
+ runs-on: ubuntu-latest
30
+ steps:
31
+ - uses: actions/checkout@v4
32
+
33
+ - name: Set up Python
34
+ uses: actions/setup-python@v5
35
+ with:
36
+ python-version: '3.12'
37
+
38
+ - name: Install SpiderShield
39
+ run: pip install -e .
40
+
41
+ - name: Run scan
42
+ run: |
43
+ spidershield scan "${{ inputs.target }}" --format "${{ inputs.format || 'table' }}"
44
+
45
+ - name: Run scan (JSON artifact)
46
+ if: always()
47
+ run: |
48
+ spidershield scan "${{ inputs.target }}" --format json -o scan-report.json || true
49
+
50
+ - name: Upload report
51
+ if: always()
52
+ uses: actions/upload-artifact@v4
53
+ with:
54
+ name: spidershield-report
55
+ path: scan-report.json
@@ -0,0 +1,66 @@
1
+ name: Weekly MCP Server Scan
2
+
3
+ on:
4
+ schedule:
5
+ - cron: '0 9 * * 1' # Every Monday at 09:00 UTC
6
+ workflow_dispatch:
7
+
8
+ jobs:
9
+ scan:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+
14
+ - name: Set up Python
15
+ uses: actions/setup-python@v5
16
+ with:
17
+ python-version: '3.12'
18
+
19
+ - name: Install SpiderShield
20
+ run: pip install -e .
21
+
22
+ - name: Scan official MCP servers
23
+ run: |
24
+ mkdir -p scan-results
25
+ TARGETS=(
26
+ "https://github.com/modelcontextprotocol/servers"
27
+ "https://github.com/supabase-community/supabase-mcp"
28
+ "https://github.com/neondatabase/mcp-server-neon"
29
+ "https://github.com/korotovsky/slack-mcp-server"
30
+ )
31
+ for target in "${TARGETS[@]}"; do
32
+ name=$(basename "$target")
33
+ echo "Scanning: $name"
34
+ spidershield scan "$target" --format json -o "scan-results/${name}.json" || true
35
+ done
36
+
37
+ - name: Generate summary
38
+ run: |
39
+ python -c "
40
+ import json, glob, sys
41
+ results = []
42
+ for f in sorted(glob.glob('scan-results/*.json')):
43
+ try:
44
+ data = json.load(open(f))
45
+ results.append({
46
+ 'server': data.get('target', f),
47
+ 'overall': data.get('overall_score', 0),
48
+ 'security': data.get('security_score', 0),
49
+ 'descriptions': data.get('description_score', 0),
50
+ 'rating': data.get('rating', '?'),
51
+ 'tools': data.get('tool_count', 0),
52
+ })
53
+ except Exception as e:
54
+ print(f'Error reading {f}: {e}', file=sys.stderr)
55
+ json.dump({'scan_date': '$(date -I)', 'servers': results}, open('scan-results/summary.json', 'w'), indent=2)
56
+ print(f'Scanned {len(results)} servers')
57
+ for r in sorted(results, key=lambda x: -x['overall']):
58
+ print(f\" {r['rating']:3s} {r['overall']:5.1f} {r['server']}\")
59
+ "
60
+
61
+ - name: Upload scan results
62
+ uses: actions/upload-artifact@v4
63
+ with:
64
+ name: weekly-scan-${{ github.run_number }}
65
+ path: scan-results/
66
+ retention-days: 90
@@ -0,0 +1,36 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .venv/
10
+ venv/
11
+ env/
12
+ .env
13
+ .env.*
14
+ *.log
15
+ .pytest_cache/
16
+ .ruff_cache/
17
+ .mypy_cache/
18
+ .coverage
19
+ htmlcov/
20
+ node_modules/
21
+ .DS_Store
22
+ Thumbs.db
23
+ *.swp
24
+ *.swo
25
+ reports/
26
+
27
+ # Test targets (cloned repos)
28
+ test-targets/
29
+
30
+ # Generated reports
31
+ *.report.json
32
+ fs-report.json
33
+
34
+ # Internal docs (not published)
35
+ docs/internal/
36
+ docs/observations/
@@ -0,0 +1,123 @@
1
+ # SpiderShield -- MCP Server Security Linter
2
+
3
+ ## Project Overview
4
+ SpiderShield is a static analysis tool for MCP (Model Context Protocol) servers.
5
+ It scans tool descriptions, security patterns, architecture quality, and licensing.
6
+ Think "npm audit for MCP tools".
7
+
8
+ ## Architecture
9
+ ```
10
+ src/spidershield/
11
+ cli.py -- Click CLI (scan, rewrite, harden, eval)
12
+ models.py -- Pydantic V2 models (ScanReport, SecurityIssue, etc.)
13
+ server.py -- MCP server mode (scan_mcp_server tool)
14
+ spiderrating.py -- SpiderRating format conversion (metadata, grades)
15
+ scanner/
16
+ runner.py -- Orchestrates 4-stage scan pipeline
17
+ description_quality.py -- Tool description scoring (7 criteria)
18
+ security_scan.py -- Static security pattern matching
19
+ architecture_check.py -- Code quality checks
20
+ license_check.py -- License detection
21
+ agent/
22
+ scanner.py -- Agent config security audit
23
+ skill_scanner.py -- Skill malware/injection pattern matching (20 patterns)
24
+ toxic_flow.py -- Dangerous capability combination detection (keyword + AST)
25
+ pinning.py -- SHA-256 content pinning for rug-pull detection
26
+ allowlist.py -- Approved-only skills enforcement
27
+ sarif.py -- SARIF output for agent findings
28
+ models.py -- Finding, SkillFinding, ScanResult, AuditFramework
29
+ issue_codes.py -- TS-E/W/C/P code registry
30
+ rewriter/
31
+ runner.py -- Template + LLM description rewriter
32
+ cache.py -- SHA-256 keyed LLM rewrite cache
33
+ providers.py -- Anthropic / OpenAI / Gemini providers
34
+ hardener/runner.py -- Security fix suggestions
35
+ evaluator/runner.py -- Tool selection accuracy testing
36
+ ```
37
+
38
+ ## Hard Constraints (G0 -- never violate)
39
+
40
+ 1. **No false sense of security**: Never give A rating to a server with undetected critical issues.
41
+ If uncertain, score conservatively.
42
+ 2. **No destructive modifications**: `rewrite` and `harden` must never break working code.
43
+ Always preserve original semantics.
44
+ 3. **Reproducible results**: Same input must produce identical scan output.
45
+ No randomness, no network-dependent scoring.
46
+
47
+ ## Evolution Mode Protocol
48
+
49
+ SpiderShield uses evidence-driven evolution (see docs/internal/001-audit-quality-evolution-2026-03-08.md).
50
+
51
+ ### Per-Change Cycle
52
+ 1. **Evidence first**: Before changing a scanner, document the false positive/negative that motivates the change
53
+ 2. **Measure before/after**: Run `spidershield scan` on test-targets/ before and after changes
54
+ 3. **Update observation doc**: Record what changed and why in docs/internal/
55
+
56
+ ### Scanner Quality Rules
57
+ - Security scanner: Minimize false positives. A false positive erodes trust more than a missed issue.
58
+ - Description scorer: Score must correlate with actual LLM tool selection success.
59
+ - Architecture checker: Gradual scoring preferred over binary pass/fail.
60
+ - Overall score: SpiderRating formula `descriptions*0.35 + security_adjusted*0.35 + architecture*0.30`.
61
+ - Architecture bonus: `min(3.0, arch_score * 0.3)` folds into security_adjusted.
62
+ - Grade scale: F/D/C/B/A (thresholds: 3.0/5.0/7.0/8.5).
63
+ - Hard constraints: critical→F, no_tools→F, license_banned→D cap.
64
+
65
+ ### Scoring Calibration
66
+ - A server with no quality signals in descriptions should score 0-2/10, not 3-4/10
67
+ - A server with all quality signals should score 8-10/10
68
+ - Security score 10.0 means zero issues found, not "secure" (we can't prove absence)
69
+
70
+ ## PR Campaign Quality Gates (G1 -- must pass before submitting any PR)
71
+
72
+ Before submitting a PR to any external MCP repo, ALL 5 gates must pass:
73
+
74
+ 1. **Real problem**: The original description has a concrete, demonstrable issue
75
+ (ambiguous scope, missing side-effects, misleading trigger). Not "could be better".
76
+ 2. **Narrower and more precise**: The rewrite must be semantically narrower or more
77
+ specific than the original. Never broaden scope.
78
+ 3. **Preserves command semantics**: The rewrite must not change what the tool actually does.
79
+ `git fetch` fetches from git remotes, not URLs. `git add` stages files, not "appends".
80
+ 4. **One-sentence justification**: If you cannot explain the change in one sentence,
81
+ the change is not high-confidence enough for an automated PR.
82
+ 5. **Tests must pass**: If the target repo has CI, the PR must not break it.
83
+ Description-only changes should never break tests.
84
+
85
+ ### PR Strategy by Repo Type
86
+
87
+ | Type | Stars | Action | Example |
88
+ |------|-------|--------|---------|
89
+ | A: Small active | < 1k | Direct PR | HenkDz/postgresql-mcp-server |
90
+ | B: Large/strict | > 1k | Issue first, PR if invited | modelcontextprotocol/servers |
91
+ | C: Corporate | any | Issue only | playwright-mcp, github-mcp |
92
+
93
+ ### PR Anti-Patterns (learned from rejected PRs)
94
+
95
+ - **Tautological triggers**: "Opens a file. Use when user wants to open a file." (rejected by ida-pro-mcp #277)
96
+ - **Semantic mismatches**: git "fetch" matched to URL "fetch", git "add" matched to "append" (flagged on git-mcp-server #42)
97
+ - **Template boilerplate**: Generic error guidance unrelated to the tool's domain
98
+ - **Too many changes**: Large PRs scare maintainers. Keep to 1-5 tool descriptions per PR.
99
+
100
+ ## Development Standards
101
+
102
+ - Python 3.11+ with type hints on all new functions
103
+ - Use Pydantic V2 for data models
104
+ - Rich console for CLI output
105
+ - Tests in tests/ directory
106
+ - No unnecessary dependencies
107
+
108
+ ## Key Files for Common Tasks
109
+
110
+ | Task | Files |
111
+ |------|-------|
112
+ | Add security pattern | scanner/security_scan.py (DANGEROUS_PATTERNS dict) |
113
+ | Add description criterion | scanner/description_quality.py + models.py (ToolDescriptionScore) |
114
+ | Change scoring weights | scanner/description_quality.py (line ~90-108) |
115
+ | Add tool extraction pattern | scanner/description_quality.py (_extract_tools) |
116
+ | Change report output | scanner/runner.py (_print_table) |
117
+ | SpiderRating conversion (MCP) | spiderrating.py (convert, metadata scoring) |
118
+ | SpiderRating conversion (Skill) | spiderrating.py (convert_skill, score_skill_description) |
119
+ | Agent security patterns | agent/skill_scanner.py (MALICIOUS_PATTERNS list) |
120
+ | Agent config checks | agent/scanner.py |
121
+ | Toxic flow detection | agent/toxic_flow.py (keyword + AST) |
122
+ | LLM rewrite cache | rewriter/cache.py (~/.spidershield/rewrite-cache/) |
123
+ | Add CLI command | cli.py |
@@ -0,0 +1,68 @@
1
+ # SpiderShield Curation Report
2
+
3
+ Batch scan of top MCP servers. Generated by `spidershield scan`.
4
+
5
+ ## Summary
6
+
7
+ | Server | Tools | Security | Description | Arch | Overall | Rating | Rewrite Gain |
8
+ |--------|-------|----------|-------------|------|---------|--------|-------------|
9
+ | filesystem (official) | 14 | 10.0 | 3.2 | 10.0 | 7.6 | B | +4.8 |
10
+ | memory (official) | 9 | 10.0 | 2.3 | 10.0 | 7.3 | B | - |
11
+ | git (official) | 12 | 10.0 | 2.4 | 10.0 | 7.3 | B | +6.6 |
12
+ | fetch (official) | 1 | 9.0 | 3.5 | 10.0 | 7.3 | B | - |
13
+ | time (official) | 0 | 10.0 | 5.0 | 10.0 | 8.2 | A | - |
14
+ | everything (python-sdk) | 13 | 10.0 | 2.8 | 7.0 | 6.7 | B | - |
15
+ | supabase (community) | 30 | 9.0 | 2.3 | 8.0 | 6.4 | B | +5.6 |
16
+
17
+ **Totals:** 7 servers, 79 tools scanned
18
+
19
+ ## Key Findings
20
+
21
+ ### 1. Description quality is universally poor
22
+ - Average description score: **3.1/10**
23
+ - 0% of tools have scenario triggers ("Use when...")
24
+ - 0% of tools have parameter examples
25
+ - <5% have error guidance
26
+ - This confirms the Neon finding: tool descriptions are the #1 bottleneck for agent tool selection
27
+
28
+ ### 2. Rewriting delivers massive gains
29
+
30
+ **Template-based (zero cost):**
31
+ - filesystem: 3.7 -> 8.5 (+4.8 points)
32
+ - git: 2.9 -> 9.5 (+6.6 points)
33
+ - supabase: 3.4 -> 9.0 (+5.6 points)
34
+ - Average improvement: +5.7 points
35
+
36
+ **Claude API (higher quality):**
37
+ - filesystem: 3.7 -> 9.6 (+5.9 points)
38
+ - memory: 3.4 -> 9.4 (+6.0 points)
39
+ - git: 2.9 -> 8.6 (+5.7 points)
40
+ - fetch: 3.5 -> 9.6 (+6.1 points)
41
+ - **Average improvement: +5.9 points with Claude API**
42
+
43
+ ### 3. Security is generally solid
44
+ - Average security score: **9.7/10**
45
+ - Most official servers score 10.0 (clean); fetch (official) scores 9.0
46
+ - The two servers below 10.0 (fetch, supabase) have minor issues (SSRF in fetch, credential handling)
47
+ - No critical vulnerabilities found in production servers
48
+
49
+ ### 4. Architecture quality varies
50
+ - Official servers: strong (10.0) - tests, error handling, types
51
+ - Community servers: moderate (7-8) - often missing tests
52
+
53
+ ## Top Curation Candidates
54
+
55
+ Servers that would benefit most from SpiderShield curation (high tool count + low description quality):
56
+
57
+ 1. **supabase** - 30 tools, desc 2.3/10, improvement potential +5.6
58
+ 2. **filesystem** - 14 tools, desc 3.2/10, improvement potential +4.8
59
+ 3. **everything** - 13 tools, desc 2.8/10
60
+ 4. **git** - 12 tools, desc 2.4/10, improvement potential +6.6
61
+ 5. **memory** - 9 tools, desc 2.3/10
62
+
63
+ ## Not Yet Scanned (need pattern support)
64
+ - Playwright MCP (compiled JS only, no source TS)
65
+ - GitHub MCP (Go, not yet supported)
66
+ - Context7 (clone failed)
67
+ - AWS MCP servers
68
+ - Desktop Commander
@@ -0,0 +1,18 @@
1
+ FROM python:3.12-slim
2
+
3
+ RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*
4
+
5
+ RUN useradd -m -u 1000 spidershield
6
+
7
+ WORKDIR /app
8
+
9
+ COPY pyproject.toml README.md ./
10
+ COPY src/ src/
11
+
12
+ RUN pip install --no-cache-dir .
13
+
14
+ USER spidershield
15
+
16
+ HEALTHCHECK --interval=30s --timeout=5s CMD spidershield --version || exit 1
17
+
18
+ CMD ["spidershield-server"]
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 TeeShield
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,132 @@
1
+ # The MCP Tool Security Report
2
+
3
+ **We scanned 79 MCP tools across 7 production servers. Here's what we found.**
4
+
5
+ ## What we scanned
6
+
7
+ We ran [SpiderShield](https://github.com/teehooai/spidershield) against the most widely used MCP servers in the ecosystem:
8
+
9
+ | Server | Type | Tools |
10
+ |--------|------|-------|
11
+ | filesystem | Official | 14 |
12
+ | git | Official | 12 |
13
+ | memory | Official | 9 |
14
+ | fetch | Official | 1 |
15
+ | time | Official | 0 |
16
+ | everything | SDK example | 13 |
17
+ | supabase | Community | 30 |
18
+
19
+ Each server was scored on four dimensions: description quality, security patterns, architecture, and license compliance.
20
+
21
+ ## The headline number
22
+
23
+ **Average description quality: 3.1 out of 10.**
24
+
25
+ This matters because AI agents choose which tool to call based entirely on the description text. A vague description gives the agent no boundaries.
26
+
27
+ ## What's missing from tool descriptions
28
+
29
+ We checked every tool description for five critical attributes:
30
+
31
+ | Attribute | Present | Missing |
32
+ |-----------|---------|---------|
33
+ | Scenario triggers ("Use when...") | 0% | 100% |
34
+ | Parameter examples | 0% | 100% |
35
+ | Error handling guidance | <5% | >95% |
36
+ | Disambiguation vs similar tools | <10% | >90% |
37
+ | Adequate length (>50 chars) | ~40% | ~60% |
38
+
39
+ Not a single tool across all 79 had a scenario trigger. This means agents have zero guidance on *when* to pick one tool over another.
40
+
41
+ ## Why this matters for agents
42
+
43
+ Consider the official Git MCP server. It has 12 tools, including three that show diffs:
44
+
45
+ ```
46
+ git_diff_unstaged: "Shows changes in the working directory that are not yet staged"
47
+ git_diff_staged: "Shows changes that are staged for commit"
48
+ git_diff: "Shows differences between branches or commits"
49
+ ```
50
+
51
+ An agent seeing these descriptions has to guess which one to call. There's no "Use when..." trigger, no examples, no disambiguation. The result: agents frequently call the wrong diff tool, waste context tokens, and produce incorrect results.
52
+
53
+ ## What a good description looks like
54
+
55
+ Here's the status tool from the same server, before and after a SpiderShield rewrite:
56
+
57
+ **Before (score 2.9):**
58
+ ```
59
+ "Shows the working tree status"
60
+ ```
61
+
62
+ **After (score 9.6):**
63
+ ```
64
+ "Show the working tree status including modified, staged, and untracked files.
65
+ Use when the user wants to see the current state of the repository before
66
+ committing or staging changes. Unlike git_diff tools that show content changes,
67
+ this only shows which files have changed."
68
+ ```
69
+
70
+ Three things changed:
71
+ 1. **Scenario trigger** -- tells the agent *when* to use this tool
72
+ 2. **What it returns** -- the agent knows what to expect
73
+ 3. **Disambiguation** -- prevents confusion with similar tools
74
+
75
+ ## The rewrite impact
76
+
77
+ We rewrote all 79 tool descriptions using SpiderShield's template engine (zero cost, no API calls):
78
+
79
+ | Server | Before | After | Gain |
80
+ |--------|--------|-------|------|
81
+ | git | 2.9 | 9.5 | +6.6 |
82
+ | supabase | 3.4 | 9.0 | +5.6 |
83
+ | filesystem | 3.7 | 8.5 | +4.8 |
84
+ | **Average** | **3.1** | **8.8** | **+5.7** |
85
+
86
+ Using Claude API for higher-quality rewrites: average gain of **+5.9 points**.
87
+
88
+ ## Security is actually solid
89
+
90
+ Good news: the code-level security of MCP servers is generally strong.
91
+
92
+ | Server | Security Score |
93
+ |--------|---------------|
94
+ | filesystem | 10.0 |
95
+ | git | 10.0 |
96
+ | memory | 10.0 |
97
+ | time | 10.0 |
98
+ | everything | 10.0 |
99
+ | fetch | 9.0 |
100
+ | supabase | 9.0 |
101
+
102
+ The security risk in MCP isn't in the code. It's in the descriptions. A tool with perfect code but a vague description like "access filesystem" is still dangerous -- because the agent doesn't know what boundaries to respect.
103
+
104
+ ## Top findings
105
+
106
+ 1. **Description quality is the #1 security bottleneck.** Not code vulnerabilities. Not architecture. The descriptions.
107
+
108
+ 2. **Zero tools have scenario triggers.** This is the single most impactful improvement. Adding "Use when..." costs nothing and immediately improves tool selection accuracy.
109
+
110
+ 3. **Disambiguation is almost nonexistent.** Servers with similar tools (git has 3 diff tools, filesystem has 3 read tools) provide no guidance on which to choose.
111
+
112
+ 4. **The fix is cheap.** Template-based rewriting is free and delivers +5.7 points on average. No API calls, no runtime overhead, no code changes.
113
+
114
+ ## What server authors should do
115
+
116
+ 1. **Add scenario triggers** to every tool description: "Use when the user wants to..."
117
+ 2. **Add parameter examples** with concrete values, not just type annotations
118
+ 3. **Disambiguate similar tools** explicitly: "Unlike X, this tool..."
119
+ 4. **Declare side effects**: "This tool writes to disk" / "This tool is read-only"
120
+ 5. **Run SpiderShield** before publishing: `pip install spidershield && spidershield scan .`
121
+
122
+ ## Methodology
123
+
124
+ - Scanner: [SpiderShield v0.1.0](https://pypi.org/project/spidershield/)
125
+ - Description scoring: 5 criteria weighted to 10-point scale (scenario triggers 3.0, disambiguation 2.0, parameter examples 2.0, error guidance 1.5, length 1.5)
126
+ - Security scanning: pattern-based detection for path traversal, command injection, SSRF, credential exposure
127
+ - All scans are static analysis -- no runtime execution, no network calls
128
+ - Raw data: [batch-scan-results.json](batch-scan-results.json)
129
+
130
+ ---
131
+
132
+ *This report was generated by [SpiderShield](https://github.com/teehooai/spidershield), a static security linter for MCP tools. Install: `pip install spidershield`*