spidershield 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spidershield-0.3.0/.dockerignore +16 -0
- spidershield-0.3.0/.github/workflows/ci.yml +47 -0
- spidershield-0.3.0/.github/workflows/publish.yml +29 -0
- spidershield-0.3.0/.github/workflows/scan.yml +55 -0
- spidershield-0.3.0/.github/workflows/weekly-scan.yml +66 -0
- spidershield-0.3.0/.gitignore +36 -0
- spidershield-0.3.0/CLAUDE.md +123 -0
- spidershield-0.3.0/CURATION-REPORT.md +68 -0
- spidershield-0.3.0/Dockerfile +18 -0
- spidershield-0.3.0/LICENSE +21 -0
- spidershield-0.3.0/MCP-SECURITY-REPORT.md +132 -0
- spidershield-0.3.0/PKG-INFO +289 -0
- spidershield-0.3.0/README.md +258 -0
- spidershield-0.3.0/ROADMAP.md +39 -0
- spidershield-0.3.0/action.yml +65 -0
- spidershield-0.3.0/batch-rewrites/rewrite-fetch.json +7 -0
- spidershield-0.3.0/batch-rewrites/rewrite-git.json +62 -0
- spidershield-0.3.0/batch-rewrites/rewrite-memory.json +47 -0
- spidershield-0.3.0/batch-scan-results.json +970 -0
- spidershield-0.3.0/examples/insecure-server/server.py +35 -0
- spidershield-0.3.0/examples/secure-server/server.py +46 -0
- spidershield-0.3.0/pyproject.toml +58 -0
- spidershield-0.3.0/rewrite-llm-filesystem.json +72 -0
- spidershield-0.3.0/rewrites-filesystem.json +72 -0
- spidershield-0.3.0/scripts/teeshield_to_spiderrating.py +103 -0
- spidershield-0.3.0/src/spidershield/__init__.py +166 -0
- spidershield-0.3.0/src/spidershield/__main__.py +5 -0
- spidershield-0.3.0/src/spidershield/adapters/__init__.py +17 -0
- spidershield-0.3.0/src/spidershield/adapters/base.py +144 -0
- spidershield-0.3.0/src/spidershield/adapters/mcp_proxy.py +221 -0
- spidershield-0.3.0/src/spidershield/adapters/standalone.py +203 -0
- spidershield-0.3.0/src/spidershield/agent/__init__.py +23 -0
- spidershield-0.3.0/src/spidershield/agent/allowlist.py +56 -0
- spidershield-0.3.0/src/spidershield/agent/fixer.py +263 -0
- spidershield-0.3.0/src/spidershield/agent/issue_codes.py +268 -0
- spidershield-0.3.0/src/spidershield/agent/models.py +115 -0
- spidershield-0.3.0/src/spidershield/agent/pinning.py +247 -0
- spidershield-0.3.0/src/spidershield/agent/report.py +173 -0
- spidershield-0.3.0/src/spidershield/agent/sarif.py +248 -0
- spidershield-0.3.0/src/spidershield/agent/scanner.py +408 -0
- spidershield-0.3.0/src/spidershield/agent/skill_scanner.py +416 -0
- spidershield-0.3.0/src/spidershield/agent/toxic_flow.py +454 -0
- spidershield-0.3.0/src/spidershield/audit/__init__.py +6 -0
- spidershield-0.3.0/src/spidershield/audit/logger.py +108 -0
- spidershield-0.3.0/src/spidershield/audit/storage.py +143 -0
- spidershield-0.3.0/src/spidershield/cli.py +1117 -0
- spidershield-0.3.0/src/spidershield/dataset/__init__.py +1 -0
- spidershield-0.3.0/src/spidershield/dataset/collector.py +344 -0
- spidershield-0.3.0/src/spidershield/dataset/db.py +288 -0
- spidershield-0.3.0/src/spidershield/dlp/__init__.py +27 -0
- spidershield-0.3.0/src/spidershield/dlp/engine.py +275 -0
- spidershield-0.3.0/src/spidershield/dlp/pii.py +193 -0
- spidershield-0.3.0/src/spidershield/dlp/prompt_injection.py +180 -0
- spidershield-0.3.0/src/spidershield/dlp/secrets.py +227 -0
- spidershield-0.3.0/src/spidershield/evaluator/__init__.py +0 -0
- spidershield-0.3.0/src/spidershield/evaluator/runner.py +386 -0
- spidershield-0.3.0/src/spidershield/guard/__init__.py +22 -0
- spidershield-0.3.0/src/spidershield/guard/context.py +22 -0
- spidershield-0.3.0/src/spidershield/guard/core.py +113 -0
- spidershield-0.3.0/src/spidershield/guard/decision.py +41 -0
- spidershield-0.3.0/src/spidershield/guard/policy.py +159 -0
- spidershield-0.3.0/src/spidershield/guard/presets/balanced.yaml +73 -0
- spidershield-0.3.0/src/spidershield/guard/presets/permissive.yaml +34 -0
- spidershield-0.3.0/src/spidershield/guard/presets/strict.yaml +84 -0
- spidershield-0.3.0/src/spidershield/hardener/__init__.py +0 -0
- spidershield-0.3.0/src/spidershield/hardener/prompt.py +67 -0
- spidershield-0.3.0/src/spidershield/hardener/quality_gate.py +150 -0
- spidershield-0.3.0/src/spidershield/hardener/runner.py +343 -0
- spidershield-0.3.0/src/spidershield/models.py +94 -0
- spidershield-0.3.0/src/spidershield/rewriter/__init__.py +0 -0
- spidershield-0.3.0/src/spidershield/rewriter/cache.py +53 -0
- spidershield-0.3.0/src/spidershield/rewriter/prompt.py +82 -0
- spidershield-0.3.0/src/spidershield/rewriter/providers.py +104 -0
- spidershield-0.3.0/src/spidershield/rewriter/quality_gate.py +260 -0
- spidershield-0.3.0/src/spidershield/rewriter/runner.py +513 -0
- spidershield-0.3.0/src/spidershield/scanner/__init__.py +0 -0
- spidershield-0.3.0/src/spidershield/scanner/architecture_check.py +229 -0
- spidershield-0.3.0/src/spidershield/scanner/description_quality.py +416 -0
- spidershield-0.3.0/src/spidershield/scanner/license_check.py +92 -0
- spidershield-0.3.0/src/spidershield/scanner/runner.py +278 -0
- spidershield-0.3.0/src/spidershield/scanner/security_scan.py +273 -0
- spidershield-0.3.0/src/spidershield/server.py +173 -0
- spidershield-0.3.0/src/spidershield/spiderrating.py +573 -0
- spidershield-0.3.0/src/spidershield/utils/__init__.py +0 -0
- spidershield-0.3.0/src/spidershield/utils/jsonrpc.py +98 -0
- spidershield-0.3.0/tests/__init__.py +0 -0
- spidershield-0.3.0/tests/test_adapters.py +346 -0
- spidershield-0.3.0/tests/test_agent_pinning.py +235 -0
- spidershield-0.3.0/tests/test_agent_sarif.py +279 -0
- spidershield-0.3.0/tests/test_allowlist.py +104 -0
- spidershield-0.3.0/tests/test_audit.py +405 -0
- spidershield-0.3.0/tests/test_cli.py +95 -0
- spidershield-0.3.0/tests/test_cli_integration.py +130 -0
- spidershield-0.3.0/tests/test_dataset.py +496 -0
- spidershield-0.3.0/tests/test_dlp.py +313 -0
- spidershield-0.3.0/tests/test_evaluator_v2.py +104 -0
- spidershield-0.3.0/tests/test_firewall.py +135 -0
- spidershield-0.3.0/tests/test_golden_descriptions.py +258 -0
- spidershield-0.3.0/tests/test_guard_core.py +156 -0
- spidershield-0.3.0/tests/test_hardener_v2.py +279 -0
- spidershield-0.3.0/tests/test_issue_codes.py +198 -0
- spidershield-0.3.0/tests/test_mcp_proxy.py +220 -0
- spidershield-0.3.0/tests/test_presets.py +173 -0
- spidershield-0.3.0/tests/test_prompt_injection.py +334 -0
- spidershield-0.3.0/tests/test_public_api.py +299 -0
- spidershield-0.3.0/tests/test_rewrite_cache.py +45 -0
- spidershield-0.3.0/tests/test_rewriter.py +145 -0
- spidershield-0.3.0/tests/test_rewriter_v2.py +477 -0
- spidershield-0.3.0/tests/test_scan_runner.py +102 -0
- spidershield-0.3.0/tests/test_scanner.py +251 -0
- spidershield-0.3.0/tests/test_spiderrating.py +190 -0
- spidershield-0.3.0/tests/test_toxic_flow.py +457 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [master, main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [master, main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- name: Set up Python
|
|
16
|
+
uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: '3.12'
|
|
19
|
+
|
|
20
|
+
- name: Install ruff
|
|
21
|
+
run: pip install "ruff>=0.4"  # quoted so the shell does not parse '>' as a redirect to a file named '=0.4'
|
|
22
|
+
|
|
23
|
+
- name: Lint
|
|
24
|
+
run: ruff check src/ tests/
|
|
25
|
+
|
|
26
|
+
test:
|
|
27
|
+
runs-on: ubuntu-latest
|
|
28
|
+
strategy:
|
|
29
|
+
matrix:
|
|
30
|
+
python-version: ['3.11', '3.12', '3.13']
|
|
31
|
+
|
|
32
|
+
steps:
|
|
33
|
+
- uses: actions/checkout@v4
|
|
34
|
+
|
|
35
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
36
|
+
uses: actions/setup-python@v5
|
|
37
|
+
with:
|
|
38
|
+
python-version: ${{ matrix.python-version }}
|
|
39
|
+
|
|
40
|
+
- name: Install dependencies
|
|
41
|
+
run: pip install -e ".[dev]" pytest-cov
|
|
42
|
+
|
|
43
|
+
- name: Run tests with coverage
|
|
44
|
+
run: pytest tests/ -v --cov=spidershield --cov-report=term-missing --cov-fail-under=60
|
|
45
|
+
|
|
46
|
+
- name: Verify CLI
|
|
47
|
+
run: spidershield --version
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
id-token: write
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
publish:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
environment: pypi
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- name: Set up Python
|
|
18
|
+
uses: actions/setup-python@v5
|
|
19
|
+
with:
|
|
20
|
+
python-version: '3.12'
|
|
21
|
+
|
|
22
|
+
- name: Install build tools
|
|
23
|
+
run: pip install build
|
|
24
|
+
|
|
25
|
+
- name: Build package
|
|
26
|
+
run: python -m build
|
|
27
|
+
|
|
28
|
+
- name: Publish to PyPI
|
|
29
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
name: SpiderShield Scan
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_dispatch:
|
|
5
|
+
inputs:
|
|
6
|
+
target:
|
|
7
|
+
description: 'MCP server path or GitHub URL to scan'
|
|
8
|
+
required: true
|
|
9
|
+
type: string
|
|
10
|
+
format:
|
|
11
|
+
description: 'Output format'
|
|
12
|
+
required: false
|
|
13
|
+
default: 'table'
|
|
14
|
+
type: choice
|
|
15
|
+
options:
|
|
16
|
+
- table
|
|
17
|
+
- json
|
|
18
|
+
|
|
19
|
+
# Can be called from other repos
|
|
20
|
+
workflow_call:
|
|
21
|
+
inputs:
|
|
22
|
+
target:
|
|
23
|
+
description: 'MCP server path to scan'
|
|
24
|
+
required: true
|
|
25
|
+
type: string
|
|
26
|
+
|
|
27
|
+
jobs:
|
|
28
|
+
scan:
|
|
29
|
+
runs-on: ubuntu-latest
|
|
30
|
+
steps:
|
|
31
|
+
- uses: actions/checkout@v4
|
|
32
|
+
|
|
33
|
+
- name: Set up Python
|
|
34
|
+
uses: actions/setup-python@v5
|
|
35
|
+
with:
|
|
36
|
+
python-version: '3.12'
|
|
37
|
+
|
|
38
|
+
- name: Install SpiderShield
|
|
39
|
+
run: pip install -e .
|
|
40
|
+
|
|
41
|
+
- name: Run scan
|
|
42
|
+
run: |
|
|
43
|
+
spidershield scan "${{ inputs.target }}" --format "${{ inputs.format || 'table' }}"
|
|
44
|
+
|
|
45
|
+
- name: Run scan (JSON artifact)
|
|
46
|
+
if: always()
|
|
47
|
+
run: |
|
|
48
|
+
spidershield scan "${{ inputs.target }}" --format json -o scan-report.json || true
|
|
49
|
+
|
|
50
|
+
- name: Upload report
|
|
51
|
+
if: always()
|
|
52
|
+
uses: actions/upload-artifact@v4
|
|
53
|
+
with:
|
|
54
|
+
name: spidershield-report
|
|
55
|
+
path: scan-report.json
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
name: Weekly MCP Server Scan
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
schedule:
|
|
5
|
+
- cron: '0 9 * * 1' # Every Monday at 09:00 UTC
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
scan:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
|
|
14
|
+
- name: Set up Python
|
|
15
|
+
uses: actions/setup-python@v5
|
|
16
|
+
with:
|
|
17
|
+
python-version: '3.12'
|
|
18
|
+
|
|
19
|
+
- name: Install SpiderShield
|
|
20
|
+
run: pip install -e .
|
|
21
|
+
|
|
22
|
+
- name: Scan official MCP servers
|
|
23
|
+
run: |
|
|
24
|
+
mkdir -p scan-results
|
|
25
|
+
TARGETS=(
|
|
26
|
+
"https://github.com/modelcontextprotocol/servers"
|
|
27
|
+
"https://github.com/supabase-community/supabase-mcp"
|
|
28
|
+
"https://github.com/neondatabase/mcp-server-neon"
|
|
29
|
+
"https://github.com/korotovsky/slack-mcp-server"
|
|
30
|
+
)
|
|
31
|
+
for target in "${TARGETS[@]}"; do
|
|
32
|
+
name=$(basename "$target")
|
|
33
|
+
echo "Scanning: $name"
|
|
34
|
+
spidershield scan "$target" --format json -o "scan-results/${name}.json" || true
|
|
35
|
+
done
|
|
36
|
+
|
|
37
|
+
- name: Generate summary
|
|
38
|
+
run: |
|
|
39
|
+
python -c "
|
|
40
|
+
import json, glob, sys
|
|
41
|
+
results = []
|
|
42
|
+
for f in sorted(glob.glob('scan-results/*.json')):
|
|
43
|
+
try:
|
|
44
|
+
data = json.load(open(f))
|
|
45
|
+
results.append({
|
|
46
|
+
'server': data.get('target', f),
|
|
47
|
+
'overall': data.get('overall_score', 0),
|
|
48
|
+
'security': data.get('security_score', 0),
|
|
49
|
+
'descriptions': data.get('description_score', 0),
|
|
50
|
+
'rating': data.get('rating', '?'),
|
|
51
|
+
'tools': data.get('tool_count', 0),
|
|
52
|
+
})
|
|
53
|
+
except Exception as e:
|
|
54
|
+
print(f'Error reading {f}: {e}', file=sys.stderr)
|
|
55
|
+
json.dump({'scan_date': '$(date -I)', 'servers': results}, open('scan-results/summary.json', 'w'), indent=2)
|
|
56
|
+
print(f'Scanned {len(results)} servers')
|
|
57
|
+
for r in sorted(results, key=lambda x: -x['overall']):
|
|
58
|
+
print(f\" {r['rating']:3s} {r['overall']:5.1f} {r['server']}\")
|
|
59
|
+
"
|
|
60
|
+
|
|
61
|
+
- name: Upload scan results
|
|
62
|
+
uses: actions/upload-artifact@v4
|
|
63
|
+
with:
|
|
64
|
+
name: weekly-scan-${{ github.run_number }}
|
|
65
|
+
path: scan-results/
|
|
66
|
+
retention-days: 90
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
*.py[cod]
|
|
3
|
+
*$py.class
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
build/
|
|
7
|
+
.eggs/
|
|
8
|
+
*.egg
|
|
9
|
+
.venv/
|
|
10
|
+
venv/
|
|
11
|
+
env/
|
|
12
|
+
.env
|
|
13
|
+
.env.*
|
|
14
|
+
*.log
|
|
15
|
+
.pytest_cache/
|
|
16
|
+
.ruff_cache/
|
|
17
|
+
.mypy_cache/
|
|
18
|
+
.coverage
|
|
19
|
+
htmlcov/
|
|
20
|
+
node_modules/
|
|
21
|
+
.DS_Store
|
|
22
|
+
Thumbs.db
|
|
23
|
+
*.swp
|
|
24
|
+
*.swo
|
|
25
|
+
reports/
|
|
26
|
+
|
|
27
|
+
# Test targets (cloned repos)
|
|
28
|
+
test-targets/
|
|
29
|
+
|
|
30
|
+
# Generated reports
|
|
31
|
+
*.report.json
|
|
32
|
+
fs-report.json
|
|
33
|
+
|
|
34
|
+
# Internal docs (not published)
|
|
35
|
+
docs/internal/
|
|
36
|
+
docs/observations/
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# SpiderShield -- MCP Server Security Linter
|
|
2
|
+
|
|
3
|
+
## Project Overview
|
|
4
|
+
SpiderShield is a static analysis tool for MCP (Model Context Protocol) servers.
|
|
5
|
+
It scans tool descriptions, security patterns, architecture quality, and licensing.
|
|
6
|
+
Think "npm audit for MCP tools".
|
|
7
|
+
|
|
8
|
+
## Architecture
|
|
9
|
+
```
|
|
10
|
+
src/spidershield/
|
|
11
|
+
cli.py -- Click CLI (scan, rewrite, harden, eval)
|
|
12
|
+
models.py -- Pydantic V2 models (ScanReport, SecurityIssue, etc.)
|
|
13
|
+
server.py -- MCP server mode (scan_mcp_server tool)
|
|
14
|
+
spiderrating.py -- SpiderRating format conversion (metadata, grades)
|
|
15
|
+
scanner/
|
|
16
|
+
runner.py -- Orchestrates 4-stage scan pipeline
|
|
17
|
+
description_quality.py -- Tool description scoring (7 criteria)
|
|
18
|
+
security_scan.py -- Static security pattern matching
|
|
19
|
+
architecture_check.py -- Code quality checks
|
|
20
|
+
license_check.py -- License detection
|
|
21
|
+
agent/
|
|
22
|
+
scanner.py -- Agent config security audit
|
|
23
|
+
skill_scanner.py -- Skill malware/injection pattern matching (20 patterns)
|
|
24
|
+
toxic_flow.py -- Dangerous capability combination detection (keyword + AST)
|
|
25
|
+
pinning.py -- SHA-256 content pinning for rug-pull detection
|
|
26
|
+
allowlist.py -- Approved-only skills enforcement
|
|
27
|
+
sarif.py -- SARIF output for agent findings
|
|
28
|
+
models.py -- Finding, SkillFinding, ScanResult, AuditFramework
|
|
29
|
+
issue_codes.py -- TS-E/W/C/P code registry
|
|
30
|
+
rewriter/
|
|
31
|
+
runner.py -- Template + LLM description rewriter
|
|
32
|
+
cache.py -- SHA-256 keyed LLM rewrite cache
|
|
33
|
+
providers.py -- Anthropic / OpenAI / Gemini providers
|
|
34
|
+
hardener/runner.py -- Security fix suggestions
|
|
35
|
+
evaluator/runner.py -- Tool selection accuracy testing
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Hard Constraints (G0 -- never violate)
|
|
39
|
+
|
|
40
|
+
1. **No false sense of security**: Never give an A rating to a server that may still have undetected critical issues.
|
|
41
|
+
If uncertain, score conservatively.
|
|
42
|
+
2. **No destructive modifications**: `rewrite` and `harden` must never break working code.
|
|
43
|
+
Always preserve original semantics.
|
|
44
|
+
3. **Reproducible results**: Same input must produce identical scan output.
|
|
45
|
+
No randomness, no network-dependent scoring.
|
|
46
|
+
|
|
47
|
+
## Evolution Mode Protocol
|
|
48
|
+
|
|
49
|
+
SpiderShield uses evidence-driven evolution (see docs/internal/001-audit-quality-evolution-2026-03-08.md).
|
|
50
|
+
|
|
51
|
+
### Per-Change Cycle
|
|
52
|
+
1. **Evidence first**: Before changing a scanner, document the false positive/negative that motivates the change
|
|
53
|
+
2. **Measure before/after**: Run `spidershield scan` on test-targets/ before and after changes
|
|
54
|
+
3. **Update observation doc**: Record what changed and why in docs/internal/
|
|
55
|
+
|
|
56
|
+
### Scanner Quality Rules
|
|
57
|
+
- Security scanner: Minimize false positives. A false positive erodes trust more than a missed issue.
|
|
58
|
+
- Description scorer: Score must correlate with actual LLM tool selection success.
|
|
59
|
+
- Architecture checker: Gradual scoring preferred over binary pass/fail.
|
|
60
|
+
- Overall score: SpiderRating formula `descriptions*0.35 + security_adjusted*0.35 + architecture*0.30`.
|
|
61
|
+
- Architecture bonus: `min(3.0, arch_score * 0.3)` folds into security_adjusted.
|
|
62
|
+
- Grade scale: F/D/C/B/A (thresholds: 3.0/5.0/7.0/8.5).
|
|
63
|
+
- Hard constraints: critical→F, no_tools→F, license_banned→D cap.
|
|
64
|
+
|
|
65
|
+
### Scoring Calibration
|
|
66
|
+
- A server with no quality signals in descriptions should score 0-2/10, not 3-4/10
|
|
67
|
+
- A server with all quality signals should score 8-10/10
|
|
68
|
+
- Security score 10.0 means zero issues found, not "secure" (we can't prove absence)
|
|
69
|
+
|
|
70
|
+
## PR Campaign Quality Gates (G1 -- must pass before submitting any PR)
|
|
71
|
+
|
|
72
|
+
Before submitting a PR to any external MCP repo, ALL 5 gates must pass:
|
|
73
|
+
|
|
74
|
+
1. **Real problem**: The original description has a concrete, demonstrable issue
|
|
75
|
+
(ambiguous scope, missing side-effects, misleading trigger). Not "could be better".
|
|
76
|
+
2. **Narrower and more precise**: The rewrite must be semantically narrower or more
|
|
77
|
+
specific than the original. Never broaden scope.
|
|
78
|
+
3. **Preserves command semantics**: The rewrite must not change what the tool actually does.
|
|
79
|
+
`git fetch` fetches from git remotes, not URLs. `git add` stages files, not "appends".
|
|
80
|
+
4. **One-sentence justification**: If you cannot explain the change in one sentence,
|
|
81
|
+
the change is not high-confidence enough for an automated PR.
|
|
82
|
+
5. **Tests must pass**: If the target repo has CI, the PR must not break it.
|
|
83
|
+
Description-only changes should never break tests.
|
|
84
|
+
|
|
85
|
+
### PR Strategy by Repo Type
|
|
86
|
+
|
|
87
|
+
| Type | Stars | Action | Example |
|
|
88
|
+
|------|-------|--------|---------|
|
|
89
|
+
| A: Small active | < 1k | Direct PR | HenkDz/postgresql-mcp-server |
|
|
90
|
+
| B: Large/strict | > 1k | Issue first, PR if invited | modelcontextprotocol/servers |
|
|
91
|
+
| C: Corporate | any | Issue only | playwright-mcp, github-mcp |
|
|
92
|
+
|
|
93
|
+
### PR Anti-Patterns (learned from rejected PRs)
|
|
94
|
+
|
|
95
|
+
- **Tautological triggers**: "Opens a file. Use when user wants to open a file." (rejected by ida-pro-mcp #277)
|
|
96
|
+
- **Semantic mismatches**: git "fetch" matched to URL "fetch", git "add" matched to "append" (flagged on git-mcp-server #42)
|
|
97
|
+
- **Template boilerplate**: Generic error guidance unrelated to the tool's domain
|
|
98
|
+
- **Too many changes**: Large PRs scare maintainers. Keep to 1-5 tool descriptions per PR.
|
|
99
|
+
|
|
100
|
+
## Development Standards
|
|
101
|
+
|
|
102
|
+
- Python 3.11+ with type hints on all new functions
|
|
103
|
+
- Use Pydantic V2 for data models
|
|
104
|
+
- Rich console for CLI output
|
|
105
|
+
- Tests in tests/ directory
|
|
106
|
+
- No unnecessary dependencies
|
|
107
|
+
|
|
108
|
+
## Key Files for Common Tasks
|
|
109
|
+
|
|
110
|
+
| Task | Files |
|
|
111
|
+
|------|-------|
|
|
112
|
+
| Add security pattern | scanner/security_scan.py (DANGEROUS_PATTERNS dict) |
|
|
113
|
+
| Add description criterion | scanner/description_quality.py + models.py (ToolDescriptionScore) |
|
|
114
|
+
| Change scoring weights | scanner/description_quality.py (line ~90-108) |
|
|
115
|
+
| Add tool extraction pattern | scanner/description_quality.py (_extract_tools) |
|
|
116
|
+
| Change report output | scanner/runner.py (_print_table) |
|
|
117
|
+
| SpiderRating conversion (MCP) | spiderrating.py (convert, metadata scoring) |
|
|
118
|
+
| SpiderRating conversion (Skill) | spiderrating.py (convert_skill, score_skill_description) |
|
|
119
|
+
| Agent security patterns | agent/skill_scanner.py (MALICIOUS_PATTERNS list) |
|
|
120
|
+
| Agent config checks | agent/scanner.py |
|
|
121
|
+
| Toxic flow detection | agent/toxic_flow.py (keyword + AST) |
|
|
122
|
+
| LLM rewrite cache | rewriter/cache.py (~/.spidershield/rewrite-cache/) |
|
|
123
|
+
| Add CLI command | cli.py |
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# SpiderShield Curation Report
|
|
2
|
+
|
|
3
|
+
Batch scan of top MCP servers. Generated by `spidershield scan`.
|
|
4
|
+
|
|
5
|
+
## Summary
|
|
6
|
+
|
|
7
|
+
| Server | Tools | Security | Description | Arch | Overall | Rating | Rewrite Gain |
|
|
8
|
+
|--------|-------|----------|-------------|------|---------|--------|-------------|
|
|
9
|
+
| filesystem (official) | 14 | 10.0 | 3.2 | 10.0 | 7.6 | B | +4.8 |
|
|
10
|
+
| memory (official) | 9 | 10.0 | 2.3 | 10.0 | 7.3 | B | - |
|
|
11
|
+
| git (official) | 12 | 10.0 | 2.4 | 10.0 | 7.3 | B | +6.6 |
|
|
12
|
+
| fetch (official) | 1 | 9.0 | 3.5 | 10.0 | 7.3 | B | - |
|
|
13
|
+
| time (official) | 0 | 10.0 | 5.0 | 10.0 | 8.2 | A | - |
|
|
14
|
+
| everything (python-sdk) | 13 | 10.0 | 2.8 | 7.0 | 6.7 | B | - |
|
|
15
|
+
| supabase (community) | 30 | 9.0 | 2.3 | 8.0 | 6.4 | B | +5.6 |
|
|
16
|
+
|
|
17
|
+
**Totals:** 7 servers, 79 tools scanned
|
|
18
|
+
|
|
19
|
+
## Key Findings
|
|
20
|
+
|
|
21
|
+
### 1. Description quality is universally poor
|
|
22
|
+
- Average description score: **3.1/10**
|
|
23
|
+
- 0% of tools have scenario triggers ("Use when...")
|
|
24
|
+
- 0% of tools have parameter examples
|
|
25
|
+
- <5% have error guidance
|
|
26
|
+
- This confirms the Neon finding: tool descriptions are the #1 bottleneck for agent tool selection
|
|
27
|
+
|
|
28
|
+
### 2. Rewriting delivers massive gains
|
|
29
|
+
|
|
30
|
+
**Template-based (zero cost):**
|
|
31
|
+
- filesystem: 3.7 -> 8.5 (+4.8 points)
|
|
32
|
+
- git: 2.9 -> 9.5 (+6.6 points)
|
|
33
|
+
- supabase: 3.4 -> 9.0 (+5.6 points)
|
|
34
|
+
- Average improvement: +5.7 points
|
|
35
|
+
|
|
36
|
+
**Claude API (higher quality):**
|
|
37
|
+
- filesystem: 3.7 -> 9.6 (+5.9 points)
|
|
38
|
+
- memory: 3.4 -> 9.4 (+6.0 points)
|
|
39
|
+
- git: 2.9 -> 8.6 (+5.7 points)
|
|
40
|
+
- fetch: 3.5 -> 9.6 (+6.1 points)
|
|
41
|
+
- **Average improvement: +5.9 points with Claude API**
|
|
42
|
+
|
|
43
|
+
### 3. Security is generally solid
|
|
44
|
+
- Average security score: **9.7/10**
|
|
45
|
+
- Official servers score 10.0 (clean), except fetch at 9.0
|
|
46
|
+
- Servers below 10.0 (fetch, supabase) have only minor issues (SSRF in fetch, credential handling)
|
|
47
|
+
- No critical vulnerabilities found in production servers
|
|
48
|
+
|
|
49
|
+
### 4. Architecture quality varies
|
|
50
|
+
- Official servers: strong (10.0) - tests, error handling, types
|
|
51
|
+
- Community servers: moderate (7-8) - often missing tests
|
|
52
|
+
|
|
53
|
+
## Top Curation Candidates
|
|
54
|
+
|
|
55
|
+
Servers that would benefit most from SpiderShield curation (high tool count + low description quality):
|
|
56
|
+
|
|
57
|
+
1. **supabase** - 30 tools, desc 2.3/10, improvement potential +5.6
|
|
58
|
+
2. **filesystem** - 14 tools, desc 3.2/10, improvement potential +4.8
|
|
59
|
+
3. **everything** - 13 tools, desc 2.8/10
|
|
60
|
+
4. **git** - 12 tools, desc 2.4/10, improvement potential +6.6
|
|
61
|
+
5. **memory** - 9 tools, desc 2.3/10
|
|
62
|
+
|
|
63
|
+
## Not Yet Scanned (need pattern support)
|
|
64
|
+
- Playwright MCP (compiled JS only, no source TS)
|
|
65
|
+
- GitHub MCP (Go, not yet supported)
|
|
66
|
+
- Context7 (clone failed)
|
|
67
|
+
- AWS MCP servers
|
|
68
|
+
- Desktop Commander
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
FROM python:3.12-slim
|
|
2
|
+
|
|
3
|
+
RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*
|
|
4
|
+
|
|
5
|
+
RUN useradd -m -u 1000 spidershield
|
|
6
|
+
|
|
7
|
+
WORKDIR /app
|
|
8
|
+
|
|
9
|
+
COPY pyproject.toml README.md ./
|
|
10
|
+
COPY src/ src/
|
|
11
|
+
|
|
12
|
+
RUN pip install --no-cache-dir .
|
|
13
|
+
|
|
14
|
+
USER spidershield
|
|
15
|
+
|
|
16
|
+
HEALTHCHECK --interval=30s --timeout=5s CMD spidershield --version || exit 1
|
|
17
|
+
|
|
18
|
+
CMD ["spidershield-server"]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 TeeShield
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# The MCP Tool Security Report
|
|
2
|
+
|
|
3
|
+
**We scanned 79 MCP tools across 7 production servers. Here's what we found.**
|
|
4
|
+
|
|
5
|
+
## What we scanned
|
|
6
|
+
|
|
7
|
+
We ran [SpiderShield](https://github.com/teehooai/spidershield) against the most widely used MCP servers in the ecosystem:
|
|
8
|
+
|
|
9
|
+
| Server | Type | Tools |
|
|
10
|
+
|--------|------|-------|
|
|
11
|
+
| filesystem | Official | 14 |
|
|
12
|
+
| git | Official | 12 |
|
|
13
|
+
| memory | Official | 9 |
|
|
14
|
+
| fetch | Official | 1 |
|
|
15
|
+
| time | Official | 0 |
|
|
16
|
+
| everything | SDK example | 13 |
|
|
17
|
+
| supabase | Community | 30 |
|
|
18
|
+
|
|
19
|
+
Each tool was scored on four dimensions: description quality, security patterns, architecture, and license compliance.
|
|
20
|
+
|
|
21
|
+
## The headline number
|
|
22
|
+
|
|
23
|
+
**Average description quality: 3.1 out of 10.**
|
|
24
|
+
|
|
25
|
+
This matters because AI agents choose which tool to call based entirely on the description text. A vague description gives the agent no boundaries.
|
|
26
|
+
|
|
27
|
+
## What's missing from tool descriptions
|
|
28
|
+
|
|
29
|
+
We checked every tool description for five critical attributes:
|
|
30
|
+
|
|
31
|
+
| Attribute | Present | Missing |
|
|
32
|
+
|-----------|---------|---------|
|
|
33
|
+
| Scenario triggers ("Use when...") | 0% | 100% |
|
|
34
|
+
| Parameter examples | 0% | 100% |
|
|
35
|
+
| Error handling guidance | <5% | >95% |
|
|
36
|
+
| Disambiguation vs similar tools | <10% | >90% |
|
|
37
|
+
| Adequate length (>50 chars) | ~40% | ~60% |
|
|
38
|
+
|
|
39
|
+
Not a single tool across all 79 had a scenario trigger. This means agents have zero guidance on *when* to pick one tool over another.
|
|
40
|
+
|
|
41
|
+
## Why this matters for agents
|
|
42
|
+
|
|
43
|
+
Consider the official Git MCP server. It has 12 tools, including three that show diffs:
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
git_diff_unstaged: "Shows changes in the working directory that are not yet staged"
|
|
47
|
+
git_diff_staged: "Shows changes that are staged for commit"
|
|
48
|
+
git_diff: "Shows differences between branches or commits"
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
An agent seeing these descriptions has to guess which one to call. There's no "Use when..." trigger, no examples, no disambiguation. The result: agents frequently call the wrong diff tool, waste context tokens, and produce incorrect results.
|
|
52
|
+
|
|
53
|
+
## What a good description looks like
|
|
54
|
+
|
|
55
|
+
Here's another tool from the same server (the one that reports working tree status) before and after SpiderShield rewrites it:
|
|
56
|
+
|
|
57
|
+
**Before (score 2.9):**
|
|
58
|
+
```
|
|
59
|
+
"Shows the working tree status"
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
**After (score 9.6):**
|
|
63
|
+
```
|
|
64
|
+
"Show the working tree status including modified, staged, and untracked files.
|
|
65
|
+
Use when the user wants to see the current state of the repository before
|
|
66
|
+
committing or staging changes. Unlike git_diff tools that show content changes,
|
|
67
|
+
this only shows which files have changed."
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Three things changed:
|
|
71
|
+
1. **Scenario trigger** -- tells the agent *when* to use this tool
|
|
72
|
+
2. **What it returns** -- the agent knows what to expect
|
|
73
|
+
3. **Disambiguation** -- prevents confusion with similar tools
|
|
74
|
+
|
|
75
|
+
## The rewrite impact
|
|
76
|
+
|
|
77
|
+
We rewrote all 79 tool descriptions using SpiderShield's template engine (zero cost, no API calls):
|
|
78
|
+
|
|
79
|
+
| Server | Before | After | Gain |
|
|
80
|
+
|--------|--------|-------|------|
|
|
81
|
+
| git | 2.9 | 9.5 | +6.6 |
|
|
82
|
+
| supabase | 3.4 | 9.0 | +5.6 |
|
|
83
|
+
| filesystem | 3.7 | 8.5 | +4.8 |
|
|
84
|
+
| **Average** | **3.1** | **8.8** | **+5.7** |
|
|
85
|
+
|
|
86
|
+
Using Claude API for higher-quality rewrites: average gain of **+5.9 points**.
|
|
87
|
+
|
|
88
|
+
## Security is actually solid
|
|
89
|
+
|
|
90
|
+
Good news: the code-level security of MCP servers is generally strong.
|
|
91
|
+
|
|
92
|
+
| Server | Security Score |
|
|
93
|
+
|--------|---------------|
|
|
94
|
+
| filesystem | 10.0 |
|
|
95
|
+
| git | 10.0 |
|
|
96
|
+
| memory | 10.0 |
|
|
97
|
+
| time | 10.0 |
|
|
98
|
+
| everything | 10.0 |
|
|
99
|
+
| fetch | 9.0 |
|
|
100
|
+
| supabase | 9.0 |
|
|
101
|
+
|
|
102
|
+
The security risk in MCP isn't in the code. It's in the descriptions. A tool with perfect code but a vague description like "access filesystem" is still dangerous -- because the agent doesn't know what boundaries to respect.
|
|
103
|
+
|
|
104
|
+
## Top findings
|
|
105
|
+
|
|
106
|
+
1. **Description quality is the #1 security bottleneck.** Not code vulnerabilities. Not architecture. The descriptions.
|
|
107
|
+
|
|
108
|
+
2. **Zero tools have scenario triggers.** This is the single most impactful improvement. Adding "Use when..." costs nothing and immediately improves tool selection accuracy.
|
|
109
|
+
|
|
110
|
+
3. **Disambiguation is almost nonexistent.** Servers with similar tools (git has 3 diff tools, filesystem has 3 read tools) provide no guidance on which to choose.
|
|
111
|
+
|
|
112
|
+
4. **The fix is cheap.** Template-based rewriting is free and delivers +5.7 points on average. No API calls, no runtime overhead, no code changes.
|
|
113
|
+
|
|
114
|
+
## What server authors should do
|
|
115
|
+
|
|
116
|
+
1. **Add scenario triggers** to every tool description: "Use when the user wants to..."
|
|
117
|
+
2. **Add parameter examples** with concrete values, not just type annotations
|
|
118
|
+
3. **Disambiguate similar tools** explicitly: "Unlike X, this tool..."
|
|
119
|
+
4. **Declare side effects**: "This tool writes to disk" / "This tool is read-only"
|
|
120
|
+
5. **Run SpiderShield** before publishing: `pip install spidershield && spidershield scan .`
|
|
121
|
+
|
|
122
|
+
## Methodology
|
|
123
|
+
|
|
124
|
+
- Scanner: [SpiderShield v0.1.0](https://pypi.org/project/spidershield/)
|
|
125
|
+
- Description scoring: 5 criteria weighted to 10-point scale (scenario triggers 3.0, disambiguation 2.0, parameter examples 2.0, error guidance 1.5, length 1.5)
|
|
126
|
+
- Security scanning: pattern-based detection for path traversal, command injection, SSRF, credential exposure
|
|
127
|
+
- All scans are static analysis -- no runtime execution, no network calls
|
|
128
|
+
- Raw data: [batch-scan-results.json](batch-scan-results.json)
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
*This report was generated by [SpiderShield](https://github.com/teehooai/spidershield), a static security linter for MCP tools. Install: `pip install spidershield`*
|