websec-validator 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. websec_validator/__init__.py +14 -0
  2. websec_validator/briefing.py +218 -0
  3. websec_validator/calibration.json +75 -0
  4. websec_validator/calibration.py +226 -0
  5. websec_validator/cli.py +395 -0
  6. websec_validator/constitution.py +81 -0
  7. websec_validator/corpus.json +49 -0
  8. websec_validator/dynamic.py +249 -0
  9. websec_validator/extractors/__init__.py +56 -0
  10. websec_validator/extractors/auth.py +77 -0
  11. websec_validator/extractors/authz.py +130 -0
  12. websec_validator/extractors/base.py +101 -0
  13. websec_validator/extractors/client_exposure.py +48 -0
  14. websec_validator/extractors/graphql.py +71 -0
  15. websec_validator/extractors/iac_ci.py +65 -0
  16. websec_validator/extractors/integrations.py +55 -0
  17. websec_validator/extractors/routes.py +215 -0
  18. websec_validator/extractors/schemas.py +75 -0
  19. websec_validator/extractors/stack.py +80 -0
  20. websec_validator/extractors/surface.py +86 -0
  21. websec_validator/extractors/tenant.py +33 -0
  22. websec_validator/findings.py +199 -0
  23. websec_validator/probes.py +79 -0
  24. websec_validator/proof.py +96 -0
  25. websec_validator/recon.py +28 -0
  26. websec_validator/report.py +114 -0
  27. websec_validator/scanners.py +248 -0
  28. websec_validator/templates/probes/bola-cross-tenant.sh +192 -0
  29. websec_validator/templates/probes/bola-write-verbs.py +147 -0
  30. websec_validator/templates/probes/compare-roles.sh +69 -0
  31. websec_validator/templates/probes/dlp-bypass-offline.py +149 -0
  32. websec_validator/templates/probes/hs256-brute-force.py +90 -0
  33. websec_validator/templates/probes/jwt-attacks.sh +161 -0
  34. websec_validator/templates/probes/mass-assignment.py +201 -0
  35. websec_validator/templates/probes/race-conditions.py +144 -0
  36. websec_validator/templates/probes/rate-limit-burst.sh +136 -0
  37. websec_validator/templates/probes/s3-assess.sh +120 -0
  38. websec_validator/templates/probes/ssrf-probes.sh +189 -0
  39. websec_validator/templates/probes/webhook-forgery.py +113 -0
  40. websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +75 -0
  41. websec_validator/templates/reports/access-control-matrix.md.template +65 -0
  42. websec_validator/templates/reports/findings-triage.md.template +28 -0
  43. websec_validator/templates/reports/pentest-handover-brief.md.template +121 -0
  44. websec_validator/templates/reports/per-tool-FINDINGS.md.template +37 -0
  45. websec_validator-0.2.0.dist-info/METADATA +232 -0
  46. websec_validator-0.2.0.dist-info/RECORD +50 -0
  47. websec_validator-0.2.0.dist-info/WHEEL +5 -0
  48. websec_validator-0.2.0.dist-info/entry_points.txt +2 -0
  49. websec_validator-0.2.0.dist-info/licenses/LICENSE +21 -0
  50. websec_validator-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,113 @@
#!/usr/bin/env python3
"""
Webhook forgery probe — signature verification for inbound webhooks.

A correct webhook verifier uses:
  - crypto.timingSafeEqual (or HMAC compare_digest) — not raw == comparison
  - fail-closed — reject if ANY required header is missing or malformed
  - timestamp-age check — reject signatures older than ~5 minutes to prevent
    captured-and-replayed-later forgeries

This probe sends ten forged requests; every one of them should be rejected
with HTTP 401:
   1. No signature header
   2. Invalid signature (random b64), fresh timestamp
   3. Garbage signature (non-b64), fresh timestamp
   4. Missing timestamp
   5. Zero timestamp (epoch 0)
   6. Far-future timestamp (replay-window check)
   7. Far-past timestamp (replay-window check)
   8. Truncated signature, fresh timestamp
   9. Empty body
  10. Wrong content-type

Inputs:  ZAP_TARGET from <repo root>/security/zap/.env, plus the
         PROJECT-SPECIFIC constants below.
Outputs: a PASS/FAIL line per probe on stdout and
         <repo root>/security/pentest-prep/reports/webhook-forgery/findings.json
Requires the `curl` binary on PATH.
"""
import json
import shutil
import subprocess
import sys
import time
from pathlib import Path

# PROJECT-SPECIFIC START
# TODO: replace with your project's inbound-webhook path, signature header
# name, and timestamp header name. Examples:
#   Bird / MessageBird: /webhooks/messagebird, messagebird-signature, messagebird-timestamp
#   Stripe:             /webhooks/stripe, Stripe-Signature (combined ts+sig)
#   Twilio:             /webhooks/twilio, X-Twilio-Signature
#   GitHub:             /webhooks/github, X-Hub-Signature-256
#   Custom:             /webhooks/<provider>, X-Signature, X-Timestamp
WEBHOOK_PATH = "/webhooks/<provider>"
SIG_HEADER = "x-signature"
TS_HEADER = "x-timestamp"

# TODO: realistic payload shape for your provider.
PAYLOAD = json.dumps({
    "event": "message.received",
    "type": "message",
    "channelId": "channel-id-xxx",
    "message": {
        "id": "fake-msg-id",
        "from": "+15551234567",
        "content": "hello from attacker",
    }
})
# PROJECT-SPECIFIC END

# Syntactically valid base64 ("invalid") that cannot be a correct signature.
BOGUS_SIG = 'aW52YWxpZA=='
# Trailer curl appends after the response body so the status code can be parsed.
HTTP_CODE_MARKER = '\nHTTP_CODE:'
# Per-request ceiling so one hung connection cannot stall the whole run.
CURL_TIMEOUT_S = 15


def parse_env(text):
    """Parse KEY=VALUE lines from dotenv-style *text* into a dict.

    Lines without '=' and lines whose first non-blank character is '#' are
    ignored. Keys and values are whitespace-stripped, and one pair of matching
    surrounding quotes is removed from the value so `URL="https://x"` works.
    """
    env = {}
    for line in text.splitlines():
        if '=' not in line or line.lstrip().startswith('#'):
            continue
        key, value = line.split('=', 1)
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in '"\'':
            value = value[1:-1]
        env[key.strip()] = value
    return env


def build_probes(now=None):
    """Return the probe table as (name, headers, body, expected_code, reason).

    *now* is the epoch-seconds value used for "fresh" timestamps; it defaults
    to the current time. The signature-only probes carry a fresh timestamp so
    that a fail-closed handler gets as far as the signature comparison instead
    of rejecting on the missing timestamp (which `missing-timestamp` covers).
    """
    fresh = str(int(time.time()) if now is None else now)
    return [
        ('no-signature', {}, PAYLOAD, 401, 'no sig'),
        ('invalid-signature-b64', {SIG_HEADER: BOGUS_SIG, TS_HEADER: fresh}, PAYLOAD, 401, 'bad sig'),
        ('garbage-signature', {SIG_HEADER: 'not-base64-!', TS_HEADER: fresh}, PAYLOAD, 401, 'malformed sig'),
        ('missing-timestamp', {SIG_HEADER: BOGUS_SIG}, PAYLOAD, 401, 'no timestamp'),
        ('zero-timestamp', {SIG_HEADER: BOGUS_SIG, TS_HEADER: '0'}, PAYLOAD, 401, 'timestamp epoch 0'),
        ('far-future-timestamp', {SIG_HEADER: BOGUS_SIG, TS_HEADER: '4070908800'}, PAYLOAD, 401, 'timestamp year 2099'),
        ('far-past-timestamp', {SIG_HEADER: BOGUS_SIG, TS_HEADER: '1000000000'}, PAYLOAD, 401, 'timestamp year 2001'),
        ('truncated-signature', {SIG_HEADER: 'a', TS_HEADER: fresh}, PAYLOAD, 401, 'too short'),
        ('empty-body', {SIG_HEADER: BOGUS_SIG, TS_HEADER: fresh}, '', 401, 'empty body'),
        ('wrong-content-type', {SIG_HEADER: BOGUS_SIG, TS_HEADER: fresh, 'Content-Type': 'text/plain'}, PAYLOAD, 401, 'wrong ct'),
    ]


def split_curl_output(out):
    """Split curl stdout into (http_code, response_body).

    The status code is whatever follows the LAST marker, so a response body
    that happens to contain the marker text is left intact. Returns code 0
    when the marker is absent or the code is not numeric.
    """
    body, marker, code = out.rpartition(HTTP_CODE_MARKER)
    if not marker:
        return 0, out
    try:
        return int(code.strip()), body
    except ValueError:
        return 0, body


def run_probe(url, headers, body):
    """POST *body* to *url* with *headers* via curl; return (http_code, body_text)."""
    cmd = ['curl', '-s', '--max-time', str(CURL_TIMEOUT_S), '-X', 'POST', url,
           '-w', HTTP_CODE_MARKER + '%{http_code}']
    for header, value in headers.items():
        cmd += ['-H', f'{header}: {value}']
    # Default to JSON unless the probe deliberately overrides the content type.
    if 'Content-Type' not in headers:
        cmd += ['-H', 'Content-Type: application/json']
    cmd += ['-d', body]
    result = subprocess.run(cmd, capture_output=True, text=True)
    return split_curl_output(result.stdout)


def main():
    """Run every probe against the configured target and save the findings."""
    # The script is expected to live three directories below the repo root.
    root = Path(__file__).resolve().parents[2].parent
    env_file = root / 'security/zap/.env'
    try:
        env = parse_env(env_file.read_text(encoding='utf-8'))
    except OSError as err:
        sys.exit(f"Cannot read {env_file}: {err}")

    target = env.get('ZAP_TARGET')
    if not target:
        sys.exit(f"ZAP_TARGET is not set in {env_file}")
    if '<' in WEBHOOK_PATH:
        # Probing the literal placeholder path would only yield misleading FAILs.
        sys.exit("WEBHOOK_PATH still contains a placeholder; edit the PROJECT-SPECIFIC block first")
    if shutil.which('curl') is None:
        sys.exit("curl not found on PATH")

    url = f"{target.rstrip('/')}{WEBHOOK_PATH}"

    findings = []
    print(f"=== Webhook forgery probes against {url} ===\n")

    for name, headers, body, expected, reason in build_probes():
        code, body_text = run_probe(url, headers, body)
        expected_ok = code == expected
        mark = 'OK' if expected_ok else '!!'
        sev = 'PASS' if expected_ok else 'FAIL'
        print(f" [{mark}] [{sev}] {name:30s} expected={expected} actual={code} ({reason})")
        findings.append({
            'name': name, 'expected': expected, 'actual': code, 'pass': expected_ok,
            'body_preview': body_text[:120],
        })

    out_p = root / 'security/pentest-prep/reports/webhook-forgery/findings.json'
    out_p.parent.mkdir(parents=True, exist_ok=True)
    out_p.write_text(json.dumps(findings, indent=2), encoding='utf-8')

    passed = sum(1 for f in findings if f['pass'])
    print("\n=== Summary ===")
    print(f" {passed}/{len(findings)} probes returned expected 401")
    print(f" Saved: {out_p}")

    # Replay-window note
    print()
    print("=== Note on timestamp-age / replay window ===")
    print(" Even if the HMAC is correct, captured webhooks should not replay forever.")
    print(" Look in your handler for code like:")
    print(" const age = Math.abs(Date.now()/1000 - parseInt(timestamp));")
    print(" if (age > 300) return res.status(401).json({error:'webhook timestamp out of window'});")
    print(" If that check is missing, log it as a finding (low severity, easy fix).")


if __name__ == "__main__":
    main()
@@ -0,0 +1,75 @@
1
+ # Security tooling pass — findings summary
2
+
3
+ > Date: <YYYY-MM-DD>. Tools run locally; **zero repo footprint added**.
4
+ > All outputs in `security/<tool>/` (gitignored).
5
+
6
+ ## Tools run
7
+
8
+ | Tool | Status | Outputs |
9
+ |---|---|---|
10
+ | **Prowler** <ver> | ☐ | `security/prowler/` |
11
+ | **Nuclei** <ver> | ☐ | `security/nuclei/` |
12
+ | **Semgrep** <ver> | ☐ | `security/semgrep/` |
13
+ | **Gitleaks** <ver> | ☐ | `security/gitleaks/` |
14
+ | **Trivy** <ver> | ☐ | `security/trivy/` |
15
+ | **ZAP** <ver> + manual probes | ☐ | `security/zap/`, `security/pentest-prep/` |
16
+
17
+ ## Most important finding
18
+
19
+ > The single highest-priority item, with action and owner.
20
+
21
+ ## Real findings
22
+
23
+ | Tool | Finding | Severity | Action |
24
+ |---|---|---|---|
25
+ | <tool> | <finding> | <CRIT/HIGH/MED/LOW> | <action + file:line> |
26
+
27
+ ## What's clean
28
+
29
+ | Surface | Tool | Result |
30
+ |---|---|---|
31
+ | <Surface 1> | <Tool> | <e.g. 0 CRITICAL + 0 HIGH> |
32
+ | <Surface 2> | <Tool> | <Result> |
33
+
34
+ ## Recommended order of fixes
35
+
36
+ 1. <P0 item>
37
+ 2. <P1 item>
38
+ 3. <P2 item>
39
+
40
+ ## What's NOT in this report
41
+
42
+ - <Surfaces not covered + why>
43
+ - <Tools skipped + why>
44
+
45
+ ## Reproducing this scan pass
46
+
47
+ ```bash
48
+ # Prowler
49
+ prowler aws --region us-east-1 \
50
+ --compliance cis_2.0_aws aws_foundational_security_best_practices_aws \
51
+ --output-formats html json-asff csv \
52
+ --output-directory security/prowler/
53
+
54
+ # Nuclei
55
+ TOKEN=$(./security/zap/run.sh --print-token)
56
+ nuclei -target "$ZAP_TARGET" -H "Authorization: Bearer $TOKEN" \
57
+ -tags "jwt,ssrf,sqli,lfi,redirect,rce,exposure,misconfig,cve" \
58
+ -severity medium,high,critical -rate-limit 30 -concurrency 5 \
59
+ -json-export security/nuclei/nuclei-baseline.json \
60
+ -output security/nuclei/nuclei-baseline.txt
61
+
62
+ # Semgrep
63
+ semgrep --config auto --config p/typescript --config p/javascript \
64
+ --config p/security-audit --severity WARNING --severity ERROR \
65
+ --json -o security/semgrep/semgrep-backend.json backend/src
66
+
67
+ # Gitleaks
68
+ gitleaks detect --source . --report-format json --report-path security/gitleaks/current.json
69
+ gitleaks git --report-format json --report-path security/gitleaks/history.json
70
+
71
+ # Trivy
72
+ trivy fs --scanners vuln,secret,misconfig --severity HIGH,CRITICAL \
73
+ --skip-dirs node_modules --skip-dirs security \
74
+ --format json --output security/trivy/trivy-fs.json .
75
+ ```
@@ -0,0 +1,65 @@
1
+ # Access-Control Matrix — <PROJECT_NAME> API
2
+
3
+ > Source of truth: `backend/src/routes/*`, `backend/src/server.ts`,
4
+ > the auth/permission middleware files, and the role seed.
5
+ >
6
+ > This is the map of *what each role SHOULD be able to reach*. The ZAP
7
+ > Access Control test compares it against what each role *actually* can reach.
8
+ >
9
+ > **Last refreshed:** <YYYY-MM-DD>
10
+
11
+ ## How auth is enforced
12
+
13
+ - `<requireAuth middleware mount line>` applies to every `/api/*` route mounted *after* it.
14
+ - A handful of `/api/*` routes are registered **before** that line and are therefore **public**:
15
+ list them here (e.g. `/api/auth/*`, `/api/health`, `/api/settings` if public).
16
+ - Routes outside `/api/*` bypass `requireAuth` entirely: webhooks (HMAC-verified),
17
+ SCIM endpoints (own bearer), `/docs`, etc.
18
+ - **Token mechanism:** describe how tokens are minted, sent, and refreshed.
19
+ - **Authorization styles seen on routes:**
20
+ - Permission strings (CASL / cancan / custom) — describe.
21
+ - Tenant-scoped middleware (`requireGroupAccess` / `requireOrgAccess`) — describe.
22
+ - Manual in-handler checks — list which routes still rely on these.
23
+
24
+ ## Roles
25
+
26
+ | Role label | roleId | Key permissions |
27
+ |---|---|---|
28
+ | <Highest privilege> | `role-...` | `*` (all) |
29
+ | <Mid privilege> | `role-...` | ... |
30
+ | <Low privilege> | `role-...` | ... |
31
+
32
+ ## Legend
33
+
34
+ - ✅ = allowed · ❌ = denied (403) · 🔒 = denied unless caller is in the target tenant · 🟡 = self-only (IDOR-guarded)
35
+ - "Guard" column = the middleware protecting the route; `requireAuth` alone means any authenticated user gets through.
36
+
37
+ ## Matrix (representative endpoints, grouped)
38
+
39
+ | Method + Path | Guard | admin | low-privilege role |
40
+ |---|---|---|---|
41
+ | **Auth / public** | | | |
42
+ | POST /api/auth/login | public | ✅ | ✅ |
43
+ | POST /api/auth/refresh | public | ✅ | ✅ |
44
+ | GET /api/auth/me | requireAuth | ✅ | ✅ |
45
+ | POST /api/auth/logout | requireAuth | ✅ | ✅ |
46
+ | **Admin: users** | | | |
47
+ | GET /api/admin/users | requirePermission('users:view') | ✅ | ❌ |
48
+ | POST /api/admin/users | requirePermission('users:manage') | ✅ | ❌ |
49
+ | ... | ... | ... | ... |
50
+
51
+ > Continue this table for every route. Group by `**Section**` rows.
52
+ > The completeness of this matrix is the single highest-value deliverable —
53
+ > the pentest team loads it into ZAP's Access Control tab and uses it as the
54
+ > map for role-vs-role testing.
55
+
56
+ ## Known gaps / TODOs
57
+
58
+ - List any routes you know don't yet conform (e.g. still use legacy `requireRole('admin')`).
59
+ - List any routes whose authz lives in-handler instead of in middleware — these are
60
+ the easiest to forget when adding a new role.
61
+
62
+ ## Audit history
63
+
64
+ - YYYY-MM-DD — initial matrix
65
+ - YYYY-MM-DD — added `*` after PR #N
@@ -0,0 +1,28 @@
1
+ # ZAP findings triage — API scan
2
+
3
+ > Triaged against the actual codebase. Document every false positive with
4
+ > evidence so future-you (and the pentest team) doesn't re-investigate them.
5
+
6
+ | Alert | Count | Risk | Verdict | Why |
7
+ |---|---|---|---|---|
8
+ | <e.g. SQL Injection> | <N> | High | **False positive** | <Reason — link to file:line proof> |
9
+ | <e.g. NoSQL Injection> | <N> | High | **False positive** | <Stack uses parameterized expressions, not strings> |
10
+ | <e.g. PII Disclosure> | <N> | High | <Real / FP> | <Evidence trail> |
11
+ | <e.g. Path Traversal> | <N> | High | **False positive** | <IDs are DB keys, no filesystem reads> |
12
+ | <e.g. Application Error Disclosure> | <N> | Low | Minor | <Confirm error responses are generic> |
13
+
14
+ ## So what's actually worth doing?
15
+
16
+ > List the real, actionable items that came from elsewhere in the engagement
17
+ > (infra checks, source review, role-comparison diff).
18
+
19
+ 1. <Real item 1>
20
+ 2. <Real item 2>
21
+ 3. <Real item 3>
22
+
23
+ ## Lesson for next runs
24
+
25
+ > A 2-3 sentence note about which ZAP rules systematically misfire on this stack
26
+ > and what the real signal source was. Examples:
27
+ > - DynamoDB JSON API -> SQLi/NoSQLi alerts are noise; signal is in two-role diff.
28
+ > - GraphQL endpoint -> rule 10202 misfires on every alias; signal is in graphql-cop.
@@ -0,0 +1,121 @@
1
+ # <PROJECT_NAME> — Pentest Handover Brief
2
+
3
+ **Audience:** <client>'s security engineering (pentester)
4
+ **Prepared by:** <your name>
5
+ **Engagement type:** Gray-box authenticated pentest
6
+ **Test window:** <fill in dates>
7
+ **Primary contact:** <name> — <email / phone>
8
+ **Backup contact:** <name> — <email / phone>
9
+
10
+ ---
11
+
12
+ ## 1. What this app is (one paragraph)
13
+
14
+ <2-3 sentence app description in plain English. What does it do, who uses it,
15
+ what kind of data does it hold, what's it integrated with.>
16
+
17
+ ## 2. Architecture (give freely)
18
+
19
+ | Layer | Tech | Notes |
20
+ |---|---|---|
21
+ | Frontend | <e.g. Next.js + React> | <SPA / server-rendered / etc.> |
22
+ | API | <e.g. Express + TS on App Runner> | <auth model summary> |
23
+ | Datastore | <e.g. DynamoDB single-table> | <indexing model> |
24
+ | Object storage | <e.g. S3 bucket name> | <public access posture> |
25
+ | Ingress (webhooks) | <e.g. /webhooks/provider> | <signature scheme> |
26
+ | Outbound | <e.g. third-party REST API> | <auth model> |
27
+ | Hosting | <e.g. AWS CDK-managed> | <test env stack name> |
28
+
29
+ **Auth model (important):** <e.g. Bearer JWT + localStorage. No cookies, no CSRF token.>
30
+
31
+ **Tenancy / isolation:** <Describe the tenancy boundary — group/org/workspace —
32
+ and which boundary the pentester should attack.>
33
+
34
+ ## 3. URLs
35
+
36
+ | Env | URL | Notes |
37
+ |---|---|---|
38
+ | Dev / test | <https://...> | Pentest target |
39
+ | Prod | <https://...> | **OUT OF SCOPE** unless separately authorized in writing |
40
+
41
+ Webhook endpoint: `POST <WEBHOOK_PATH>` — <signature scheme description>.
42
+ **Do not omit the signature header during your own replay tests** — the server
43
+ returns 401 immediately for unsigned requests, and you may wrongly conclude the endpoint is dead.
44
+
45
+ ## 4. Roles & authorization matrix
46
+
47
+ | Role | Should access |
48
+ |---|---|
49
+ | `<role-1>` | <what they can do> |
50
+ | `<role-2>` | <what they can do> |
51
+ | `<role-3>` | <what they can do> |
52
+
53
+ **Known intent (please verify by attack):**
54
+ - An agent in Tenant A should get **403/404** on Tenant B's resources across every group-scoped endpoint.
55
+ - <Other intent statements>
56
+
57
+ ## 5. Test accounts (provisioned in dev env)
58
+
59
+ > **Real values stored in <vault name>** — share via Bitwarden/1Password link, **not** via email/Slack.
60
+
61
+ | Username | Role | Tenant | Purpose |
62
+ |---|---|---|---|
63
+ | `pentest-agent-a1@...` | agent | Tenant A | Baseline agent |
64
+ | `pentest-agent-a2@...` | agent | Tenant A | Same-tenant collision tests |
65
+ | `pentest-agent-b1@...` | agent | **Tenant B** | Cross-tenant isolation tests ← primary IDOR target |
66
+ | `pentest-manager-a@...` | manager | Tenant A | Manager privilege boundary |
67
+ | `pentest-manager-b@...` | manager | Tenant B | Cross-tenant manager boundary |
68
+ | `pentest-admin@...` | admin | (tenant-wide) | Privilege-escalation baseline |
69
+
70
+ **Pre-seeded data:**
71
+ - ~N resources across Tenants A and B
72
+ - A few with attached media — good for media-ACL tests
73
+ - Notifications (read + unread) for each test agent
74
+
75
+ ## 6. Out of scope (rules of engagement)
76
+
77
+ - **Third-party infrastructure** (their API, their webhook origin). Test our handling, not their service.
78
+ - **Cloud control plane** — IAM probing, account enumeration, bucket bruteforcing across the org.
79
+ - **DoS / volumetric** — no load attacks, no fork bombs, no concurrency exhaustion.
80
+ - **Social engineering** of <client> staff or customers.
81
+ - **Production** environment — dev/test URLs only.
82
+ - **Real customer data** — even if you find a path to it, do not exfiltrate beyond a single proof sample, and notify <contact> immediately.
83
+
84
+ ## 7. Things to focus on (without telling you the answers)
85
+
86
+ In rough order of where I'd spend time if I were you:
87
+
88
+ 1. **Cross-tenant authorization.** Every `/api/...` route that takes a resource ID — does it verify the resource belongs to a tenant the caller can see?
89
+ 2. **Token exposure.** Token lifetime, refresh, revocation on logout, XSS sinks.
90
+ 3. **Media proxy.** Can an agent in Tenant A fetch a media key belonging to Tenant B? Are object-storage keys guessable / sequential?
91
+ 4. **Webhook endpoint.** Signature bypass, replay window, oversized payloads, malformed JSON, source spoofing.
92
+ 5. <Other focus areas>
93
+
94
+ ## 8. What you will NOT receive (and why)
95
+
96
+ - My internal findings, ZAP/Semgrep/Prowler output, suspected weak spots. These would bias your testing and reduce the value of comparing your results against mine afterward.
97
+ - Source code. If <client> wants a code review as part of this engagement, that's a separate ask and I'll provide a read-only repo link.
98
+ - Prod credentials.
99
+
100
+ After your report is in, I'll share my findings and we'll diff them — the **overlap** validates tooling, the **delta in both directions** is the real signal.
101
+
102
+ ## 9. Communication
103
+
104
+ - **Real-time questions:** <chat handle>
105
+ - **Findings:** Final report + raw evidence to <sponsor email>
106
+ - **Suspected high/critical mid-test:** Page <contact> immediately.
107
+ - **If you accidentally hit prod:** Stop, ping <contact>, do not delete logs.
108
+
109
+ ## 10. Reporting format we'd like
110
+
111
+ Per finding:
112
+ - Title
113
+ - Severity (CVSS 3.1 vector + score)
114
+ - Affected endpoint(s) / component
115
+ - Reproduction steps (curl or HTTP request preferred over screenshots)
116
+ - Impact (what an attacker actually gets)
117
+ - Suggested remediation (optional but appreciated)
118
+
119
+ ---
120
+
121
+ _Last updated: <YYYY-MM-DD>_
@@ -0,0 +1,37 @@
1
+ # <Tool> findings — <YYYY-MM-DD>
2
+
3
+ > Hand-written triage of `<tool>` output. Raw evidence in this folder
4
+ > (e.g. `<tool>-baseline.json`, `<tool>-current.json`).
5
+
6
+ ## Run command
7
+
8
+ ```bash
9
+ <the exact command, so anyone can re-run it>
10
+ ```
11
+
12
+ ## Summary
13
+
14
+ - Raw alerts: <N>
15
+ - Real findings: <N>
16
+ - False positives triaged: <N>
17
+
18
+ ## Real findings
19
+
20
+ ### 1. <Title> — <severity>
21
+
22
+ - **What:** <1-2 sentences>
23
+ - **Where:** `<file:line>` or `<endpoint>`
24
+ - **Evidence:** <log excerpt / response body / commit hash>
25
+ - **Action:** <what to do, who owns it>
26
+
27
+ ### 2. ...
28
+
29
+ ## False positives (do not "fix")
30
+
31
+ | Alert | Cause | Why FP |
32
+ |---|---|---|
33
+ | <alert name> | <what the tool detected> | <why it's a FP — link to file:line or test that proves it> |
34
+
35
+ ## Notes for next run
36
+
37
+ - <Anything that surprised you, anything to investigate further next time>
@@ -0,0 +1,232 @@
1
+ Metadata-Version: 2.4
2
+ Name: websec-validator
3
+ Version: 0.2.0
4
+ Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
5
+ Author: Ricardo Accioly
6
+ License: MIT
7
+ Keywords: security,pentest,sast,dast,bola,ai-agent,appsec
8
+ Requires-Python: >=3.11
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Dynamic: license-file
12
+
13
+ # websec-validator
14
+
15
+ > Local-first security recon that **briefs your AI coding agent**. It does the deterministic
16
+ > half — read the repo, map the full attack surface, run + de-duplicate the static scanners, and
17
+ > stage a probe library tailored to what it found — then hands your agent (Claude Code, Codex,
18
+ > Gemini, Cursor) a marching-orders briefing. **Code in, artifacts out. No LLM in the tool, no
19
+ > server, no running app required.**
20
+
21
+ It is *not* an autonomous scanner and *not* a SaaS. It's the missing front-half: the thing that
22
+ turns a repo into a precise, fact-grounded security brief an AI agent (with a human in the loop)
23
+ can act on — an auto-filled, repo-aware version of a senior pentester's "here's what to test and
24
+ how" handoff. Full landscape + why this niche is real: [`MARKET-ANALYSIS-AND-VERDICT.md`](MARKET-ANALYSIS-AND-VERDICT.md).
25
+
26
+ ## Quickstart — just point it at your repo
27
+
28
+ **Simplest: tell your AI agent.** In Claude Code (or any coding agent), open your project and say:
29
+
30
+ > *"Install and run the security tool at github.com/raccioly/websec-validator on this repo, then follow its briefing."*
31
+
32
+ It installs, runs, and walks the findings with you. There's nothing to host and no website — it's
33
+ local. The four ways to get there, all ending in the same `AGENT-BRIEFING.md` your agent acts on:
34
+
35
+ | Path | One-time setup | Then |
36
+ |---|---|---|
37
+ | **Tell your agent** (simplest) | — | say the line above |
38
+ | **CLI** (a terminal) | `pipx install websec-validator` | `websec run /path/to/your/app` |
39
+ | **Claude Code plugin** (slash) | `/plugin marketplace add raccioly/websec-validator` → `/plugin install websec-validator@websec-plugins` | invoke the **security-pass** skill, or just ask |
40
+ | **Docker** (no install) | `docker build -t websec-validator .` | `docker run --rm -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out` |
41
+
42
+ ➡️ **Want the reasoning behind every check?** Read **[docs/METHODOLOGY.md](docs/METHODOLOGY.md)** — what each test does and why.
43
+
44
+ ## Install
45
+
46
+ ```bash
47
+ pipx install websec-validator # from PyPI
48
+ brew install noir # OWASP Noir — the route engine (50+ frameworks); regex fallback if absent
49
+ websec --version
50
+ ```
51
+
52
+ _Until the first PyPI release publishes (or for bleeding-edge), install straight from source instead:_
53
+ `pipx install git+https://github.com/raccioly/websec-validator` (or from a clone: `pipx install .`).
54
+
55
+ Requires **Python 3.11+** (on stock macOS, `python3` is often 3.9 — use `pipx`, which picks a newer
56
+ interpreter, or install via Homebrew/pyenv). Zero Python runtime dependencies: it shells out to
57
+ scanners (Trivy, Gitleaks, Semgrep/OpenGrep, Checkov, Prowler) and Noir **when present**, reports
58
+ what's missing, and never hard-fails if a tool is absent.
59
+
60
+ ### Or run via Docker (everything bundled, zero install)
61
+
62
+ No need to install Noir or any scanner — the image bundles them all (arch-aware, amd64 + arm64):
63
+
64
+ ```bash
65
+ docker build -t websec-validator .
66
+ docker run --rm -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out
67
+ ```
68
+
69
+ The image carries Noir + Trivy + Gitleaks + Semgrep + Checkov; mount your repo at `/scan` and the
70
+ artifacts land in `/scan/websec-out`.
71
+
72
+ ## Use
73
+
74
+ ```bash
75
+ websec run ./my-app # ← the one command: recon + stage tailored probes + emit the briefing
76
+ websec ./my-app # same thing — a bare path defaults to `run`
77
+ websec run ./my-app --scan # …and also execute the available static scanners
78
+ websec doctor ./my-app # (optional) which scanners are installed?
79
+ ```
80
+
81
+ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and follow it."**
82
+
83
+ > That's the whole user surface: **`run`** (plus the optional, advanced **`dynamic`** live-probing step below). `recon`/`proof`/`calibrate` exist for developing the tool itself and are hidden from `--help` — you never need them.
84
+
85
+ ## What it extracts (11 deterministic extractors, no LLM)
86
+
87
+ | | Dimension | Notable output |
88
+ |---|---|---|
89
+ | stack | languages, frameworks, datastores | monorepo-aware (aggregates every manifest) |
90
+ | routes | every endpoint via **OWASP Noir** | method · path · typed params · code path |
91
+ | auth | scheme + login surface | multi-scheme (primary jwt > passport), PyJWT/NextAuth/session aware |
92
+ | **authz** | access-control map | guard coverage + **write endpoints with no visible guard** + roles |
93
+ | tenant | multi-tenancy key candidates | the BOLA boundary, by frequency |
94
+ | surface | 12 user-input-gated sink classes | SSRF/SQLi/NoSQLi/traversal/SSTI/redirect/deser/XXE/proto-pollution/ReDoS/cmd/eval |
95
+ | schemas | data models + **privileged fields** | Pydantic/SQLAlchemy/Django/Prisma/Mongoose/TypeORM/Zod → `role`/`isAdmin`/`groupId` for mass-assignment targeting |
96
+ | iac_ci | IaC + CI/CD | GitHub Actions injection, unpinned actions, Dockerfile-root, tfstate |
97
+ | client_exposure | browser leakage | `NEXT_PUBLIC_*` secrets, server-secret-in-client, source maps |
98
+ | graphql | GraphQL surface | introspection / playground / missing depth-limit |
99
+ | integrations | third-party + webhooks | webhooks missing signature verification |
100
+
101
+ Plus **derived targeting** — IDOR / SSRF / open-redirect / upload / write / auth-endpoint
102
+ candidates — so probes get pointed at the *exact* endpoints, not fired blindly.
103
+
104
+ ## What you get (`websec-out/`)
105
+
106
+ | Artifact | What it is |
107
+ |---|---|
108
+ | `AGENT-BRIEFING.md` | **The product.** Marching orders: detected surface, the access-control map, targeting, findings, the method, and the staged probe list. |
109
+ | `FACTS.json` | The full structured recon. |
110
+ | `findings.json` | Static scanner results, **de-duplicated across tools** and severity-ranked (with `--scan`). |
111
+ | `findings-ledger.json` / `REPORT.md` | The traceable ledger: each finding with an evidence chain, CWE/ASVS/OWASP-API citation, remediation, and a **calibrated `P(real)`** (measured real-vuln rate + 95% CI + sample size). |
112
+ | `probes/` | The probe scripts selected + staged for *this* app (BOLA, JWT, SSRF, mass-assignment…). |
113
+
114
+ ## The flow
115
+
116
+ ```
117
+ 🔧 websec (deterministic) 🤖 your agent + 🧑 you
118
+ ───────────────────────────────── ─────────────────────────────────
119
+ 1. recon → full attack surface → confirm the tenant boundary + auth model
120
+ 2. run + de-dup static scanners → triage real-vs-noise
121
+ 3. stage tailored probes → fill placeholders, run vs a TEST instance
122
+ 4. emit AGENT-BRIEFING.md → propose fixes, re-run to confirm, report back
123
+ ```
124
+
125
+ Static recon + briefing need **only the code**. *Running* the probes needs a live test instance +
126
+ test credentials (the human supplies them) — the tool itself never touches a running app.
127
+
128
+ ## Proof harness
129
+
130
+ `websec proof` clones a vuln-app corpus (VAmPI, NodeGoat, DVGA) and scores whether recon surfaces
131
+ each app's documented attack surface — a deterministic, CI-trackable proxy (currently **10/10**).
132
+ The real kill-criterion (does the briefing lift an agent's bug-finding vs a generic prompt?) is the
133
+ manual A/B in [`corpus/PROOF-PROTOCOL.md`](corpus/PROOF-PROTOCOL.md).
134
+
135
+ ## Calibrated confidence
136
+
137
+ `websec calibrate` runs the ledger against the labeled corpus, measures how often each
138
+ *(attack-class, confidence)* bucket is a **real** documented vuln, and writes `calibration.json`
139
+ (shipped + applied at runtime). Each finding then carries `P(real)` with a **95% Wilson confidence
140
+ interval** and the sample size `n` — so "MEDIUM" stops being a vibe and becomes "real ~57% of the
141
+ time on the corpus (CI 43–70%, n=51)". A finding that matches no documented vuln counts as a false
142
+ positive (the corpus is well-documented). **Honest caveats:** the corpus is *deliberately
143
+ vulnerable*, so the rates skew **optimistic** for clean production code, and small samples mean
144
+ **wide intervals** — the CI is the headline, not the point estimate, and both tighten as the corpus
145
+ grows. With thin data a bucket falls back to the per-label aggregate, then to a clearly-flagged
146
+ uncalibrated prior. No ML, no deps — binomial proportion + Wilson interval; the structure upgrades to
147
+ isotonic regression if a large labeled set ever exists.
148
+
149
+ **It self-improves.** `websec dynamic` is an *oracle*: a write that executes unauthenticated is a
150
+ confirmed real vuln, and a recon-flagged endpoint that turns out auth-enforced is a confirmed false
151
+ positive. Every dynamic run folds those confirmed labels into a **local overlay** (`~/.cache/websec-validator/`,
152
+ gitignored, never shipped) that's merged on top of the public table — so the numbers **personalize to
153
+ your apps** the more you run it, with no extra step and nothing leaving your machine. To label by hand
154
+ instead, feed a `{attack_class, confidence, is_real}` file to `websec calibrate --ingest`.
155
+
156
+ ## Dynamic phase (v1 — read-only so far)
157
+
158
+ When you have a *running TEST instance*, `websec dynamic` mints role tokens and runs the probes the
159
+ static recon pointed at. v1 is **read-only**: authenticated **cross-tenant BOLA** on the group-scoped
160
+ GET endpoints recon discovered.
161
+
162
+ ```bash
163
+ cp dynamic-config.example.json dynamic-config.json # TEST target + role creds (gitignored)
164
+ websec run ./my-app # static recon → websec-out/FACTS.json
165
+ websec dynamic --config dynamic-config.json --facts websec-out/FACTS.json
166
+ # → "14/14 cross-tenant GET reads blocked — all isolated" (or 🚨 LEAK with the exact endpoint)
167
+ ```
168
+
169
+ Never point it at production. Write-verb BOLA, JWT/auth attacks, and a ZAP/Nuclei two-role diff are
170
+ the next dynamic probes (explicitly gated — they mutate).
171
+
172
+ ## Validated on
173
+
174
+ HugoCross (Next.js), `wu-whatsappinbox` (106-service Express/AWS monorepo), VAmPI, NodeGoat, DVGA —
175
+ independently reproducing a hand-done pentest's findings (tenant boundary, SSO-endpoint SSRF, media
176
+ upload, conversation-BOLA routes, roles).
177
+
178
+ ## Tests
179
+
180
+ ```bash
181
+ python3 -m unittest discover -s tests # stdlib only, no Noir/network — 23 tests
182
+ ```
183
+
184
+ ## Releasing (maintainer)
185
+
186
+ Published to PyPI via **Trusted Publishing** (OIDC — no API token in the repo). To cut a release:
187
+
188
+ ```bash
189
+ # 1. bump the version in pyproject.toml (e.g. 0.2.0 → 0.2.1)
190
+ # 2. tag it and push — the tag must match pyproject's version (CI verifies):
191
+ git tag v0.2.1 && git push origin v0.2.1
192
+ # → .github/workflows/publish.yml builds + publishes to PyPI
193
+ ```
194
+
195
+ One-time PyPI setup (before the first release): on pypi.org → **Account → Publishing → Add a pending
196
+ publisher** with project `websec-validator`, owner `raccioly`, repo `websec-validator`, workflow
197
+ `publish.yml`, environment `pypi`. The project is created on the first successful publish.
198
+
199
+ > Two independent channels, two update mechanisms: the **CLI** ships to **PyPI** (semver releases,
200
+ > `pip install --upgrade`); the **Claude Code plugin** ships from **git** (tracks latest commit,
201
+ > refreshed via `/plugin marketplace update`).
202
+
203
+ ## Status / roadmap
204
+
205
+ **Done:** 11-extractor recon (incl. schema/entity → mass-assignment targeting), cross-tool de-dup,
206
+ tailored probe staging, agent briefing, traceable findings ledger with **calibrated confidence
207
+ (CJE — Wilson CIs)**, proof harness, test suite, **Docker bundle** (all scanners + Noir, arch-aware),
208
+ **dynamic phase v1** (authenticated read-only cross-tenant BOLA — validated live, reproduced a
209
+ hand-pentest's 14/14).
210
+ **Next:** dynamic write-verb BOLA + JWT/auth probes + ZAP/Nuclei two-role diff (gated, they mutate),
211
+ calibration on hand-labeled real repos (more representative base rate), ASVS index lookup, optional
212
+ model-SDK adapters for no-agent fallback.
213
+
214
+ ## Using it as a Claude Code skill / plugin
215
+
216
+ This repo **is** a Claude Code plugin. Install it once —
217
+
218
+ ```
219
+ /plugin marketplace add raccioly/websec-validator
220
+ /plugin install websec-validator@websec-plugins
221
+ ```
222
+
223
+ — and the bundled **security-pass** skill ([`skills/security-pass/SKILL.md`](skills/security-pass/SKILL.md))
224
+ lets you just ask, in plain English, for a security pass: it runs `websec`, reads the briefing, and
225
+ works the findings with you. For other agents the universal interface is unchanged: run the CLI, read
226
+ `AGENT-BRIEFING.md`.
227
+
228
+ ## Credits
229
+
230
+ Methodology + probe library come from a real authenticated pentest pass
231
+ ([`base-research/REPLICATION-PLAYBOOK.md`](base-research/REPLICATION-PLAYBOOK.md), not committed).
232
+ This tool productizes that hand-written pass into something an AI agent can run on any repo.