websec-validator 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. websec_validator/__init__.py +14 -0
  2. websec_validator/briefing.py +218 -0
  3. websec_validator/calibration.json +75 -0
  4. websec_validator/calibration.py +226 -0
  5. websec_validator/cli.py +395 -0
  6. websec_validator/constitution.py +81 -0
  7. websec_validator/corpus.json +49 -0
  8. websec_validator/dynamic.py +249 -0
  9. websec_validator/extractors/__init__.py +56 -0
  10. websec_validator/extractors/auth.py +77 -0
  11. websec_validator/extractors/authz.py +130 -0
  12. websec_validator/extractors/base.py +101 -0
  13. websec_validator/extractors/client_exposure.py +48 -0
  14. websec_validator/extractors/graphql.py +71 -0
  15. websec_validator/extractors/iac_ci.py +65 -0
  16. websec_validator/extractors/integrations.py +55 -0
  17. websec_validator/extractors/routes.py +215 -0
  18. websec_validator/extractors/schemas.py +75 -0
  19. websec_validator/extractors/stack.py +80 -0
  20. websec_validator/extractors/surface.py +86 -0
  21. websec_validator/extractors/tenant.py +33 -0
  22. websec_validator/findings.py +199 -0
  23. websec_validator/probes.py +79 -0
  24. websec_validator/proof.py +96 -0
  25. websec_validator/recon.py +28 -0
  26. websec_validator/report.py +114 -0
  27. websec_validator/scanners.py +248 -0
  28. websec_validator/templates/probes/bola-cross-tenant.sh +192 -0
  29. websec_validator/templates/probes/bola-write-verbs.py +147 -0
  30. websec_validator/templates/probes/compare-roles.sh +69 -0
  31. websec_validator/templates/probes/dlp-bypass-offline.py +149 -0
  32. websec_validator/templates/probes/hs256-brute-force.py +90 -0
  33. websec_validator/templates/probes/jwt-attacks.sh +161 -0
  34. websec_validator/templates/probes/mass-assignment.py +201 -0
  35. websec_validator/templates/probes/race-conditions.py +144 -0
  36. websec_validator/templates/probes/rate-limit-burst.sh +136 -0
  37. websec_validator/templates/probes/s3-assess.sh +120 -0
  38. websec_validator/templates/probes/ssrf-probes.sh +189 -0
  39. websec_validator/templates/probes/webhook-forgery.py +113 -0
  40. websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +75 -0
  41. websec_validator/templates/reports/access-control-matrix.md.template +65 -0
  42. websec_validator/templates/reports/findings-triage.md.template +28 -0
  43. websec_validator/templates/reports/pentest-handover-brief.md.template +121 -0
  44. websec_validator/templates/reports/per-tool-FINDINGS.md.template +37 -0
  45. websec_validator-0.2.0.dist-info/METADATA +232 -0
  46. websec_validator-0.2.0.dist-info/RECORD +50 -0
  47. websec_validator-0.2.0.dist-info/WHEEL +5 -0
  48. websec_validator-0.2.0.dist-info/entry_points.txt +2 -0
  49. websec_validator-0.2.0.dist-info/licenses/LICENSE +21 -0
  50. websec_validator-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,248 @@
1
+ """Static scanner registry + detection + execution.
2
+
3
+ v1 philosophy: the tool does NOT install scanners and does NOT import them. It
4
+ detects which are on PATH (or reachable via Docker) and shells out to the ones
5
+ that are present, writing each tool's native JSON to the output dir. Missing
6
+ tools are reported in the briefing so the agent can offer to install them — we
7
+ never hard-fail because a scanner is absent.
8
+
9
+ Each scanner runs read-only against the filesystem (no network target, no
10
+ running app). Anything that needs a live instance (ZAP, Nuclei DAST) is NOT
11
+ here — that is the dynamic phase, which v1 leaves to the agent + human.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import shutil
18
+ import subprocess
19
+ from dataclasses import dataclass
20
+ from pathlib import Path
21
+
22
+
23
@dataclass(frozen=True)
class Scanner:
    """Immutable registry record describing one external static scanner."""

    # Short identifier for the scanner.
    key: str
    # Human-readable display name.
    name: str
    # One of: sast | sca | secrets | iac | cloud
    category: str
    # Executable name we look for on PATH.
    binary: str
    # Languages the scanner applies to; () == language-agnostic.
    languages: tuple = ()
    # One-line install hint for the briefing.
    install: str = ""
    # argv builder: (target, out_file) -> list[str];
    # None means "detect only" for now.
    argv: object = None
33
+
34
+
35
+ def _trivy(target: Path, out: Path) -> list:
36
+ # SCA + secrets + IaC misconfig in one pass; pinned by the user's install.
37
+ return ["trivy", "fs", "--scanners", "vuln,secret,misconfig",
38
+ "--skip-dirs", "node_modules", "--skip-dirs", "security",
39
+ "--format", "json", "--output", str(out), str(target)]
40
+
41
+
42
+ def _gitleaks(target: Path, out: Path) -> list:
43
+ return ["gitleaks", "detect", "--source", str(target), "--no-banner",
44
+ "--report-format", "json", "--report-path", str(out)]
45
+
46
+
47
+ def _semgrep(target: Path, out: Path) -> list:
48
+ return ["semgrep", "scan", "--config", "auto", "--json",
49
+ "--output", str(out), str(target)]
50
+
51
+
52
+ def _checkov(target: Path, out: Path) -> list:
53
+ return ["checkov", "-d", str(target), "--compact", "-o", "json",
54
+ "--output-file-path", str(out.parent)]
55
+
56
+
57
REGISTRY: tuple = (
    # Scanners with an argv builder.
    Scanner(key="trivy", name="Trivy", category="sca", binary="trivy",
            install="brew install trivy # pin by digest in CI",
            argv=_trivy),
    Scanner(key="gitleaks", name="Gitleaks", category="secrets", binary="gitleaks",
            install="brew install gitleaks",
            argv=_gitleaks),
    Scanner(key="semgrep", name="Semgrep/OpenGrep", category="sast", binary="semgrep",
            install="pipx install semgrep # or opengrep for fully-OSS",
            argv=_semgrep),
    Scanner(key="checkov", name="Checkov", category="iac", binary="checkov",
            install="pipx install checkov",
            argv=_checkov),
    # Detect-only scanners: no argv builder yet.
    Scanner(key="bandit", name="Bandit", category="sast", binary="bandit",
            languages=("python",),
            install="pipx install bandit"),
    Scanner(key="osv-scanner", name="OSV-Scanner", category="sca", binary="osv-scanner",
            install="brew install osv-scanner"),
    Scanner(key="prowler", name="Prowler", category="cloud", binary="prowler",
            install="pipx install prowler # needs AWS creds"),
)
73
+
74
+
75
def detect(stack_languages: list | None = None) -> dict:
    """Split the relevant scanners into those on PATH and those that are not.

    Language-specific scanners (e.g. Bandit/python) are only considered when
    one of their languages appears in ``stack_languages``; language-agnostic
    scanners are always considered.

    Returns:
        ``{'available': [...], 'missing': [...]}``. Every entry carries
        ``key``, ``name`` and ``category``; missing entries also carry the
        ``install`` hint.
    """
    stack = set(stack_languages or [])
    report = {"available": [], "missing": []}
    for scanner in REGISTRY:
        if scanner.languages and stack.isdisjoint(scanner.languages):
            # Not relevant to this repo's stack.
            continue
        info = {
            "key": scanner.key,
            "name": scanner.name,
            "category": scanner.category,
        }
        if shutil.which(scanner.binary) is None:
            report["missing"].append({**info, "install": scanner.install})
        else:
            report["available"].append(info)
    return report
92
+
93
+
94
def run_available(target: Path, outdir: Path, stack_languages: list | None = None,
                  timeout: int = 600) -> list:
    """Execute every available, runnable static scanner. Returns per-scanner status.

    Raw JSON lands in outdir/scanners/<key>.json. We capture status only here;
    cross-tool normalization + de-duplication is a separate (next) step.

    Args:
        target: Directory handed to each scanner's argv builder.
        outdir: Output root; the ``scanners`` sub-directory is created if needed.
        stack_languages: Languages present in the repo; language-specific
            scanners are skipped unless one of their languages is listed.
        timeout: Per-scanner wall-clock limit in seconds.

    Returns:
        One dict per scanner that was attempted. A completed run has
        ``key``, ``name``, ``category``, ``exit_code``, ``output`` and
        ``findings``; a failed one has ``key``, ``name`` and ``status``
        (``"timeout"`` or ``"error: ..."``).
    """
    langs = set(stack_languages or [])
    scan_dir = outdir / "scanners"
    scan_dir.mkdir(parents=True, exist_ok=True)
    results = []
    for s in REGISTRY:
        if s.argv is None:
            continue  # detect-only for now
        if s.languages and not (set(s.languages) & langs):
            continue
        if not shutil.which(s.binary):
            continue
        out_file = scan_dir / f"{s.key}.json"
        try:
            # Remove any report left behind by an earlier run. Otherwise a
            # scanner that fails (or writes nothing) this time would have the
            # stale file counted and reported as this run's output.
            if out_file.exists():
                out_file.unlink()
            proc = subprocess.run(s.argv(target, out_file), capture_output=True,
                                  text=True, timeout=timeout)
            results.append({"key": s.key, "name": s.name, "category": s.category,
                            "exit_code": proc.returncode, "output": str(out_file),
                            "findings": _count_findings(s.key, out_file)})
        except subprocess.TimeoutExpired:
            results.append({"key": s.key, "name": s.name, "status": "timeout"})
        except Exception as e:  # never let one scanner sink the run
            results.append({"key": s.key, "name": s.name, "status": f"error: {e}"})
    return results
124
+
125
+
126
+ def _count_findings(key: str, out_file: Path) -> int:
127
+ """Best-effort finding count from a scanner's native JSON (for the summary)."""
128
+ if not out_file.exists():
129
+ return 0
130
+ try:
131
+ data = json.loads(out_file.read_text())
132
+ except Exception:
133
+ return 0
134
+ if key == "trivy":
135
+ return sum(len(r.get("Vulnerabilities", []) or []) +
136
+ len(r.get("Secrets", []) or []) +
137
+ len(r.get("Misconfigurations", []) or [])
138
+ for r in (data.get("Results") or []))
139
+ if key == "gitleaks":
140
+ return len(data) if isinstance(data, list) else 0
141
+ if key == "semgrep":
142
+ return len(data.get("results", []) or [])
143
+ return 0
144
+
145
+
146
+ # ---- cross-tool normalization + de-duplication -------------------------------------------
147
+ # The thing no OSS orchestrator does: one ranked finding even when two scanners
148
+ # report the same CVE / secret / misconfig. Fingerprints are scheme-shared across
149
+ # tools so e.g. a secret found by both Gitleaks and Trivy collapses to one row.
150
+
151
+ SEV_ORDER = {"CRITICAL": 4, "HIGH": 3, "MEDIUM": 2, "LOW": 1, "INFO": 0, "UNKNOWN": 1}
152
+
153
+
154
+ def _sev(s: str) -> str:
155
+ s = (s or "").upper()
156
+ return s if s in SEV_ORDER else "MEDIUM"
157
+
158
+
159
def _norm_trivy(data: dict) -> list:
    """Flatten a Trivy JSON report into normalized finding dicts.

    Each entry under ``Results`` contributes its vulnerabilities (category
    ``sca``), secrets (``secret``) and misconfigurations (``iac``). The
    result's ``Target`` is used as the finding's file.
    """
    findings = []
    for result in data.get("Results") or []:
        target = result.get("Target", "")

        for vuln in result.get("Vulnerabilities") or []:
            findings.append(dict(
                tool="trivy",
                category="sca",
                severity=_sev(vuln.get("Severity")),
                key=vuln.get("VulnerabilityID", ""),
                file=target,
                line=0,
                title=f"{vuln.get('PkgName')} {vuln.get('InstalledVersion')} → {vuln.get('FixedVersion', '(no fix)')}",
                fingerprint=f"cve|{vuln.get('PkgName')}|{vuln.get('VulnerabilityID')}",
            ))

        for secret in result.get("Secrets") or []:
            findings.append(dict(
                tool="trivy",
                category="secret",
                # Secrets without a severity are treated as HIGH.
                severity=_sev(secret.get("Severity") or "HIGH"),
                key=secret.get("RuleID", ""),
                file=target,
                line=secret.get("StartLine", 0),
                title=f"secret: {secret.get('Title') or secret.get('RuleID')}",
                fingerprint=f"secret|{target}|{secret.get('RuleID')}",
            ))

        for misconfig in result.get("Misconfigurations") or []:
            findings.append(dict(
                tool="trivy",
                category="iac",
                severity=_sev(misconfig.get("Severity")),
                key=misconfig.get("ID", ""),
                file=target,
                line=0,
                title=(misconfig.get("Title") or "")[:90],
                fingerprint=f"iac|{target}|{misconfig.get('ID')}",
            ))
    return findings
178
+
179
+
180
+ def _norm_gitleaks(data) -> list:
181
+ rows = data if isinstance(data, list) else (data.get("findings") or [])
182
+ out = []
183
+ for x in rows:
184
+ f, rule = x.get("File", ""), x.get("RuleID", "")
185
+ out.append({"tool": "gitleaks", "category": "secret", "severity": "HIGH",
186
+ "key": rule, "file": f, "line": x.get("StartLine", 0),
187
+ "title": f"secret: {(x.get('Description') or rule)[:80]}",
188
+ "fingerprint": f"secret|{f}|{rule}"})
189
+ return out
190
+
191
+
192
+ def _norm_semgrep(data: dict) -> list:
193
+ sevmap = {"ERROR": "HIGH", "WARNING": "MEDIUM", "INFO": "INFO"}
194
+ out = []
195
+ for r in (data.get("results") or []):
196
+ rule = (r.get("check_id", "")).split(".")[-1]
197
+ path = r.get("path", "")
198
+ line = (r.get("start") or {}).get("line", 0)
199
+ sev = sevmap.get((r.get("extra") or {}).get("severity", "INFO"), "MEDIUM")
200
+ out.append({"tool": "semgrep", "category": "sast", "severity": sev,
201
+ "key": rule, "file": path, "line": line,
202
+ "title": ((r.get("extra") or {}).get("message") or rule)[:90],
203
+ "fingerprint": f"sast|{path}|{line}|{rule}"})
204
+ return out
205
+
206
+
207
# Scanner key -> function that turns that scanner's JSON into normalized findings.
_PARSERS = {
    "trivy": _norm_trivy,
    "gitleaks": _norm_gitleaks,
    "semgrep": _norm_semgrep,
}
208
+
209
+
210
def normalize_findings(scan_results: list, outdir: Path) -> dict:
    """Merge all scanner reports into one de-duplicated, ranked findings.json.

    For every scan result that has an ``output`` file and a registered parser,
    the report is parsed into normalized findings; reports that fail to load
    or parse are skipped. Findings sharing a fingerprint collapse into one row
    that lists every reporting tool and keeps the highest severity seen. The
    rows are sorted by descending severity and written to
    ``outdir/findings.json``.

    Returns:
        Summary dict with raw and de-duplicated totals, the number of merged
        rows, counts by severity and by category, and the first 15 rows.
    """
    collected = []
    for result in scan_results:
        report_path = result.get("output")
        parse = _PARSERS.get(result.get("key"))
        if not report_path or parse is None or not Path(report_path).exists():
            continue
        try:
            payload = json.loads(Path(report_path).read_text() or "{}")
            collected.extend(parse(payload))
        except Exception:
            continue

    merged: dict = {}
    for finding in collected:
        fingerprint = finding["fingerprint"]
        existing = merged.get(fingerprint)
        if existing is None:
            # First sighting: swap the single "tool" for a "tools" list.
            row = dict(finding)
            row["tools"] = [row.pop("tool")]
            merged[fingerprint] = row
            continue
        if finding["tool"] not in existing["tools"]:
            existing["tools"].append(finding["tool"])
        if SEV_ORDER[finding["severity"]] > SEV_ORDER[existing["severity"]]:
            existing["severity"] = finding["severity"]

    def descending_severity(row):
        return -SEV_ORDER[row["severity"]]

    deduped = sorted(merged.values(), key=descending_severity)
    (outdir / "findings.json").write_text(json.dumps(deduped, indent=2))

    by_sev, by_cat = {}, {}
    for row in deduped:
        severity, category = row["severity"], row["category"]
        by_sev[severity] = by_sev.get(severity, 0) + 1
        by_cat[category] = by_cat.get(category, 0) + 1

    top_rows = [
        {"severity": row["severity"], "category": row["category"],
         "title": row["title"], "file": row["file"], "tools": row["tools"]}
        for row in deduped[:15]
    ]
    return {
        "total_raw": len(collected),
        "total": len(deduped),
        "cross_tool_or_dup_merged": len(collected) - len(deduped),
        "by_severity": by_sev,
        "by_category": by_cat,
        "top": top_rows,
    }
248
+
@@ -0,0 +1,192 @@
1
+ #!/usr/bin/env bash
2
+ #
3
+ # bola-cross-tenant.sh — manual BOLA / cross-tenant probe.
4
+ #
5
+ # ZAP's automated scanner can't tell when Agent A reading Agent B's tenant
6
+ # data is a violation — it just sees "another 200". This script does the
7
+ # two-account probe a pentest team will run on day 1:
8
+ #
9
+ # 1. Mint two agent tokens (Agent A in tenant_A, Agent B in tenant_B).
10
+ # 2. Discover each agent's accessible tenants via /api/auth/me (or your
11
+ # project's equivalent "current user" endpoint).
12
+ # 3. For every tenant-scoped endpoint pattern, try Agent A's token against
13
+ # Agent B's tenantId, and vice versa. Expect 403 or 404 (either prevents
14
+ # the data leak).
15
+ #
16
+ # Usage:
17
+ # 1. In .env, set:
18
+ # ZAP_AGENT_USER / ZAP_AGENT_PASS (Agent A in tenant X)
19
+ # ZAP_AGENT2_USER / ZAP_AGENT2_PASS (Agent B in tenant Y — DIFFERENT tenant)
20
+ # 2. ./bola-cross-tenant.sh
21
+ # 3. Output is one PASS/FAIL line per probe + a summary; nonzero exit on FAIL.
22
+ #
23
+ # Requires: bash, curl, jq, python3.
24
+ set -euo pipefail
25
+ cd "$(dirname "$0")"
26
+
27
+ [[ -f .env ]] || { echo "No .env found in $(pwd)" >&2; exit 1; }
28
+
29
+ # Parse .env literally (handles passwords with shell-special chars)
30
# read_env KEY — print the raw value of KEY from ./.env (first match only).
# Python does the parsing so the value is never evaluated by the shell.
# Lines starting with '#' or lacking '=' are ignored; the value is
# everything after the first '=' and is printed as-is.
read_env() {
  local key="$1"
  python3 -c "
with open('.env') as env_file:
    for raw in env_file:
        entry = raw.rstrip('\n')
        if entry.startswith('#') or '=' not in entry:
            continue
        name, _, value = entry.partition('=')
        if name.strip() == '$key':
            print(value)
            break
"
}
41
+
42
+ TARGET="$(read_env ZAP_TARGET)"
43
+ A_USER="$(read_env ZAP_AGENT_USER)"
44
+ A_PASS="$(read_env ZAP_AGENT_PASS)"
45
+ B_USER="$(read_env ZAP_AGENT2_USER)"
46
+ B_PASS="$(read_env ZAP_AGENT2_PASS)"
47
+
48
+ [[ -n "$TARGET" && -n "$A_USER" && -n "$A_PASS" && -n "$B_USER" && -n "$B_PASS" ]] || {
49
+ cat >&2 <<EOF
50
+ ERROR: missing required .env values. Need:
51
+ ZAP_TARGET, ZAP_AGENT_USER, ZAP_AGENT_PASS,
52
+ ZAP_AGENT2_USER (the second agent in a DIFFERENT tenant), ZAP_AGENT2_PASS
53
+
54
+ The cross-tenant probe is moot if both agents are in the same tenant.
55
+ EOF
56
+ exit 2
57
+ }
58
+
59
+ # TODO: adjust login URL / payload / response shape to match your API.
60
# login USER PASS — POST the credentials to the login endpoint and print the
# access token on stdout.
#
# Prints nothing when the response has no .tokens.accessToken, so the
# caller's `[[ -n "$TOKEN" ]]` check catches a failed login. Without
# `// empty`, `jq -r` prints the literal string "null", which is non-empty
# and would let the probes run with "Bearer null".
login() {
  local user="$1" pass="$2"
  local body
  # Build the JSON with jq so special characters in credentials are escaped.
  body=$(jq -nc --arg e "$user" --arg p "$pass" '{email:$e,password:$p}')
  curl -fsS -X POST "$TARGET/api/auth/login" \
    -H 'Content-Type: application/json' \
    -d "$body" \
    | jq -r '.tokens.accessToken // empty'
}
69
+
70
+ echo "==> minting Agent A token..."
71
+ A_TOKEN="$(login "$A_USER" "$A_PASS")"
72
+ [[ -n "$A_TOKEN" ]] || { echo "Agent A login failed" >&2; exit 3; }
73
+
74
+ echo "==> minting Agent B token..."
75
+ B_TOKEN="$(login "$B_USER" "$B_PASS")"
76
+ [[ -n "$B_TOKEN" ]] || { echo "Agent B login failed" >&2; exit 3; }
77
+
78
+ # TODO: adjust /api/auth/me to your project's "current user" endpoint.
79
+ # We need each agent's tenant-id list. Adjust the jq filter below to your shape.
80
# fetch_me TOKEN — GET the "current user" document as TOKEN's owner and
# print the response body on stdout.
fetch_me() {
  local token="$1"
  local auth_header="Authorization: Bearer $token"
  curl -fsS -H "$auth_header" "$TARGET/api/auth/me"
}
84
+
85
+ A_ME="$(fetch_me "$A_TOKEN")"
86
+ B_ME="$(fetch_me "$B_TOKEN")"
87
+
88
+ # TODO: this jq expects {user: {groupIds: [...]}} or {groupIds: [...]}.
89
+ # Change groupIds to whatever your tenancy field is (orgIds, workspaceIds, accountIds).
90
+ A_GROUPS=( $(echo "$A_ME" | jq -r '(.user.groupIds // .groupIds // []) | .[]') )
91
+ B_GROUPS=( $(echo "$B_ME" | jq -r '(.user.groupIds // .groupIds // []) | .[]') )
92
+
93
+ [[ ${#A_GROUPS[@]} -gt 0 ]] || { echo "Agent A has no tenant ids" >&2; exit 3; }
94
+ [[ ${#B_GROUPS[@]} -gt 0 ]] || { echo "Agent B has no tenant ids" >&2; exit 3; }
95
+
96
+ # Pick the first tenant each that the OTHER agent does NOT belong to
97
+ A_TARGET_GROUP=""
98
+ for g in "${A_GROUPS[@]}"; do
99
+ if ! printf '%s\n' "${B_GROUPS[@]}" | grep -qx "$g"; then
100
+ A_TARGET_GROUP="$g"; break
101
+ fi
102
+ done
103
+ B_TARGET_GROUP=""
104
+ for g in "${B_GROUPS[@]}"; do
105
+ if ! printf '%s\n' "${A_GROUPS[@]}" | grep -qx "$g"; then
106
+ B_TARGET_GROUP="$g"; break
107
+ fi
108
+ done
109
+
110
+ [[ -n "$A_TARGET_GROUP" && -n "$B_TARGET_GROUP" ]] || {
111
+ echo "ERROR: Agent A and B share all tenants — cannot run a meaningful cross-tenant test." >&2
112
+ echo "Agent A tenants: ${A_GROUPS[*]}" >&2
113
+ echo "Agent B tenants: ${B_GROUPS[*]}" >&2
114
+ echo "Move one agent into a different tenant via the admin UI, then re-run." >&2
115
+ exit 3
116
+ }
117
+
118
+ echo "==> Agent A will try to access B's tenant: $B_TARGET_GROUP"
119
+ echo "==> Agent B will try to access A's tenant: $A_TARGET_GROUP"
120
+ echo
121
+
122
+ # PROJECT-SPECIFIC START
123
+ # Probe matrix: each is a (METHOD, PATH_TEMPLATE, EXPECTED_BLOCKED_CODES) tuple.
124
+ # {group} is substituted with the OTHER agent's tenant id. We accept 403 or 404
125
+ # (either prevents the leak). REPLACE these with your project's tenant-scoped
126
+ # endpoints. Look at backend routes for any path containing /:groupId or /:orgId.
127
+ PROBES=(
128
+ "GET /api/groups/{group}/conversations 403|404"
129
+ "GET /api/groups/{group}/users 403|404"
130
+ "GET /api/groups/{group}/tags 403|404"
131
+ "GET /api/groups/{group}/canned-responses 403|404"
132
+ "POST /api/groups/{group}/tags 403|404"
133
+ "GET /api/groups/{group} 403|404"
134
+ )
135
+ # PROJECT-SPECIFIC END
136
+
137
+ PASS=0
138
+ FAIL=0
139
+ FAIL_LINES=()
140
+
141
# probe LABEL TOKEN METHOD URL ALLOWED_CODES
#
# Send one request as TOKEN and compare the HTTP status against
# ALLOWED_CODES, a '|'-separated list such as "403|404". Prints one PASS/FAIL
# line and updates the global PASS / FAIL counters and FAIL_LINES.
#
# POST sends an empty JSON object; every other method sends no body.
# A transport failure (connection refused, 10 s timeout) is recorded as
# status 000 and counted as FAIL. Previously, under `set -e`, it aborted the
# whole script before the summary was printed.
probe() {
  local label="$1" token="$2" method="$3" url="$4" allowed_codes="$5"
  local code verdict
  local -a curl_args=(-s -m 10 -o /dev/null -w '%{http_code}'
                      -H "Authorization: Bearer $token")

  case "$method" in
    GET)  ;;  # curl's default method
    POST) curl_args+=(-X POST -H 'Content-Type: application/json' -d '{}') ;;
    *)    curl_args+=(-X "$method") ;;
  esac

  # `|| true` keeps a curl failure from tripping `set -e`.
  code=$(curl "${curl_args[@]}" "$url") || true
  [[ -n "$code" ]] || code="000"

  if [[ "|$allowed_codes|" == *"|$code|"* ]]; then
    verdict="PASS"
    PASS=$((PASS+1))
  else
    verdict="FAIL"
    FAIL=$((FAIL+1))
    FAIL_LINES+=("$label $method $url got $code (expected $allowed_codes)")
  fi
  printf ' %-4s %-6s %-7s %s expected:%s actual:%s\n' \
    "$verdict" "$label" "$method" "$url" "$allowed_codes" "$code"
}
165
+
166
+ echo "=== Agent A attacking Agent B's tenant ($B_TARGET_GROUP) ==="
167
+ for p in "${PROBES[@]}"; do
168
+ method=$(echo "$p" | awk '{print $1}')
169
+ path=$(echo "$p" | awk '{print $2}' | sed "s|{group}|$B_TARGET_GROUP|g")
170
+ expected=$(echo "$p" | awk '{print $3}')
171
+ probe "A→B" "$A_TOKEN" "$method" "$TARGET$path" "$expected"
172
+ done
173
+ echo
174
+ echo "=== Agent B attacking Agent A's tenant ($A_TARGET_GROUP) ==="
175
+ for p in "${PROBES[@]}"; do
176
+ method=$(echo "$p" | awk '{print $1}')
177
+ path=$(echo "$p" | awk '{print $2}' | sed "s|{group}|$A_TARGET_GROUP|g")
178
+ expected=$(echo "$p" | awk '{print $3}')
179
+ probe "B→A" "$B_TOKEN" "$method" "$TARGET$path" "$expected"
180
+ done
181
+
182
+ echo
183
+ echo "=== Summary ==="
184
+ echo " PASS: $PASS"
185
+ echo " FAIL: $FAIL"
186
+ if [[ $FAIL -gt 0 ]]; then
187
+ echo
188
+ echo "FAILED PROBES (these are real BOLA findings — investigate immediately):"
189
+ printf ' - %s\n' "${FAIL_LINES[@]}"
190
+ exit 1
191
+ fi
192
+ echo "All probes blocked — cross-tenant access control holds."
@@ -0,0 +1,147 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Extended BOLA probe — covers PATCH, PUT, DELETE, POST verbs across
4
+ tenant-scoped endpoints. The shell `bola-cross-tenant.sh` only tests GET +
5
+ one POST; write verbs miss authz checks more often than GETs.
6
+
7
+ Strategy:
8
+ - As Agent A (Tenant A), attempt every mutating verb against Agent B's
9
+ real resources in Tenant B.
10
+ - Expected: 403 or 404.
11
+ - If 200/204: BOLA — log the finding (no auto-rollback; some mutations
12
+ can't be cleanly reverted from a black-box position).
13
+
14
+ DELETE-against-real-resource is SKIPPED. Instead we test the auth gate by
15
+ sending DELETE to a fabricated UUID — expect 403 BEFORE the 404 lookup.
16
+ """
17
+ import json, subprocess, sys
18
+ from pathlib import Path
19
+
20
+ ROOT = Path(__file__).resolve().parents[2].parent
21
+ fixture = json.loads((ROOT / 'security/pentest-prep/fixtures/test-context.json').read_text())
22
+ TARGET = fixture['target']
23
+
24
+ A = fixture['agent_a']
25
+ B = fixture['agent_b']
26
+
27
+ ENV = {}
28
+ for line in (ROOT / 'security/zap/.env').read_text().splitlines():
29
+ if '=' in line and not line.lstrip().startswith('#'):
30
+ k, v = line.split('=', 1); ENV[k.strip()] = v.strip()
31
+
32
+ # TODO: adjust login URL and response parsing to your API.
33
def login(u, p):
    """Log in as user ``u`` with password ``p`` and return the access token.

    The JSON body is sent to curl on stdin (``--data-binary @-``) so the
    password does not appear in the process list. If curl fails or the
    response has no ``tokens.accessToken``, an error is printed to stderr and
    the script exits with status 3. Status 1 is reserved for confirmed BOLA
    findings, so a login failure must not reuse it.
    """
    payload = json.dumps({'email': u, 'password': p})
    r = subprocess.run(['curl', '-fsS', '-X', 'POST', f"{TARGET}/api/auth/login",
                        '-H', 'Content-Type: application/json',
                        '--data-binary', '@-'],
                       input=payload, capture_output=True, text=True)
    if r.returncode != 0:
        print(f"ERROR: login failed for {u} (curl exit {r.returncode}): {r.stderr.strip()}",
              file=sys.stderr)
        sys.exit(3)
    try:
        return json.loads(r.stdout)['tokens']['accessToken']
    except (ValueError, KeyError, TypeError):
        # Not JSON, or a different response shape than this template expects.
        print(f"ERROR: login response for {u} has no tokens.accessToken; "
              "adjust the response parsing to your API.", file=sys.stderr)
        sys.exit(3)
39
+
40
+ A_TOK = login(ENV['ZAP_AGENT_USER'], ENV['ZAP_AGENT_PASS'])
41
+ B_TOK = login(ENV['ZAP_AGENT2_USER'], ENV['ZAP_AGENT2_PASS'])
42
+
43
+ # Pick a real resource ID from B's tenant as the cross-tenant target (Agent A attacks)
44
+ B_CONV = B['conversation_ids'][0] if B['conversation_ids'] else None
45
+ B_GROUP = B['group_id']
46
+ A_CONV = A['conversation_ids'][0] if A['conversation_ids'] else None
47
+ A_GROUP = A['group_id']
48
+ FABRICATED_CONV = '00000000-0000-0000-0000-000000000000'
49
+
50
+ if not B_CONV:
51
+ print("ERROR: agent_b has no resource ids in the fixture. Aborting.", file=sys.stderr)
52
+ sys.exit(2)
53
+
54
+ findings = []
55
+
56
def probe(label, method, path_template, body=None, target_group=B_GROUP, target_conv=B_CONV, tok=A_TOK, fab=False):
    """Send one cross-tenant request and record the outcome in ``findings``.

    Args:
        label: Short name stored with the finding.
        method: HTTP verb passed to curl.
        path_template: Path with ``{g}`` (tenant id) and ``{id}`` (resource id)
            placeholders.
        body: JSON-serialisable request body, or None for no body.
        target_group: Tenant id substituted for ``{g}``.
        target_conv: Resource id substituted for ``{id}``.
        tok: Bearer token of the attacking agent.
        fab: When true, ``{id}`` is the fabricated all-zero UUID instead of
            ``target_conv``.

    Returns:
        ``(status_code, response_body)``. The status is 0 when curl reports
        none, e.g. on a connection failure or the 10 s timeout.

    Classification: 403/404 -> PASS, 200/201/204 -> CRITICAL, anything else
    -> INVESTIGATE.
    """
    resource_id = FABRICATED_CONV if fab else target_conv
    path = path_template.format(g=target_group, id=resource_id)
    # -m 10 matches the shell probe's timeout, so one hung endpoint cannot
    # stall the whole run.
    cmd = ['curl', '-s', '-m', '10', '-X', method, f"{TARGET}{path}",
           '-H', f'Authorization: Bearer {tok}',
           '-w', '\nHTTP_CODE:%{http_code}']
    if body is not None:
        cmd += ['-H', 'Content-Type: application/json', '-d', json.dumps(body)]
    # errors='replace' keeps a non-UTF-8 response body from raising.
    r = subprocess.run(cmd, capture_output=True, text=True, errors='replace')
    out = r.stdout

    # curl appends the marker last, so split on its final occurrence. This
    # keeps body and status consistent even if the body contains the marker.
    body_text, marker, tail = out.rpartition('\nHTTP_CODE:')
    status = tail.strip()
    if marker and status.isdigit():
        code = int(status)
    else:
        code, body_text = 0, out

    if code in (403, 404):
        sev, mark = 'PASS', 'OK'
    elif code in (200, 201, 204):
        sev, mark = 'CRITICAL', '!!'
    else:
        sev, mark = 'INVESTIGATE', '??'
    finding = {'label': label, 'method': method, 'path': path, 'status': code,
               'severity': sev, 'response_preview': body_text[:150]}
    findings.append(finding)
    print(f" [{mark}] [{sev:11s}] {method:6s} {path} -> {code}")
    return code, body_text
77
+
78
+ print(f"=== Extended BOLA write-verb tests ===")
79
+ print(f" Attacker (A): {A['email']} tenant={A_GROUP}")
80
+ print(f" Target (B): {B['email']} tenant={B_GROUP} resource={B_CONV}")
81
+ print()
82
+
83
+ # PROJECT-SPECIFIC START
84
+ # TODO: replace the path templates with mutating endpoints from your project.
85
+ # Look at your routes for any path with /:groupId or /:tenantId + a mutating verb.
86
+
87
+ # 1. PATCH/PUT primary tenant resource
88
+ print("--- Resource mutation (B's resource, attempted as A) ---")
89
+ probe('patch-conv-as-A', 'PATCH', '/api/groups/{g}/conversations/{id}', {'status': 'CLOSED'})
90
+ probe('put-conv-as-A', 'PUT', '/api/groups/{g}/conversations/{id}', {'status': 'CLOSED'})
91
+
92
+ # 2. Workflow actions (assign, snooze, status flip — whatever your project has)
93
+ print()
94
+ print("--- Workflow actions on B's resource as A ---")
95
+ probe('assign-as-A', 'POST', '/api/groups/{g}/conversations/{id}/assign', {'agentId': A['email']})
96
+ probe('unassign-as-A', 'POST', '/api/groups/{g}/conversations/{id}/unassign', {})
97
+ probe('snooze-as-A', 'POST', '/api/groups/{g}/conversations/{id}/snooze', {'snoozeUntil': '2026-12-31T00:00:00Z'})
98
+ probe('spam-as-A', 'POST', '/api/groups/{g}/conversations/{id}/spam', {})
99
+
100
+ # 3. Sub-resource operations (tags, labels, attachments — adapt to your model)
101
+ print()
102
+ print("--- Sub-resource operations on B's resource ---")
103
+ probe('tag-add-as-A', 'POST', '/api/groups/{g}/conversations/{id}/tags', {'tagId': 'some-tag-id'})
104
+ probe('tag-del-as-A', 'DELETE', '/api/groups/{g}/conversations/{id}/tags/fake-tag-id')
105
+
106
+ # 4. Tenant-level mutations (modify or delete the tenant itself)
107
+ print()
108
+ print("--- Tenant-level mutations (B's tenant as A) ---")
109
+ probe('grp-put-as-A', 'PUT', '/api/admin/groups/{g}', {'name': 'pwn'}, target_conv='')
110
+ probe('grp-del-as-A', 'DELETE', '/api/admin/groups/{g}', target_conv='')
111
+
112
+ # 5. DELETE with fabricated UUID — auth gate only (no real deletion since target doesn't exist)
113
+ print()
114
+ print("--- DELETE auth-gate check (fabricated UUID, no mutation possible) ---")
115
+ probe('delete-conv-fab', 'DELETE', '/api/groups/{g}/conversations/{id}', fab=True)
116
+ # PROJECT-SPECIFIC END
117
+
118
+ # 6. Same probes as B against A (verify symmetry)
119
+ print()
120
+ print(f"=== Reverse direction: B attacks A's tenant ===")
121
+ print(f" Attacker (B): {B['email']}")
122
+ print(f" Target (A): {A['email']} tenant={A_GROUP} resource={A_CONV}")
123
+ print()
124
+
125
+ if A_CONV:
126
+ probe('B->A: patch-conv', 'PATCH', '/api/groups/{g}/conversations/{id}', {'status':'CLOSED'},
127
+ target_group=A_GROUP, target_conv=A_CONV, tok=B_TOK)
128
+ probe('B->A: assign', 'POST', '/api/groups/{g}/conversations/{id}/assign', {'agentId': B['email']},
129
+ target_group=A_GROUP, target_conv=A_CONV, tok=B_TOK)
130
+ probe('B->A: snooze', 'POST', '/api/groups/{g}/conversations/{id}/snooze', {'snoozeUntil':'2026-12-31T00:00:00Z'},
131
+ target_group=A_GROUP, target_conv=A_CONV, tok=B_TOK)
132
+
133
+ # Save
134
+ out = ROOT / 'security/pentest-prep/reports/custom-bola/write-verb-findings.json'
135
+ out.parent.mkdir(parents=True, exist_ok=True)
136
+ out.write_text(json.dumps(findings, indent=2))
137
+
138
+ crit = sum(1 for f in findings if f['severity'] == 'CRITICAL')
139
+ inv = sum(1 for f in findings if f['severity'] == 'INVESTIGATE')
140
+ ok = sum(1 for f in findings if f['severity'] == 'PASS')
141
+ print()
142
+ print("=== Summary ===")
143
+ print(f" CRITICAL (BOLA confirmed): {crit}")
144
+ print(f" INVESTIGATE (odd status): {inv}")
145
+ print(f" PASS (403/404): {ok}")
146
+ print(f" Saved to: {out}")
147
+ sys.exit(1 if crit > 0 else 0)
@@ -0,0 +1,69 @@
1
+ #!/usr/bin/env bash
2
+ # compare-roles.sh — diff two ZAP role-scoped SARIF reports.
3
+ #
4
+ # Usage:
5
+ # ./run.sh agent # produces zap-report-sarif-agent.json
6
+ # ./run.sh admin # produces zap-report-sarif-admin.json
7
+ # ./compare-roles.sh # prints the access-control delta
8
+ #
9
+ # What it shows:
10
+ # - Routes ADMIN can reach that AGENT cannot → expected; verifies authz
11
+ # - Routes AGENT can reach that ADMIN cannot → almost always wrong; investigate
12
+ # - Routes both can reach → no role distinction (may be intentional public)
13
+ #
14
+ # This is the "two-role diff" step. Without it, the active scan only proves what one
15
+ # role can see; the diff is what proves cross-role access control actually works.
16
+ set -euo pipefail
17
+ cd "$(dirname "$0")"
18
+
19
+ AGENT="zap-report-sarif-agent.json"
20
+ ADMIN="zap-report-sarif-admin.json"
21
+
22
+ [[ -f "$AGENT" ]] || { echo "missing $AGENT — run ./run.sh agent first" >&2; exit 1; }
23
+ [[ -f "$ADMIN" ]] || { echo "missing $ADMIN — run ./run.sh admin first" >&2; exit 1; }
24
+
25
+ python3 - <<PY
26
+ import json, re
27
+ from collections import defaultdict
28
+
29
def load_urls(path):
    """Return {normalized_path: set(method)} of every URL ZAP touched.

    Reads the SARIF file at path and walks the results of its first run.
    Only locations whose uri starts with "http" are used. Host and query
    string are dropped, then id-like path segments are replaced by
    placeholders so equivalent routes collapse to one pattern.
    """
    # Applied in order: uuid-like, long hex, then purely numeric segments.
    # The backslash before the dollar sign in the last pattern is consumed
    # by the enclosing shell here-document.
    rewrites = (
        (r"/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f-]{20,}", "/{uuid}"),
        (r"/[0-9a-f]{20,}", "/{id}"),
        (r"/\d+(?=/|\$)", "/{n}"),
    )
    with open(path) as handle:
        sarif = json.load(handle)
    seen = defaultdict(set)
    for result in sarif["runs"][0]["results"]:
        method = result.get("properties", {}).get("method", "?")
        for location in result.get("locations", []):
            artifact = location.get("physicalLocation", {}).get("artifactLocation", {})
            uri = artifact.get("uri", "")
            if not uri.startswith("http"):
                continue
            route = re.sub(r"https?://[^/]+", "", uri).split("?")[0]
            for pattern, placeholder in rewrites:
                route = re.sub(pattern, placeholder, route)
            seen[route].add(method)
    return seen
45
+
46
+ agent = load_urls("$AGENT")
47
+ admin = load_urls("$ADMIN")
48
+
49
+ agent_only = set(agent) - set(admin)
50
+ admin_only = set(admin) - set(agent)
51
+ both = set(agent) & set(admin)
52
+
53
+ print(f"=== AGENT touched {len(agent)} distinct path patterns ===")
54
+ print(f"=== ADMIN touched {len(admin)} distinct path patterns ===")
55
+ print()
56
+ print(f"--- Paths AGENT reached that ADMIN did not ({len(agent_only)}) ---")
57
+ print(" These are SUSPICIOUS — admin should see everything agent sees.")
58
+ for p in sorted(agent_only): print(f" AGENT-ONLY {p}")
59
+ print()
60
+ print(f"--- Paths ADMIN reached that AGENT did not ({len(admin_only)}) ---")
61
+ print(" These should match the access-control matrix (admin-only routes).")
62
+ for p in sorted(admin_only): print(f" ADMIN-ONLY {p}")
63
+ print()
64
+ print(f"--- Paths both reached ({len(both)}) ---")
65
+ print(" These are routes neither blocked at the auth/authz layer for either role.")
66
+ print(" Verify against access-control-matrix.md — anything here that should be")
67
+ print(" admin-only is a real access-control gap.")
68
+ for p in sorted(both): print(f" BOTH {p}")
69
+ PY