websec-validator 0.2.3__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {websec_validator-0.2.3/src/websec_validator.egg-info → websec_validator-0.2.5}/PKG-INFO +1 -1
  2. {websec_validator-0.2.3 → websec_validator-0.2.5}/pyproject.toml +1 -1
  3. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/briefing.py +1 -0
  4. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/cli.py +21 -3
  5. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/dynamic.py +85 -2
  6. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/extractors/__init__.py +2 -2
  7. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/extractors/authz.py +16 -0
  8. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/extractors/base.py +8 -1
  9. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/extractors/routes.py +64 -19
  10. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/findings.py +28 -0
  11. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/probes.py +8 -1
  12. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/recon.py +2 -2
  13. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/report.py +7 -3
  14. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/scanners.py +75 -15
  15. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/probes/bola-cross-tenant.sh +2 -1
  16. websec_validator-0.2.5/src/websec_validator/templates/probes/forged-token.sh +84 -0
  17. websec_validator-0.2.5/src/websec_validator/templates/probes/jwt-attacks.sh +61 -0
  18. websec_validator-0.2.5/src/websec_validator/templates/probes/rate-limit-burst.sh +89 -0
  19. websec_validator-0.2.5/src/websec_validator/templates/probes/ssrf-probes.sh +62 -0
  20. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/probes/unauth-baseline.sh +2 -1
  21. {websec_validator-0.2.3 → websec_validator-0.2.5/src/websec_validator.egg-info}/PKG-INFO +1 -1
  22. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator.egg-info/SOURCES.txt +2 -0
  23. websec_validator-0.2.5/tests/test_hardening.py +137 -0
  24. {websec_validator-0.2.3 → websec_validator-0.2.5}/tests/test_recon.py +37 -1
  25. websec_validator-0.2.3/src/websec_validator/templates/probes/jwt-attacks.sh +0 -161
  26. websec_validator-0.2.3/src/websec_validator/templates/probes/rate-limit-burst.sh +0 -136
  27. websec_validator-0.2.3/src/websec_validator/templates/probes/ssrf-probes.sh +0 -189
  28. {websec_validator-0.2.3 → websec_validator-0.2.5}/LICENSE +0 -0
  29. {websec_validator-0.2.3 → websec_validator-0.2.5}/README.md +0 -0
  30. {websec_validator-0.2.3 → websec_validator-0.2.5}/setup.cfg +0 -0
  31. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/__init__.py +0 -0
  32. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/calibration.json +0 -0
  33. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/calibration.py +0 -0
  34. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/constitution.py +0 -0
  35. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/corpus.json +0 -0
  36. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/extractors/auth.py +0 -0
  37. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/extractors/client_exposure.py +0 -0
  38. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/extractors/graphql.py +0 -0
  39. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/extractors/iac_ci.py +0 -0
  40. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/extractors/integrations.py +0 -0
  41. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/extractors/schemas.py +0 -0
  42. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/extractors/stack.py +0 -0
  43. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/extractors/surface.py +0 -0
  44. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/extractors/tenant.py +0 -0
  45. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/proof.py +0 -0
  46. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/probes/_lib.py +0 -0
  47. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/probes/bola-write-verbs.py +0 -0
  48. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/probes/compare-roles.sh +0 -0
  49. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/probes/dlp-bypass-offline.py +0 -0
  50. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/probes/hs256-brute-force.py +0 -0
  51. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/probes/mass-assignment.py +0 -0
  52. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/probes/race-conditions.py +0 -0
  53. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/probes/s3-assess.sh +0 -0
  54. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/probes/webhook-forgery.py +0 -0
  55. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +0 -0
  56. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/reports/access-control-matrix.md.template +0 -0
  57. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/reports/findings-triage.md.template +0 -0
  58. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/reports/pentest-handover-brief.md.template +0 -0
  59. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator/templates/reports/per-tool-FINDINGS.md.template +0 -0
  60. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator.egg-info/dependency_links.txt +0 -0
  61. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator.egg-info/entry_points.txt +0 -0
  62. {websec_validator-0.2.3 → websec_validator-0.2.5}/src/websec_validator.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: websec-validator
3
- Version: 0.2.3
3
+ Version: 0.2.5
4
4
  Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
5
5
  Author: Ricardo Accioly
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "websec-validator"
7
- version = "0.2.3"
7
+ version = "0.2.5"
8
8
  description = "Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -118,6 +118,7 @@ credentials** — ask the human, never fabricate, never hit production.
118
118
  - **Datastores:** {", ".join(stack.get("datastores", [])) or "?"} · **Monorepo:** {stack.get("monorepo", False)}
119
119
  - **Auth scheme:** `{auth.get("scheme","?")}` (token in {auth.get("token_location","?")}) · guard files: {len(auth.get("guard_files", []))}
120
120
  - **Route engine:** {routes.get("engine","?")} · **{routes.get('count',0)} endpoints** · by method: {routes.get("by_method", {})}
121
+ {("> " + routes["note"]) if routes.get("note") else ""}
121
122
 
122
123
  ## 2. ★ Tenant boundary (confirm first — highest value, easiest to get wrong)
123
124
 
@@ -87,7 +87,7 @@ def cmd_run(args) -> int:
87
87
  print(f"websec-validator v{__version__} · target: {target} · run {ts}\n")
88
88
 
89
89
  # 1. recon
90
- facts = recon.build_facts(target, __version__)
90
+ facts = recon.build_facts(target, __version__, args.exclude)
91
91
  recon.write_facts(facts, out / "FACTS.json")
92
92
  langs = facts["stack"]["languages"]
93
93
  _print_facts_summary(facts)
@@ -98,13 +98,21 @@ def cmd_run(args) -> int:
98
98
  unified = None
99
99
  if args.scan:
100
100
  print("\n running available static scanners (read-only)…")
101
- scan_results = scanners.run_available(target, out, langs)
101
+ only = args.scanners.split(",") if args.scanners else None
102
+ scan_results = scanners.run_available(target, out, langs, excludes=args.exclude, only=only)
102
103
  for r in scan_results:
103
104
  tag = r.get("findings", r.get("status", "?"))
104
105
  print(f" {r['name']}: {tag}")
105
- unified = scanners.normalize_findings(scan_results, out)
106
+ unified = scanners.normalize_findings(scan_results, out, target=target)
106
107
  print(f" → {unified['total']} de-duplicated findings "
107
108
  f"({unified['cross_tool_or_dup_merged']} merged) · {unified['by_severity']}")
109
+ _hyg = []
110
+ if unified.get('contamination_dropped'):
111
+ _hyg.append(f"{unified['contamination_dropped']} dropped (skip-dir contamination)")
112
+ if unified.get('local_only_downgraded'):
113
+ _hyg.append(f"{unified['local_only_downgraded']} downgraded (gitignored/local-only secret)")
114
+ if _hyg:
115
+ print(f" hygiene: {' · '.join(_hyg)}")
108
116
  else:
109
117
  print(f"\n scanners available: {', '.join(s['name'] for s in det['available']) or 'none'}"
110
118
  " (add --scan to execute them)")
@@ -161,6 +169,12 @@ def cmd_dynamic(args) -> int:
161
169
  for r in u["results"]:
162
170
  mark = "🔓" if r["verdict"] == "OPEN-no-auth" else (" ·" if r["verdict"] == "protected" else " ")
163
171
  print(f" {mark} {str(r['status']):>4} {r['verdict']:26} {r['path']}")
172
+ ftb = dyn.get("forged_token_bypass", {})
173
+ if ftb:
174
+ print(f"\n forged-token (unverified-signature) → {ftb['summary']}")
175
+ for r in ftb.get("results", []):
176
+ if r["verdict"] == "BYPASS":
177
+ print(f" 🚨 BYPASS {r['baseline']}→{r['forged']} {r['method']} {r['path']} (via {r['via']})")
164
178
  if args.probe_writes:
165
179
  w = dyn["write_auth_enforcement"]
166
180
  print(f"\n write-verb auth enforcement → {w['summary']}")
@@ -342,6 +356,10 @@ def build_parser() -> argparse.ArgumentParser:
342
356
  r.add_argument("target")
343
357
  r.add_argument("--scan", action="store_true", help="also execute available static scanners")
344
358
  r.add_argument("--out", help="output dir (default: ./websec-out)")
359
+ r.add_argument("--exclude", action="append", metavar="PATH",
360
+ help="exclude a path/glob from recon + scanners (repeatable; e.g. --exclude 'docs/**')")
361
+ r.add_argument("--scanners", metavar="A,B",
362
+ help="comma-separated subset of scanners to run with --scan (e.g. gitleaks,semgrep)")
345
363
  r.set_defaults(func=cmd_run)
346
364
 
347
365
  # recon/proof/calibrate are hidden from the main --help (argparse.SUPPRESS): recon is a
@@ -37,10 +37,13 @@ def _dig(d: dict, dotted: str):
37
37
  return cur
38
38
 
39
39
 
40
- def _request(method: str, url: str, token: str | None, timeout: int = 20, data: bytes | None = None):
40
+ def _request(method: str, url: str, token: str | None, timeout: int = 20,
41
+ data: bytes | None = None, cookie: str | None = None):
41
42
  headers = {"Accept": "application/json"}
42
43
  if token:
43
44
  headers["Authorization"] = f"Bearer {token}"
45
+ if cookie:
46
+ headers["Cookie"] = cookie
44
47
  if data is not None:
45
48
  headers["Content-Type"] = "application/json"
46
49
  req = urllib.request.Request(url, method=method, headers=headers, data=data)
@@ -254,9 +257,89 @@ def write_auth_enforcement(target: str, facts: dict, max_endpoints: int = 80) ->
254
257
  }
255
258
 
256
259
 
260
+ # Codes that mean "the request reached the handler/validation" — i.e. auth PASSED. Used to
261
+ # judge a forged-token attempt. Deliberately EXCLUDES 401/403 (blocked), 429 (rate-limited —
262
+ # would otherwise be a false bypass), 5xx and 000/None (ambiguous/transport). A gated route
263
+ # (401/403 with no token) that returns one of these WITH a forged token = signature not verified.
264
+ _REACHED_HANDLER = {200, 201, 202, 203, 204, 206, 400, 404, 405, 409, 413, 415, 422}
265
+
266
+
267
+ def _forge_jwt(payload: dict, alg: str = "RS256") -> str:
268
+ """A structurally-valid JWT with a DELIBERATELY INVALID signature (no real key). The whole
269
+ point is to see whether the target verifies the signature at all — a correct verifier
270
+ rejects this outright; a decode-only auth path (the decodeJwtPayloadUnsafe class) trusts it."""
271
+ import base64
272
+
273
+ def b(o):
274
+ return base64.urlsafe_b64encode(json.dumps(o).encode()).rstrip(b"=").decode()
275
+ sig = "" if alg == "none" else "d2Vic2VjLWZvcmdlZC1zaWc" # 'websec-forged-sig' — not a real signature
276
+ return ".".join([b({"alg": alg, "typ": "JWT", "kid": "forged"}), b(payload), sig])
277
+
278
+
279
+ def forged_token_bypass(target: str, facts: dict, cookie_names=None,
280
+ probe_writes: bool = False, max_endpoints: int = 60) -> dict:
281
+ """Does the app actually VERIFY JWT signatures? Forge a token with a far-future `exp` and a
282
+ BOGUS signature, present it to each route that is GATED without auth, and compare. A route
283
+ that answers 401/403 with NO token but REACHES THE HANDLER with the forged token is trusting
284
+ an unverified token = authentication bypass (CWE-347 / OWASP API2:2023) — the dynamic verdict
285
+ on the `decodeJwtPayloadUnsafe`/`jwt.decode(verify=False)` hypothesis.
286
+
287
+ GET reads by default (read-safe); write verbs (empty body, dummy ids — non-destructive) only
288
+ when `probe_writes`. Tries `Authorization: Bearer` (universal) plus any `cookie_names` given,
289
+ since apps read tokens from different locations. 429/5xx are treated as inconclusive, never
290
+ a bypass, so an aggressive rate limiter can't manufacture a false positive."""
291
+ forged = _forge_jwt({"sub": "websec-forged", "email": "websec-forged@example.com",
292
+ "role": "admin", "roles": ["admin"], "exp": 9999999999})
293
+ cookie_names = list(cookie_names or [])
294
+
295
+ targets = [("GET", e.get("path", "")) for e in (facts.get("routes") or {}).get("endpoints", [])
296
+ if e.get("method") == "GET" and "{" not in e.get("path", "")
297
+ and not SIDE_EFFECTING.search(e.get("path", ""))]
298
+ if probe_writes:
299
+ targets += [(e.get("method"), e.get("path", "")) for e in (facts.get("routes") or {}).get("endpoints", [])
300
+ if e.get("method") in WRITE_VERBS and "{" not in e.get("path", "")
301
+ and not SIDE_EFFECTING.search(e.get("path", ""))]
302
+ targets = sorted(set(targets))[:max_endpoints]
303
+
304
+ results, bypassed = [], []
305
+ for method, path in targets:
306
+ url = target + path
307
+ body = b"{}" if method in WRITE_VERBS else None
308
+ base_code, _ = _request(method, url, token=None, data=body)
309
+ if base_code not in (401, 403):
310
+ continue # only routes that are gated WITHOUT auth tell us anything about forgery
311
+ attempts = [("Authorization: Bearer", _request(method, url, token=forged, data=body)[0])]
312
+ for cn in cookie_names:
313
+ attempts.append((f"cookie:{cn}", _request(method, url, token=None, data=body, cookie=f"{cn}={forged}")[0]))
314
+ hit = next(((via, code) for via, code in attempts if code in _REACHED_HANDLER), None)
315
+ if hit:
316
+ via, fcode = hit
317
+ row = {"method": method, "path": path, "baseline": base_code, "forged": fcode,
318
+ "via": via, "verdict": "BYPASS"}
319
+ bypassed.append(row)
320
+ else:
321
+ row = {"method": method, "path": path, "baseline": base_code,
322
+ "forged": attempts[0][1], "via": "Authorization: Bearer", "verdict": "rejected"}
323
+ results.append(row)
324
+
325
+ return {
326
+ "target": target,
327
+ "mode": "present an UNSIGNED/bogus-sig JWT (far-future exp) to each gated route; "
328
+ "reached-handler = signature not verified",
329
+ "token_locations": ["Authorization: Bearer"] + [f"cookie:{c}" for c in cookie_names],
330
+ "tested": len(results),
331
+ "bypassed": bypassed,
332
+ "results": results,
333
+ "summary": f"{len(bypassed)}/{len(results)} gated route(s) accepted a forged unsigned token"
334
+ + (" — ⚠ SIGNATURE NOT VERIFIED (CWE-347 auth bypass)" if bypassed
335
+ else " — all rejected the forged token"),
336
+ }
337
+
338
+
257
339
  def run_unauth(target: str, facts_path: Path, outdir: Path, probe_writes: bool = False) -> dict:
258
340
  facts = json.loads(Path(facts_path).read_text())
259
- res = {"unauth_reachability": unauth_reachability(target, facts)}
341
+ res = {"unauth_reachability": unauth_reachability(target, facts),
342
+ "forged_token_bypass": forged_token_bypass(target, facts, probe_writes=probe_writes)}
260
343
  if probe_writes:
261
344
  res["write_auth_enforcement"] = write_auth_enforcement(target, facts)
262
345
  outdir.mkdir(parents=True, exist_ok=True)
@@ -39,9 +39,9 @@ REGISTRY: list[Extractor] = [
39
39
  ]
40
40
 
41
41
 
42
- def run_all(root: Path, version: str) -> dict:
42
+ def run_all(root: Path, version: str, excludes: list | None = None) -> dict:
43
43
  """Walk the repo once, run every extractor, return the merged FACTS dict."""
44
- ctx = RepoContext(root)
44
+ ctx = RepoContext(root, excludes)
45
45
  facts: dict = {
46
46
  "tool": "websec-validator",
47
47
  "version": version,
@@ -52,6 +52,14 @@ ROLE = re.compile(
52
52
  r"has_?[Rr]ole\s*\(\s*['\"]([\w:.-]+)['\"]|"
53
53
  r"authorizeRoles\s*\(([^)]*)\)|permission_required\s*\(\s*['\"]([\w:.-]+)['\"]")
54
54
 
55
+ # F5: a call to a decoder/parser named "unsafe"/"unverified"/"noVerify"/"skipVerify"
56
+ # (e.g. decodeJwtPayloadUnsafe) — dangerous when its result feeds an auth decision.
57
+ UNSAFE_DECODER = re.compile(r"\b([A-Za-z_]\w*(?:[Uu]nsafe|[Uu]nverified|[Nn]o[Vv]erif\w*|[Ss]kip[Vv]erif\w*)\w*)\s*\(")
58
+ # does this file actually make an auth/identity decision? (so the unsafe decode matters)
59
+ AUTH_CONTEXT = re.compile(
60
+ r"require(?:Auth|Admin|Role|Permission)|isAdmin|authoriz|getToken\s*\(|getServerSession|"
61
+ r"req\.auth\b|currentUser|jwt\.(?:decode|verify)|decodeJwt", re.I)
62
+
55
63
 
56
64
  def _parse_next_middleware(ctx: RepoContext) -> dict:
57
65
  # Next 15.5+/16 renamed `middleware.ts` → `proxy.ts` (both filenames are valid; the
@@ -127,6 +135,13 @@ class AuthzExtractor(Extractor):
127
135
  if e.get("method") in WRITE_VERBS and not PUBLIC_HINT.search(e.get("path", "")):
128
136
  no_guard_writes.append(f"{e['method']} {e['path']} ({relcp or '?'})")
129
137
 
138
+ # F5: files that make an auth decision AND call an unsafe/unverified decoder
139
+ unsafe_decoders = []
140
+ for _p, rel, text in ctx.iter_code():
141
+ if AUTH_CONTEXT.search(text):
142
+ for dec in sorted(set(UNSAFE_DECODER.findall(text))):
143
+ unsafe_decoders.append({"file": rel, "decoder": dec})
144
+
130
145
  if global_auth:
131
146
  where = f"`{mw['file']}` (matcher {mw.get('matchers') or '—'})" if mw_auth else "`app.use(<auth>)`"
132
147
  note = (f"A GLOBAL auth middleware ({where}) was detected — most routes are protected by default. "
@@ -146,5 +161,6 @@ class AuthzExtractor(Extractor):
146
161
  "no_visible_guard": no_guard, "unknown": unknown},
147
162
  "endpoint_guards": egs[:400],
148
163
  "write_endpoints_without_visible_guard": sorted(set(no_guard_writes))[:60],
164
+ "unsafe_auth_decoders": unsafe_decoders[:30],
149
165
  "note": note,
150
166
  }
@@ -9,6 +9,7 @@ still say something useful.
9
9
 
10
10
  from __future__ import annotations
11
11
 
12
+ import fnmatch
12
13
  from pathlib import Path
13
14
 
14
15
  SKIP_DIRS = {".git", "node_modules", "dist", "build", ".next", ".nuxt", "venv",
@@ -27,13 +28,17 @@ MAX_BYTES = 2_000_000
27
28
  class RepoContext:
28
29
  """Walk the tree once; cache file text; serve cheap queries to every extractor."""
29
30
 
30
- def __init__(self, root: Path):
31
+ def __init__(self, root: Path, excludes: list | None = None):
31
32
  self.root = root
33
+ self.excludes = [e for e in (excludes or []) if e] # user --exclude paths/globs
32
34
  self._text: dict[Path, str] = {}
33
35
  self.code_files: list[Path] = []
34
36
  self.stack: dict = {} # filled by StackExtractor, read by the rest
35
37
  self._walk()
36
38
 
39
+ def _excluded(self, rel: str) -> bool:
40
+ return any(ex in rel or fnmatch.fnmatch(rel, ex) for ex in self.excludes)
41
+
37
42
  def _walk(self) -> None:
38
43
  n = 0
39
44
  for p in self.root.rglob("*"):
@@ -44,6 +49,8 @@ class RepoContext:
44
49
  # have its whole tree skipped.
45
50
  if p.is_dir() or any(part in SKIP_DIRS for part in p.relative_to(self.root).parts):
46
51
  continue
52
+ if self.excludes and self._excluded(str(p.relative_to(self.root))):
53
+ continue
47
54
  if p.suffix.lower() in CODE_EXT:
48
55
  self.code_files.append(p)
49
56
  n += 1
@@ -25,7 +25,18 @@ import subprocess
25
25
  import tempfile
26
26
  from pathlib import Path
27
27
 
28
- from .base import Extractor, RepoContext
28
+ from .base import SKIP_DIRS, Extractor, RepoContext
29
+
30
+ # Noir is a subprocess that scans the raw tree — it does NOT know the walker's SKIP_DIRS,
31
+ # so without this it grinds through (and emits routes from) build output (.next, cdk.out,
32
+ # dist), dependencies (node_modules, vendor), and NESTED WORKTREES (.claude/worktrees — a
33
+ # full copy of the repo → doubled routes). Pass the skip dirs as exclude globs (perf) AND
34
+ # post-filter Noir's output by code_path (the correctness guarantee).
35
+ _NOIR_SKIP_GLOBS = ",".join(f"**/{d}/**" for d in sorted(SKIP_DIRS))
36
+
37
+
38
+ def _in_skip_dir(code_path: str) -> bool:
39
+ return any(part in SKIP_DIRS for part in (code_path or "").replace("\\", "/").split("/"))
29
40
 
30
41
  WRITE_VERBS = {"POST", "PUT", "PATCH", "DELETE"}
31
42
  EXCLUDE_GLOBS = "*.test.ts,*.test.tsx,*.spec.ts,*.test.js,*.spec.js,*_test.go,*_test.py,test_*.py,*.stories.tsx"
@@ -38,6 +49,20 @@ TRAVERSAL_NAMES = re.compile(r"^(file|filename|filepath|path|dir|folder|template
38
49
  TEMPLATED = ("BASE_URL", "localhost", "127.0.0.1", "${", "{{")
39
50
  ASSET_GLOB = re.compile(r"\*\.\w+")
40
51
 
52
+ # A route whose source file is a vendored/third-party API SPEC (OpenAPI/Swagger/GraphQL
53
+ # schema), not an app handler. Noir parses these and emits their paths as if the app
54
+ # served them — which on a repo that vendors e.g. a 16k-line swagger turns ~15 real
55
+ # findings into hundreds of phantom ones. We split these out as informational.
56
+ SPEC_PATH = re.compile(
57
+ r"\.(?:ya?ml|graphql|gql|raml)$" # spec file formats
58
+ r"|(?:^|/)(?:node_modules|vendor|vendored|third[_-]?party|examples?|schemas?"
59
+ r"|(?:docs?|documentation)[\w-]*)/" # vendor/docs/schema dirs
60
+ r"|swagger|openapi", re.I)
61
+
62
+
63
+ def _is_spec_derived(code_path: str) -> bool:
64
+ return bool(code_path) and bool(SPEC_PATH.search(code_path))
65
+
41
66
 
42
67
  def _clean_path(p: str) -> str:
43
68
  p = re.sub(r":(\w+)", r"{\1}", p) # Express :id -> {id}
@@ -53,16 +78,17 @@ def _is_noise(path: str) -> bool:
53
78
  return bool(ASSET_GLOB.search(path)) # static-asset glob route (/*.png)
54
79
 
55
80
 
56
- def _noir_scan(root: Path) -> list | None:
81
+ def _noir_scan(root: Path, extra_excludes: list | None = None) -> list | None:
57
82
  """Run Noir → list of endpoint dicts, or None if Noir unavailable/failed."""
58
83
  if not shutil.which("noir"):
59
84
  return None
85
+ excl = ",".join([EXCLUDE_GLOBS, _NOIR_SKIP_GLOBS] + (list(extra_excludes) if extra_excludes else []))
60
86
  with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tf:
61
87
  out = Path(tf.name)
62
88
  try:
63
89
  proc = subprocess.run(
64
90
  ["noir", "scan", str(root), "-f", "json", "-o", str(out),
65
- "--exclude-path", EXCLUDE_GLOBS, "--no-log", "--no-color"],
91
+ "--exclude-path", excl, "--no-log", "--no-color"],
66
92
  capture_output=True, text=True, timeout=300)
67
93
  if not out.exists():
68
94
  return None
@@ -77,8 +103,10 @@ def _noir_scan(root: Path) -> list | None:
77
103
  pass
78
104
 
79
105
 
80
- def _normalize_noir(eps: list) -> list:
81
- rows, seen = [], set()
106
+ def _normalize_noir(eps: list) -> tuple:
107
+ """→ (app_routes, spec_derived_routes). Routes whose source file is a vendored API
108
+ spec are split out so they don't generate phantom findings (B1)."""
109
+ rows, spec, seen = [], [], set()
82
110
  for e in eps:
83
111
  if e.get("internal"):
84
112
  continue
@@ -89,21 +117,22 @@ def _normalize_noir(eps: list) -> list:
89
117
  if _is_noise(path):
90
118
  continue
91
119
  method = (e.get("method") or "GET").upper()
92
- if (method, path) in seen:
93
- continue
94
- seen.add((method, path))
95
- params = [{"name": p.get("name", ""), "where": p.get("param_type", "")}
96
- for p in (e.get("params") or [])]
97
120
  cp = (e.get("details", {}) or {}).get("code_paths") or [{}]
98
- rows.append({
121
+ code_path = cp[0].get("path", "")
122
+ if (method, path, code_path) in seen:
123
+ continue
124
+ seen.add((method, path, code_path))
125
+ row = {
99
126
  "method": method,
100
127
  "path": path,
101
- "params": params,
128
+ "params": [{"name": p.get("name", ""), "where": p.get("param_type", "")}
129
+ for p in (e.get("params") or [])],
102
130
  "technology": (e.get("details", {}) or {}).get("technology", ""),
103
- "code_path": cp[0].get("path", ""),
131
+ "code_path": code_path,
104
132
  "source": "noir",
105
- })
106
- return rows
133
+ }
134
+ (spec if _is_spec_derived(code_path) else rows).append(row)
135
+ return rows, spec
107
136
 
108
137
 
109
138
  # ---- regex fallback (only when Noir is absent) ---------------------------------------------
@@ -193,19 +222,26 @@ class RoutesExtractor(Extractor):
193
222
  category = "surface"
194
223
 
195
224
  def extract(self, ctx: RepoContext, facts: dict) -> dict:
196
- eps = _noir_scan(ctx.root)
225
+ eps = _noir_scan(ctx.root, getattr(ctx, "excludes", None))
197
226
  if eps is not None:
198
- routes = _normalize_noir(eps)
227
+ routes, spec_derived = _normalize_noir(eps)
199
228
  engine = "noir"
200
229
  else:
201
- routes = _fallback(ctx)
230
+ routes, spec_derived = _fallback(ctx), []
202
231
  engine = "regex-fallback (install OWASP Noir for full coverage: brew install noir)"
232
+ # honor user --exclude against route code_paths too (Noir's own --exclude-path glob is
233
+ # unreliable for bare dir names; this guarantees `--exclude <path>` drops those routes).
234
+ if getattr(ctx, "excludes", None):
235
+ routes = [r for r in routes if not ctx._excluded(r.get("code_path", ""))]
236
+ # Noir doesn't honor SKIP_DIRS — drop any route it found under build output / deps /
237
+ # nested worktrees (e.g. .claude/worktrees/* doubling the whole app).
238
+ routes = [r for r in routes if not _in_skip_dir(r.get("code_path", ""))]
203
239
  by_method: dict = {}
204
240
  by_tech: dict = {}
205
241
  for r in routes:
206
242
  by_method[r["method"]] = by_method.get(r["method"], 0) + 1
207
243
  by_tech[r["technology"]] = by_tech.get(r["technology"], 0) + 1
208
- return {
244
+ out = {
209
245
  "engine": engine,
210
246
  "count": len(routes),
211
247
  "by_method": by_method,
@@ -213,3 +249,12 @@ class RoutesExtractor(Extractor):
213
249
  "endpoints": routes,
214
250
  "targeting": _derive(routes),
215
251
  }
252
+ if spec_derived:
253
+ from collections import Counter
254
+ srcs = Counter(r["code_path"] for r in spec_derived)
255
+ out["spec_derived_excluded"] = len(spec_derived)
256
+ out["spec_derived_sources"] = [f"{n}× {f}" for f, n in srcs.most_common(8)]
257
+ out["note"] = (f"⚠ {len(spec_derived)} routes came from vendored API SPEC files "
258
+ f"(OpenAPI/Swagger/GraphQL), not app handlers — EXCLUDED from the {len(routes)} "
259
+ f"app routes + all findings. Sources: {', '.join(f for f, _ in srcs.most_common(5))}.")
260
+ return out
@@ -26,6 +26,9 @@ from . import calibration
26
26
  STANDARDS = {
27
27
  "missing-auth": (["CWE-862 Missing Authorization", "CWE-306 Missing Authentication"],
28
28
  "ASVS V4.1.1", ["API1:2023 BOLA", "API5:2023 BFLA"]),
29
+ "unsafe-auth-decoder": (["CWE-347 Improper Verification of Cryptographic Signature",
30
+ "CWE-345 Insufficient Verification of Data Authenticity"],
31
+ "ASVS V3.5.2", ["API2:2023 Broken Authentication"]),
29
32
  "bola": (["CWE-639 Authorization Bypass (IDOR)"], "ASVS V4.2.1", ["API1:2023 BOLA"]),
30
33
  "ssrf": (["CWE-918 SSRF"], "ASVS V12.6", ["API7:2023 SSRF"]),
31
34
  "secret": (["CWE-798 Hard-coded Credentials"], "ASVS V2.10", ["API8:2023 Misconfiguration"]),
@@ -48,6 +51,9 @@ REMEDIATION = {
48
51
  "missing-auth": "Add an auth guard to the handler (e.g. requireAuth()/getServerSession()), or a "
49
52
  "middleware matcher over /api/(.*) with an explicit public allowlist so it can't be forgotten.",
50
53
  "bola": "Enforce object ownership: verify the authenticated principal owns/can access the resource id (tenant scope).",
54
+ "unsafe-auth-decoder": "Verify the token/signature before trusting it for an auth/identity decision — use a "
55
+ "verifying decode (e.g. jwt.verify with the key / a checked session), never an *Unsafe* "
56
+ "or decode-only path whose output then feeds requireAuth/requireAdmin.",
51
57
  "ssrf": "Validate + allowlist outbound URLs; block RFC1918/IMDS/file://; never fetch a raw user-supplied URL.",
52
58
  "secret": "Rotate the credential, remove from code/history, load from a secrets manager.",
53
59
  "cve": "Upgrade the dependency to the fixed version.",
@@ -150,6 +156,28 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
150
156
  [{"layer": "dynamic", "detail": f"cross-tenant GET returned another tenant's data "
151
157
  f"(HTTP {lk.get('status')}, {lk.get('direction')})"}]))
152
158
 
159
+ # ---- 1c. Unsafe/unverified decoder feeding an auth decision (F5) ----
160
+ for ud in ((facts.get("authz", {}) or {}).get("unsafe_auth_decoders", []) or []):
161
+ out.append(_f(f"Auth decision uses an unverified decoder: {ud.get('decoder')}", "access-control",
162
+ "unsafe-auth-decoder", "HIGH", "MEDIUM", ud.get("file", ""),
163
+ [{"layer": "recon", "detail": f"{ud.get('file')} makes an auth/identity decision AND calls "
164
+ f"{ud.get('decoder')}() — if that decodes a token/signature WITHOUT verifying it, a forged "
165
+ "value is trusted (the decodeJwtPayloadUnsafe → requireAdmin class of bug). Trace the call path."}]))
166
+
167
+ # ---- 1d. Forged-token acceptance — unverified signature, DYNAMICALLY CONFIRMED ----
168
+ # The verdict for 1c: we presented an UNSIGNED/bogus-sig token and the route reached its
169
+ # handler anyway (no-auth 401/403 → reached-handler with the forged token). That is the
170
+ # decodeJwtPayloadUnsafe/jwt.decode(verify=False) hypothesis proven — CWE-347 broken auth.
171
+ for b in ((dynamic or {}).get("forged_token_bypass", {}) or {}).get("bypassed", []):
172
+ out.append(_f(
173
+ f"Auth bypass: forged unsigned token accepted — {b.get('method')} {b.get('path')}",
174
+ "access-control", "unsafe-auth-decoder", "CRITICAL", "HIGH",
175
+ f"{b.get('method')} {b.get('path')}",
176
+ [{"layer": "dynamic", "detail": f"no auth → HTTP {b.get('baseline')}; a token with NO valid "
177
+ f"signature (via {b.get('via')}, far-future exp) → HTTP {b.get('forged')} — the auth gate "
178
+ "accepted it, so the signature is NOT verified. Reachable by anyone who can craft a token "
179
+ "string; route the guard through a verifying decode (jwt.verify w/ the key / a checked session)."}]))
180
+
153
181
  # ---- 2. Static scanner findings (de-duplicated `unified`) ----
154
182
  cat_to_class = {"sca": "cve", "secret": "secret", "iac": "iac", "sast": "sast"}
155
183
  for t in (unified or {}).get("top", []):
@@ -28,6 +28,8 @@ PROBES = {
28
28
  "a low-priv token + a write endpoint that updates a record"),
29
29
  "jwt-attacks": ("jwt-attacks.sh", "JWT: alg:none, tamper, expiry, replay",
30
30
  "a valid token + the login + a protected endpoint"),
31
+ "forged-token": ("forged-token.sh", "Forged/unsigned-JWT acceptance (CWE-347 broken auth)",
32
+ "just the target base URL — it forges its own token + reads routes from probe-context.json"),
31
33
  "hs256-brute-force": ("hs256-brute-force.py", "Offline HS256 weak-secret brute",
32
34
  "one HS256 JWT (offline — no live app needed)"),
33
35
  "ssrf-probes": ("ssrf-probes.sh", "SSRF: IMDS / RFC1918 / file://",
@@ -47,7 +49,7 @@ PROBES = {
47
49
 
48
50
  # unauth-baseline is ALWAYS staged: it's the cheapest probe and directly exercises the
49
51
  # #1 lead class (missing authentication) — the one a no-creds run can confirm immediately.
50
- ALWAYS = ["unauth-baseline", "jwt-attacks", "hs256-brute-force", "rate-limit-burst"]
52
+ ALWAYS = ["unauth-baseline", "forged-token", "jwt-attacks", "hs256-brute-force", "rate-limit-burst"]
51
53
 
52
54
  # which targeting bucket each probe should be pointed at (for the manifest's real targets)
53
55
  _TARGET_KEYS = {
@@ -100,6 +102,10 @@ def build_context(facts: dict) -> dict:
100
102
  auth = facts.get("auth") or {}
101
103
  writes = [f"{e.get('method')} {e.get('path')}" for e in routes.get("endpoints", [])
102
104
  if e.get("method") in WRITE_VERBS][:80]
105
+ # GET/HEAD data-read routes — the read half of the protected surface (the forged-token probe
106
+ # needs these; the bypass class hits reads like /api/wallets/lookup that are in no other bucket).
107
+ reads = [f"{e.get('method')} {e.get('path')}" for e in routes.get("endpoints", [])
108
+ if e.get("method") in ("GET", "HEAD")][:80]
103
109
  return {
104
110
  "target_base_url": "FILL_ME (e.g. http://localhost:3000)",
105
111
  "auth": {
@@ -111,6 +117,7 @@ def build_context(facts: dict) -> dict:
111
117
  },
112
118
  "endpoints": {
113
119
  "writes": writes,
120
+ "reads": reads,
114
121
  "idor_candidates": tgt.get("idor_candidates", [])[:60],
115
122
  "ssrf_candidates": tgt.get("ssrf_candidates", [])[:40],
116
123
  "upload_candidates": tgt.get("upload_candidates", [])[:40],
@@ -14,8 +14,8 @@ from .extractors.base import RepoContext
14
14
  from .extractors.stack import StackExtractor
15
15
 
16
16
 
17
- def build_facts(root: Path, version: str) -> dict:
18
- return extractors.run_all(root, version)
17
+ def build_facts(root: Path, version: str, excludes: list | None = None) -> dict:
18
+ return extractors.run_all(root, version, excludes)
19
19
 
20
20
 
21
21
  def write_facts(facts: dict, out: Path) -> Path:
@@ -43,8 +43,12 @@ def render(facts: dict, scanners: dict, scan_results: list, unified: dict | None
43
43
  chain = " → ".join(e["layer"] for e in f["evidence"])
44
44
  api = (" · " + ", ".join(f["standards"]["owasp_api"])) if f["standards"]["owasp_api"] else ""
45
45
  cal = f.get("calibrated") or {}
46
- calstr = (f" · P(real)≈**{cal.get('p')}** CI {cal.get('ci')} (n={cal.get('n')}, {cal.get('basis')})"
47
- if cal else "")
46
+ if not cal:
47
+ calstr = ""
48
+ elif cal.get("n", 0) == 0 or str(cal.get("basis", "")).startswith("prior"):
49
+ calstr = " · P(real): _uncalibrated — verify manually_" # don't dress n=0 as a measurement (B4)
50
+ else:
51
+ calstr = f" · P(real)≈**{cal.get('p')}** CI {cal.get('ci')} (n={cal.get('n')}, {cal.get('basis')})"
48
52
  _ll.append(f"- **[{f['severity']}/{f['confidence']}]** {f['title']} \n"
49
53
  f" `{f['location']}` · evidence: {chain} · {cwe}{api}{calstr} \n"
50
54
  f" _fix:_ {f['remediation']}")
@@ -68,7 +72,7 @@ def render(facts: dict, scanners: dict, scan_results: list, unified: dict | None
68
72
  | | |
69
73
  |---|---|
70
74
  | Stack | {", ".join(stack.get("languages", [])) or "?"} · {", ".join(stack.get("frameworks", [])) or "?"} · {", ".join(stack.get("datastores", [])) or "?"} |
71
- | Endpoints | **{routes.get('count', 0)}** (via {routes.get('engine','?').split(' ')[0]}) |
75
+ | Endpoints | **{routes.get('count', 0)}** app routes (via {routes.get('engine','?').split(' ')[0]}){(" · " + str(routes.get('spec_derived_excluded')) + " spec-derived excluded") if routes.get('spec_derived_excluded') else ""} |
72
76
  | Auth | {facts.get('auth', {}).get('scheme','?')} · roles: {', '.join(authz.get('roles_detected', [])) or 'none'} |
73
77
  | Access control | {gs.get('with_visible_guard', 0)} guarded · **{gs.get('no_visible_guard', 0)} no visible guard** · global-middleware: {authz.get('global_auth_middleware', False)} |
74
78
  | Static scanner (raw, pre-triage) | {sev_line} |