websec-validator 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {websec_validator-0.2.2/src/websec_validator.egg-info → websec_validator-0.2.4}/PKG-INFO +4 -4
  2. {websec_validator-0.2.2 → websec_validator-0.2.4}/README.md +3 -3
  3. {websec_validator-0.2.2 → websec_validator-0.2.4}/pyproject.toml +1 -1
  4. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/briefing.py +1 -0
  5. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/cli.py +7 -2
  6. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/__init__.py +2 -2
  7. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/auth.py +1 -1
  8. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/authz.py +16 -0
  9. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/base.py +8 -1
  10. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/routes.py +49 -18
  11. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/findings.py +14 -0
  12. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/probes.py +5 -0
  13. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/recon.py +2 -2
  14. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/report.py +7 -3
  15. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/scanners.py +13 -8
  16. websec_validator-0.2.4/src/websec_validator/templates/probes/_lib.py +90 -0
  17. websec_validator-0.2.4/src/websec_validator/templates/probes/bola-cross-tenant.sh +49 -0
  18. websec_validator-0.2.4/src/websec_validator/templates/probes/bola-write-verbs.py +58 -0
  19. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/templates/probes/dlp-bypass-offline.py +21 -29
  20. websec_validator-0.2.4/src/websec_validator/templates/probes/jwt-attacks.sh +61 -0
  21. websec_validator-0.2.4/src/websec_validator/templates/probes/mass-assignment.py +60 -0
  22. websec_validator-0.2.4/src/websec_validator/templates/probes/race-conditions.py +97 -0
  23. websec_validator-0.2.4/src/websec_validator/templates/probes/ssrf-probes.sh +62 -0
  24. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/templates/probes/unauth-baseline.sh +2 -1
  25. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/templates/probes/webhook-forgery.py +10 -13
  26. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +15 -15
  27. {websec_validator-0.2.2 → websec_validator-0.2.4/src/websec_validator.egg-info}/PKG-INFO +4 -4
  28. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator.egg-info/SOURCES.txt +1 -0
  29. {websec_validator-0.2.2 → websec_validator-0.2.4}/tests/test_recon.py +52 -2
  30. websec_validator-0.2.2/src/websec_validator/templates/probes/bola-cross-tenant.sh +0 -192
  31. websec_validator-0.2.2/src/websec_validator/templates/probes/bola-write-verbs.py +0 -147
  32. websec_validator-0.2.2/src/websec_validator/templates/probes/jwt-attacks.sh +0 -161
  33. websec_validator-0.2.2/src/websec_validator/templates/probes/mass-assignment.py +0 -201
  34. websec_validator-0.2.2/src/websec_validator/templates/probes/race-conditions.py +0 -144
  35. websec_validator-0.2.2/src/websec_validator/templates/probes/ssrf-probes.sh +0 -189
  36. {websec_validator-0.2.2 → websec_validator-0.2.4}/LICENSE +0 -0
  37. {websec_validator-0.2.2 → websec_validator-0.2.4}/setup.cfg +0 -0
  38. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/__init__.py +0 -0
  39. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/calibration.json +0 -0
  40. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/calibration.py +0 -0
  41. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/constitution.py +0 -0
  42. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/corpus.json +0 -0
  43. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/dynamic.py +0 -0
  44. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/client_exposure.py +0 -0
  45. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/graphql.py +0 -0
  46. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/iac_ci.py +0 -0
  47. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/integrations.py +0 -0
  48. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/schemas.py +0 -0
  49. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/stack.py +0 -0
  50. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/surface.py +0 -0
  51. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/tenant.py +0 -0
  52. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/proof.py +0 -0
  53. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/templates/probes/compare-roles.sh +0 -0
  54. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/templates/probes/hs256-brute-force.py +0 -0
  55. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/templates/probes/rate-limit-burst.sh +0 -0
  56. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/templates/probes/s3-assess.sh +0 -0
  57. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/templates/reports/access-control-matrix.md.template +0 -0
  58. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/templates/reports/findings-triage.md.template +0 -0
  59. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/templates/reports/pentest-handover-brief.md.template +0 -0
  60. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/templates/reports/per-tool-FINDINGS.md.template +0 -0
  61. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator.egg-info/dependency_links.txt +0 -0
  62. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator.egg-info/entry_points.txt +0 -0
  63. {websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: websec-validator
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
5
5
  Author: Ricardo Accioly
6
6
  License: MIT
@@ -171,9 +171,9 @@ the next dynamic probes (explicitly gated — they mutate).
171
171
 
172
172
  ## Validated on
173
173
 
174
- HugoCross (Next.js), `wu-whatsappinbox` (106-service Express/AWS monorepo), VAmPI, NodeGoat, DVGA
175
- independently reproducing a hand-done pentest's findings (tenant boundary, SSO-endpoint SSRF, media
176
- upload, conversation-BOLA routes, roles).
174
+ A production Next.js app, a large Express/AWS monorepo, and the VAmPI / NodeGoat / DVGA vuln-app
175
+ corpus — independently reproducing a hand-done pentest's findings (tenant boundary, SSRF, file
176
+ upload, cross-tenant BOLA, role/authz gaps).
177
177
 
178
178
  ## Tests
179
179
 
@@ -159,9 +159,9 @@ the next dynamic probes (explicitly gated — they mutate).
159
159
 
160
160
  ## Validated on
161
161
 
162
- HugoCross (Next.js), `wu-whatsappinbox` (106-service Express/AWS monorepo), VAmPI, NodeGoat, DVGA
163
- independently reproducing a hand-done pentest's findings (tenant boundary, SSO-endpoint SSRF, media
164
- upload, conversation-BOLA routes, roles).
162
+ A production Next.js app, a large Express/AWS monorepo, and the VAmPI / NodeGoat / DVGA vuln-app
163
+ corpus — independently reproducing a hand-done pentest's findings (tenant boundary, SSRF, file
164
+ upload, cross-tenant BOLA, role/authz gaps).
165
165
 
166
166
  ## Tests
167
167
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "websec-validator"
7
- version = "0.2.2"
7
+ version = "0.2.4"
8
8
  description = "Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -118,6 +118,7 @@ credentials** — ask the human, never fabricate, never hit production.
118
118
  - **Datastores:** {", ".join(stack.get("datastores", [])) or "?"} · **Monorepo:** {stack.get("monorepo", False)}
119
119
  - **Auth scheme:** `{auth.get("scheme","?")}` (token in {auth.get("token_location","?")}) · guard files: {len(auth.get("guard_files", []))}
120
120
  - **Route engine:** {routes.get("engine","?")} · **{routes.get('count',0)} endpoints** · by method: {routes.get("by_method", {})}
121
+ {("> " + routes["note"]) if routes.get("note") else ""}
121
122
 
122
123
  ## 2. ★ Tenant boundary (confirm first — highest value, easiest to get wrong)
123
124
 
@@ -87,7 +87,7 @@ def cmd_run(args) -> int:
87
87
  print(f"websec-validator v{__version__} · target: {target} · run {ts}\n")
88
88
 
89
89
  # 1. recon
90
- facts = recon.build_facts(target, __version__)
90
+ facts = recon.build_facts(target, __version__, args.exclude)
91
91
  recon.write_facts(facts, out / "FACTS.json")
92
92
  langs = facts["stack"]["languages"]
93
93
  _print_facts_summary(facts)
@@ -98,7 +98,8 @@ def cmd_run(args) -> int:
98
98
  unified = None
99
99
  if args.scan:
100
100
  print("\n running available static scanners (read-only)…")
101
- scan_results = scanners.run_available(target, out, langs)
101
+ only = args.scanners.split(",") if args.scanners else None
102
+ scan_results = scanners.run_available(target, out, langs, excludes=args.exclude, only=only)
102
103
  for r in scan_results:
103
104
  tag = r.get("findings", r.get("status", "?"))
104
105
  print(f" {r['name']}: {tag}")
@@ -342,6 +343,10 @@ def build_parser() -> argparse.ArgumentParser:
342
343
  r.add_argument("target")
343
344
  r.add_argument("--scan", action="store_true", help="also execute available static scanners")
344
345
  r.add_argument("--out", help="output dir (default: ./websec-out)")
346
+ r.add_argument("--exclude", action="append", metavar="PATH",
347
+ help="exclude a path/glob from recon + scanners (repeatable; e.g. --exclude 'docs/**')")
348
+ r.add_argument("--scanners", metavar="A,B",
349
+ help="comma-separated subset of scanners to run with --scan (e.g. gitleaks,semgrep)")
345
350
  r.set_defaults(func=cmd_run)
346
351
 
347
352
  # recon/proof/calibrate are hidden from the main --help (argparse.SUPPRESS): recon is a
@@ -39,9 +39,9 @@ REGISTRY: list[Extractor] = [
39
39
  ]
40
40
 
41
41
 
42
- def run_all(root: Path, version: str) -> dict:
42
+ def run_all(root: Path, version: str, excludes: list | None = None) -> dict:
43
43
  """Walk the repo once, run every extractor, return the merged FACTS dict."""
44
- ctx = RepoContext(root)
44
+ ctx = RepoContext(root, excludes)
45
45
  facts: dict = {
46
46
  "tool": "websec-validator",
47
47
  "version": version,
@@ -47,7 +47,7 @@ class AuthExtractor(Extractor):
47
47
 
48
48
  # Detect ALL schemes present, then pick a primary by priority. A JWT app
49
49
  # that also wires Passport for SSO must read as primary=jwt, not passport
50
- # (the bug the WhatsApp app exposed). Priority: nextauth > jwt > session > passport > api-key.
50
+ # (Passport is often SSO-only). Priority: nextauth > jwt > session > passport > api-key.
51
51
  detected = []
52
52
  if nextauth:
53
53
  detected.append("nextauth (session JWT in cookie)")
@@ -52,6 +52,14 @@ ROLE = re.compile(
52
52
  r"has_?[Rr]ole\s*\(\s*['\"]([\w:.-]+)['\"]|"
53
53
  r"authorizeRoles\s*\(([^)]*)\)|permission_required\s*\(\s*['\"]([\w:.-]+)['\"]")
54
54
 
55
+ # F5: a call to a decoder/parser named "unsafe"/"unverified"/"noVerify"/"skipVerify"
56
+ # (e.g. decodeJwtPayloadUnsafe) — dangerous when its result feeds an auth decision.
57
+ UNSAFE_DECODER = re.compile(r"\b([A-Za-z_]\w*(?:[Uu]nsafe|[Uu]nverified|[Nn]o[Vv]erif\w*|[Ss]kip[Vv]erif\w*)\w*)\s*\(")
58
+ # does this file actually make an auth/identity decision? (so the unsafe decode matters)
59
+ AUTH_CONTEXT = re.compile(
60
+ r"require(?:Auth|Admin|Role|Permission)|isAdmin|authoriz|getToken\s*\(|getServerSession|"
61
+ r"req\.auth\b|currentUser|jwt\.(?:decode|verify)|decodeJwt", re.I)
62
+
55
63
 
56
64
  def _parse_next_middleware(ctx: RepoContext) -> dict:
57
65
  # Next 15.5+/16 renamed `middleware.ts` → `proxy.ts` (both filenames are valid; the
@@ -127,6 +135,13 @@ class AuthzExtractor(Extractor):
127
135
  if e.get("method") in WRITE_VERBS and not PUBLIC_HINT.search(e.get("path", "")):
128
136
  no_guard_writes.append(f"{e['method']} {e['path']} ({relcp or '?'})")
129
137
 
138
+ # F5: files that make an auth decision AND call an unsafe/unverified decoder
139
+ unsafe_decoders = []
140
+ for _p, rel, text in ctx.iter_code():
141
+ if AUTH_CONTEXT.search(text):
142
+ for dec in sorted(set(UNSAFE_DECODER.findall(text))):
143
+ unsafe_decoders.append({"file": rel, "decoder": dec})
144
+
130
145
  if global_auth:
131
146
  where = f"`{mw['file']}` (matcher {mw.get('matchers') or '—'})" if mw_auth else "`app.use(<auth>)`"
132
147
  note = (f"A GLOBAL auth middleware ({where}) was detected — most routes are protected by default. "
@@ -146,5 +161,6 @@ class AuthzExtractor(Extractor):
146
161
  "no_visible_guard": no_guard, "unknown": unknown},
147
162
  "endpoint_guards": egs[:400],
148
163
  "write_endpoints_without_visible_guard": sorted(set(no_guard_writes))[:60],
164
+ "unsafe_auth_decoders": unsafe_decoders[:30],
149
165
  "note": note,
150
166
  }
@@ -9,6 +9,7 @@ still say something useful.
9
9
 
10
10
  from __future__ import annotations
11
11
 
12
+ import fnmatch
12
13
  from pathlib import Path
13
14
 
14
15
  SKIP_DIRS = {".git", "node_modules", "dist", "build", ".next", ".nuxt", "venv",
@@ -27,13 +28,17 @@ MAX_BYTES = 2_000_000
27
28
  class RepoContext:
28
29
  """Walk the tree once; cache file text; serve cheap queries to every extractor."""
29
30
 
30
- def __init__(self, root: Path):
31
+ def __init__(self, root: Path, excludes: list | None = None):
31
32
  self.root = root
33
+ self.excludes = [e for e in (excludes or []) if e] # user --exclude paths/globs
32
34
  self._text: dict[Path, str] = {}
33
35
  self.code_files: list[Path] = []
34
36
  self.stack: dict = {} # filled by StackExtractor, read by the rest
35
37
  self._walk()
36
38
 
39
+ def _excluded(self, rel: str) -> bool:
40
+ return any(ex in rel or fnmatch.fnmatch(rel, ex) for ex in self.excludes)
41
+
37
42
  def _walk(self) -> None:
38
43
  n = 0
39
44
  for p in self.root.rglob("*"):
@@ -44,6 +49,8 @@ class RepoContext:
44
49
  # have its whole tree skipped.
45
50
  if p.is_dir() or any(part in SKIP_DIRS for part in p.relative_to(self.root).parts):
46
51
  continue
52
+ if self.excludes and self._excluded(str(p.relative_to(self.root))):
53
+ continue
47
54
  if p.suffix.lower() in CODE_EXT:
48
55
  self.code_files.append(p)
49
56
  n += 1
@@ -38,6 +38,20 @@ TRAVERSAL_NAMES = re.compile(r"^(file|filename|filepath|path|dir|folder|template
38
38
  TEMPLATED = ("BASE_URL", "localhost", "127.0.0.1", "${", "{{")
39
39
  ASSET_GLOB = re.compile(r"\*\.\w+")
40
40
 
41
+ # A route whose source file is a vendored/third-party API SPEC (OpenAPI/Swagger/GraphQL
42
+ # schema), not an app handler. Noir parses these and emits their paths as if the app
43
+ # served them — which on a repo that vendors e.g. a 16k-line swagger turns ~15 real
44
+ # findings into hundreds of phantom ones. We split these out as informational.
45
+ SPEC_PATH = re.compile(
46
+ r"\.(?:ya?ml|graphql|gql|raml)$" # spec file formats
47
+ r"|(?:^|/)(?:node_modules|vendor|vendored|third[_-]?party|examples?|schemas?"
48
+ r"|(?:docs?|documentation)[\w-]*)/" # vendor/docs/schema dirs
49
+ r"|swagger|openapi", re.I)
50
+
51
+
52
+ def _is_spec_derived(code_path: str) -> bool:
53
+ return bool(code_path) and bool(SPEC_PATH.search(code_path))
54
+
41
55
 
42
56
  def _clean_path(p: str) -> str:
43
57
  p = re.sub(r":(\w+)", r"{\1}", p) # Express :id -> {id}
@@ -53,16 +67,17 @@ def _is_noise(path: str) -> bool:
53
67
  return bool(ASSET_GLOB.search(path)) # static-asset glob route (/*.png)
54
68
 
55
69
 
56
- def _noir_scan(root: Path) -> list | None:
70
+ def _noir_scan(root: Path, extra_excludes: list | None = None) -> list | None:
57
71
  """Run Noir → list of endpoint dicts, or None if Noir unavailable/failed."""
58
72
  if not shutil.which("noir"):
59
73
  return None
74
+ excl = EXCLUDE_GLOBS + ("," + ",".join(extra_excludes) if extra_excludes else "")
60
75
  with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tf:
61
76
  out = Path(tf.name)
62
77
  try:
63
78
  proc = subprocess.run(
64
79
  ["noir", "scan", str(root), "-f", "json", "-o", str(out),
65
- "--exclude-path", EXCLUDE_GLOBS, "--no-log", "--no-color"],
80
+ "--exclude-path", excl, "--no-log", "--no-color"],
66
81
  capture_output=True, text=True, timeout=300)
67
82
  if not out.exists():
68
83
  return None
@@ -77,8 +92,10 @@ def _noir_scan(root: Path) -> list | None:
77
92
  pass
78
93
 
79
94
 
80
- def _normalize_noir(eps: list) -> list:
81
- rows, seen = [], set()
95
+ def _normalize_noir(eps: list) -> tuple:
96
+ """→ (app_routes, spec_derived_routes). Routes whose source file is a vendored API
97
+ spec are split out so they don't generate phantom findings (B1)."""
98
+ rows, spec, seen = [], [], set()
82
99
  for e in eps:
83
100
  if e.get("internal"):
84
101
  continue
@@ -89,21 +106,22 @@ def _normalize_noir(eps: list) -> list:
89
106
  if _is_noise(path):
90
107
  continue
91
108
  method = (e.get("method") or "GET").upper()
92
- if (method, path) in seen:
93
- continue
94
- seen.add((method, path))
95
- params = [{"name": p.get("name", ""), "where": p.get("param_type", "")}
96
- for p in (e.get("params") or [])]
97
109
  cp = (e.get("details", {}) or {}).get("code_paths") or [{}]
98
- rows.append({
110
+ code_path = cp[0].get("path", "")
111
+ if (method, path, code_path) in seen:
112
+ continue
113
+ seen.add((method, path, code_path))
114
+ row = {
99
115
  "method": method,
100
116
  "path": path,
101
- "params": params,
117
+ "params": [{"name": p.get("name", ""), "where": p.get("param_type", "")}
118
+ for p in (e.get("params") or [])],
102
119
  "technology": (e.get("details", {}) or {}).get("technology", ""),
103
- "code_path": cp[0].get("path", ""),
120
+ "code_path": code_path,
104
121
  "source": "noir",
105
- })
106
- return rows
122
+ }
123
+ (spec if _is_spec_derived(code_path) else rows).append(row)
124
+ return rows, spec
107
125
 
108
126
 
109
127
  # ---- regex fallback (only when Noir is absent) ---------------------------------------------
@@ -193,19 +211,23 @@ class RoutesExtractor(Extractor):
193
211
  category = "surface"
194
212
 
195
213
  def extract(self, ctx: RepoContext, facts: dict) -> dict:
196
- eps = _noir_scan(ctx.root)
214
+ eps = _noir_scan(ctx.root, getattr(ctx, "excludes", None))
197
215
  if eps is not None:
198
- routes = _normalize_noir(eps)
216
+ routes, spec_derived = _normalize_noir(eps)
199
217
  engine = "noir"
200
218
  else:
201
- routes = _fallback(ctx)
219
+ routes, spec_derived = _fallback(ctx), []
202
220
  engine = "regex-fallback (install OWASP Noir for full coverage: brew install noir)"
221
+ # honor user --exclude against route code_paths too (Noir's own --exclude-path glob is
222
+ # unreliable for bare dir names; this guarantees `--exclude <path>` drops those routes).
223
+ if getattr(ctx, "excludes", None):
224
+ routes = [r for r in routes if not ctx._excluded(r.get("code_path", ""))]
203
225
  by_method: dict = {}
204
226
  by_tech: dict = {}
205
227
  for r in routes:
206
228
  by_method[r["method"]] = by_method.get(r["method"], 0) + 1
207
229
  by_tech[r["technology"]] = by_tech.get(r["technology"], 0) + 1
208
- return {
230
+ out = {
209
231
  "engine": engine,
210
232
  "count": len(routes),
211
233
  "by_method": by_method,
@@ -213,3 +235,12 @@ class RoutesExtractor(Extractor):
213
235
  "endpoints": routes,
214
236
  "targeting": _derive(routes),
215
237
  }
238
+ if spec_derived:
239
+ from collections import Counter
240
+ srcs = Counter(r["code_path"] for r in spec_derived)
241
+ out["spec_derived_excluded"] = len(spec_derived)
242
+ out["spec_derived_sources"] = [f"{n}× {f}" for f, n in srcs.most_common(8)]
243
+ out["note"] = (f"⚠ {len(spec_derived)} routes came from vendored API SPEC files "
244
+ f"(OpenAPI/Swagger/GraphQL), not app handlers — EXCLUDED from the {len(routes)} "
245
+ f"app routes + all findings. Sources: {', '.join(f for f, _ in srcs.most_common(5))}.")
246
+ return out
@@ -26,6 +26,9 @@ from . import calibration
26
26
  STANDARDS = {
27
27
  "missing-auth": (["CWE-862 Missing Authorization", "CWE-306 Missing Authentication"],
28
28
  "ASVS V4.1.1", ["API1:2023 BOLA", "API5:2023 BFLA"]),
29
+ "unsafe-auth-decoder": (["CWE-347 Improper Verification of Cryptographic Signature",
30
+ "CWE-345 Insufficient Verification of Data Authenticity"],
31
+ "ASVS V3.5.2", ["API2:2023 Broken Authentication"]),
29
32
  "bola": (["CWE-639 Authorization Bypass (IDOR)"], "ASVS V4.2.1", ["API1:2023 BOLA"]),
30
33
  "ssrf": (["CWE-918 SSRF"], "ASVS V12.6", ["API7:2023 SSRF"]),
31
34
  "secret": (["CWE-798 Hard-coded Credentials"], "ASVS V2.10", ["API8:2023 Misconfiguration"]),
@@ -48,6 +51,9 @@ REMEDIATION = {
48
51
  "missing-auth": "Add an auth guard to the handler (e.g. requireAuth()/getServerSession()), or a "
49
52
  "middleware matcher over /api/(.*) with an explicit public allowlist so it can't be forgotten.",
50
53
  "bola": "Enforce object ownership: verify the authenticated principal owns/can access the resource id (tenant scope).",
54
+ "unsafe-auth-decoder": "Verify the token/signature before trusting it for an auth/identity decision — use a "
55
+ "verifying decode (e.g. jwt.verify with the key / a checked session), never an *Unsafe* "
56
+ "or decode-only path whose output then feeds requireAuth/requireAdmin.",
51
57
  "ssrf": "Validate + allowlist outbound URLs; block RFC1918/IMDS/file://; never fetch a raw user-supplied URL.",
52
58
  "secret": "Rotate the credential, remove from code/history, load from a secrets manager.",
53
59
  "cve": "Upgrade the dependency to the fixed version.",
@@ -150,6 +156,14 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
150
156
  [{"layer": "dynamic", "detail": f"cross-tenant GET returned another tenant's data "
151
157
  f"(HTTP {lk.get('status')}, {lk.get('direction')})"}]))
152
158
 
159
+ # ---- 1c. Unsafe/unverified decoder feeding an auth decision (F5) ----
160
+ for ud in ((facts.get("authz", {}) or {}).get("unsafe_auth_decoders", []) or []):
161
+ out.append(_f(f"Auth decision uses an unverified decoder: {ud.get('decoder')}", "access-control",
162
+ "unsafe-auth-decoder", "HIGH", "MEDIUM", ud.get("file", ""),
163
+ [{"layer": "recon", "detail": f"{ud.get('file')} makes an auth/identity decision AND calls "
164
+ f"{ud.get('decoder')}() — if that decodes a token/signature WITHOUT verifying it, a forged "
165
+ "value is trusted (the decodeJwtPayloadUnsafe → requireAdmin class of bug). Trace the call path."}]))
166
+
153
167
  # ---- 2. Static scanner findings (de-duplicated `unified`) ----
154
168
  cat_to_class = {"sca": "cve", "secret": "secret", "iac": "iac", "sast": "sast"}
155
169
  for t in (unified or {}).get("top", []):
@@ -136,6 +136,11 @@ def stage(chosen: list, outdir: Path, facts: dict | None = None) -> list:
136
136
  manifest = [{"key": "_context", "file": "probes/probe-context.json",
137
137
  "note": "the target's real routes/auth/fields — finalize the drafts against this"}]
138
138
  src_root = resources.files("websec_validator").joinpath("templates/probes")
139
+ # always ship the shared helper the Python probes import (load context + env auth)
140
+ try:
141
+ (dest / "_lib.py").write_text(src_root.joinpath("_lib.py").read_text())
142
+ except Exception:
143
+ pass
139
144
  for key in chosen:
140
145
  fname, attack, needs = PROBES[key]
141
146
  targets = (tgt.get(_TARGET_KEYS[key], []) if key in _TARGET_KEYS else [])[:15]
@@ -14,8 +14,8 @@ from .extractors.base import RepoContext
14
14
  from .extractors.stack import StackExtractor
15
15
 
16
16
 
17
- def build_facts(root: Path, version: str) -> dict:
18
- return extractors.run_all(root, version)
17
+ def build_facts(root: Path, version: str, excludes: list | None = None) -> dict:
18
+ return extractors.run_all(root, version, excludes)
19
19
 
20
20
 
21
21
  def write_facts(facts: dict, out: Path) -> Path:
@@ -43,8 +43,12 @@ def render(facts: dict, scanners: dict, scan_results: list, unified: dict | None
43
43
  chain = " → ".join(e["layer"] for e in f["evidence"])
44
44
  api = (" · " + ", ".join(f["standards"]["owasp_api"])) if f["standards"]["owasp_api"] else ""
45
45
  cal = f.get("calibrated") or {}
46
- calstr = (f" · P(real)≈**{cal.get('p')}** CI {cal.get('ci')} (n={cal.get('n')}, {cal.get('basis')})"
47
- if cal else "")
46
+ if not cal:
47
+ calstr = ""
48
+ elif cal.get("n", 0) == 0 or str(cal.get("basis", "")).startswith("prior"):
49
+ calstr = " · P(real): _uncalibrated — verify manually_" # don't dress n=0 as a measurement (B4)
50
+ else:
51
+ calstr = f" · P(real)≈**{cal.get('p')}** CI {cal.get('ci')} (n={cal.get('n')}, {cal.get('basis')})"
48
52
  _ll.append(f"- **[{f['severity']}/{f['confidence']}]** {f['title']} \n"
49
53
  f" `{f['location']}` · evidence: {chain} · {cwe}{api}{calstr} \n"
50
54
  f" _fix:_ {f['remediation']}")
@@ -68,7 +72,7 @@ def render(facts: dict, scanners: dict, scan_results: list, unified: dict | None
68
72
  | | |
69
73
  |---|---|
70
74
  | Stack | {", ".join(stack.get("languages", [])) or "?"} · {", ".join(stack.get("frameworks", [])) or "?"} · {", ".join(stack.get("datastores", [])) or "?"} |
71
- | Endpoints | **{routes.get('count', 0)}** (via {routes.get('engine','?').split(' ')[0]}) |
75
+ | Endpoints | **{routes.get('count', 0)}** app routes (via {routes.get('engine','?').split(' ')[0]}){(" · " + str(routes.get('spec_derived_excluded')) + " spec-derived excluded") if routes.get('spec_derived_excluded') else ""} |
72
76
  | Auth | {facts.get('auth', {}).get('scheme','?')} · roles: {', '.join(authz.get('roles_detected', [])) or 'none'} |
73
77
  | Access control | {gs.get('with_visible_guard', 0)} guarded · **{gs.get('no_visible_guard', 0)} no visible guard** · global-middleware: {authz.get('global_auth_middleware', False)} |
74
78
  | Static scanner (raw, pre-triage) | {sev_line} |
@@ -40,27 +40,27 @@ EXCLUDE_DIRS = ("websec-out", "node_modules", ".next", "dist", "build", ".git",
40
40
  "security", ".venv", "venv", "__pycache__", ".mypy_cache", "coverage")
41
41
 
42
42
 
43
- def _trivy(target: Path, out: Path) -> list:
43
+ def _trivy(target: Path, out: Path, excludes=()) -> list:
44
44
  # SCA + secrets + IaC misconfig in one pass; pinned by the user's install.
45
45
  cmd = ["trivy", "fs", "--scanners", "vuln,secret,misconfig", "--format", "json", "--output", str(out)]
46
- for d in EXCLUDE_DIRS:
46
+ for d in list(EXCLUDE_DIRS) + list(excludes):
47
47
  cmd += ["--skip-dirs", d]
48
48
  return cmd + [str(target)]
49
49
 
50
50
 
51
- def _gitleaks(target: Path, out: Path) -> list:
51
+ def _gitleaks(target: Path, out: Path, excludes=()) -> list:
52
52
  return ["gitleaks", "detect", "--source", str(target), "--no-banner",
53
53
  "--report-format", "json", "--report-path", str(out)]
54
54
 
55
55
 
56
- def _semgrep(target: Path, out: Path) -> list:
56
+ def _semgrep(target: Path, out: Path, excludes=()) -> list:
57
57
  cmd = ["semgrep", "scan", "--config", "auto", "--json", "--output", str(out)]
58
- for d in EXCLUDE_DIRS:
58
+ for d in list(EXCLUDE_DIRS) + list(excludes):
59
59
  cmd += ["--exclude", d]
60
60
  return cmd + [str(target)]
61
61
 
62
62
 
63
- def _checkov(target: Path, out: Path) -> list:
63
+ def _checkov(target: Path, out: Path, excludes=()) -> list:
64
64
  return ["checkov", "-d", str(target), "--compact", "-o", "json",
65
65
  "--output-file-path", str(out.parent)]
66
66
 
@@ -103,26 +103,31 @@ def detect(stack_languages: list | None = None) -> dict:
103
103
 
104
104
 
105
105
  def run_available(target: Path, outdir: Path, stack_languages: list | None = None,
106
- timeout: int = 600) -> list:
106
+ timeout: int = 600, excludes: list | None = None, only: list | None = None) -> list:
107
107
  """Execute every available, runnable static scanner. Returns per-scanner status.
108
108
 
109
+ `excludes`: extra paths/dirs to skip (--exclude). `only`: run just these scanner keys.
109
110
  Raw JSON lands in outdir/scanners/<key>.json. We capture status only here;
110
111
  cross-tool normalization + de-duplication is a separate (next) step.
111
112
  """
112
113
  langs = set(stack_languages or [])
114
+ excludes = excludes or []
115
+ only = set(only) if only else None
113
116
  scan_dir = outdir / "scanners"
114
117
  scan_dir.mkdir(parents=True, exist_ok=True)
115
118
  results = []
116
119
  for s in REGISTRY:
117
120
  if s.argv is None:
118
121
  continue # detect-only for now
122
+ if only is not None and s.key not in only:
123
+ continue
119
124
  if s.languages and not (set(s.languages) & langs):
120
125
  continue
121
126
  if not shutil.which(s.binary):
122
127
  continue
123
128
  out_file = scan_dir / f"{s.key}.json"
124
129
  try:
125
- proc = subprocess.run(s.argv(target, out_file), capture_output=True,
130
+ proc = subprocess.run(s.argv(target, out_file, excludes), capture_output=True,
126
131
  text=True, timeout=timeout)
127
132
  results.append({"key": s.key, "name": s.name, "category": s.category,
128
133
  "exit_code": proc.returncode, "output": str(out_file),
@@ -0,0 +1,90 @@
1
+ """Shared probe helpers — load THIS target's real surface from probe-context.json
2
+ (written by `websec run`) and auth/ids from environment variables.
3
+
4
+ Why env vars: recon gives you the real endpoints, auth scheme, and tenant key — but it
5
+ cannot mint live tokens or know real object ids. You (or your agent, against a TEST
6
+ instance) supply those:
7
+
8
+ TARGET=http://localhost:3000 # base URL (or set target_base_url in probe-context.json)
9
+ TOKEN_A=... TOKEN_B=... # bearer JWTs for two test accounts (different tenants)
10
+ COOKIE_A=... COOKIE_B=... # OR session cookies (e.g. NextAuth) instead of bearer
11
+ APIKEY=... # OR an API key
12
+ OBJ_A=... OBJ_B=... # a sample object id owned by each account/tenant
13
+ GROUP_A=... GROUP_B=... # each account's tenant/group id (defaults to OBJ_* if unset)
14
+
15
+ Run only against a TEST instance you're authorized to probe. Never production.
16
+ """
17
+ import json
18
+ import os
19
+ import subprocess
20
+ import sys
21
+ from pathlib import Path
22
+
23
+ _HERE = Path(__file__).resolve().parent
24
+
25
+
26
def context() -> dict:
    """Load probe-context.json from the directory that holds this module.

    Exits the process (via ``sys.exit``) with an explanatory message when the
    file is absent, cannot be read, or does not contain valid JSON. The file
    is decoded as UTF-8 explicitly so the result does not depend on the
    platform's default encoding.

    Returns:
        The parsed JSON document.
    """
    p = _HERE / "probe-context.json"
    if not p.is_file():
        sys.exit("probe-context.json not found next to this probe — run `websec run <repo>` and use "
                 "the probes/ it stages (probe-context.json holds this app's real routes/auth).")
    try:
        return json.loads(p.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        sys.exit(f"probe-context.json could not be read as JSON ({exc}) — "
                 "re-run `websec run <repo>` to regenerate it.")
32
+
33
+
34
def base_url() -> str:
    """Return the target's base URL with trailing slashes removed.

    The ``TARGET`` environment variable wins; only when it is unset or empty
    is ``target_base_url`` read from probe-context.json. A missing value, or
    one still starting with ``FILL``, terminates the process with a hint.
    """
    url = os.environ.get("TARGET")
    if not url:
        url = context().get("target_base_url", "")
    if url and not url.startswith("FILL"):
        return url.rstrip("/")
    sys.exit("Set TARGET=http://host:port (or fill target_base_url in probe-context.json).")
39
+
40
+
41
def auth_headers(role: str = "A") -> list:
    """Build the curl ``-H`` arguments that authenticate the given role.

    Credentials are looked up in the environment in priority order:
    ``TOKEN_<role>`` (bearer), then ``COOKIE_<role>``, then the shared
    ``APIKEY``. The first non-empty one is used; with none set the result is
    an empty list, i.e. an unauthenticated request.
    """
    env = os.environ
    candidates = (
        (env.get(f"TOKEN_{role}"), "Authorization: Bearer "),
        (env.get(f"COOKIE_{role}"), "Cookie: "),
        (env.get("APIKEY"), "X-API-Key: "),
    )
    for value, prefix in candidates:
        if value:
            return ["-H", prefix + value]
    return []  # unauthenticated
53
+
54
+
55
def require(*names: str) -> None:
    """Exit with a message unless every named environment variable is set and non-empty."""
    absent = []
    for var in names:
        if not os.environ.get(var):
            absent.append(var)
    if not absent:
        return
    sys.exit(f"This probe needs these env var(s): {', '.join(absent)}. See _lib.py for the list.")
59
+
60
+
61
def curl(method: str, url: str, headers=None, body=None, timeout: int = 20):
    """Issue one HTTP request through the ``curl`` binary.

    Args:
        method: HTTP verb passed to ``curl -X``.
        url: full request URL.
        headers: optional sequence of extra curl arguments (e.g. the
            ``["-H", "..."]`` pairs produced by ``auth_headers``).
        body: optional JSON-serialisable payload; when given it is sent with
            a ``content-type: application/json`` header.
        timeout: value for curl's ``--max-time``, in seconds.

    Returns:
        ``(status_code, body_text)``. Never raises on HTTP errors; the status
        is 0 when curl printed no parseable status trailer.
    """
    marker = "\nHTTP_CODE:"
    cmd = ["curl", "-s", "-X", method, url, "-w", marker + "%{http_code}",
           "--max-time", str(timeout)] + list(headers or [])
    if body is not None:
        cmd += ["-H", "content-type: application/json", "-d", json.dumps(body)]
    out = subprocess.run(cmd, capture_output=True, text=True).stdout or ""
    # curl appends the -w trailer after the response body, so split on the LAST
    # marker: a body that itself contains "HTTP_CODE:" must not be truncated.
    text, found, trailer = out.rpartition(marker)
    if not found:
        return 0, out
    try:
        code = int(trailer.strip())
    except ValueError:
        code = 0
    return code, text
70
+
71
+
72
def tenant_key(default: str = "groupId") -> str:
    """Return the first entry of ``tenant_keys`` in probe-context.json, or ``default`` when it is missing or empty."""
    candidates = context().get("tenant_keys")
    if not candidates:
        return default
    return candidates[0]
75
+
76
+
77
def write_endpoints() -> list:
    """List this app's mutating routes as ``(METHOD, path)`` tuples.

    Entries come from ``endpoints.writes`` in probe-context.json; each is split
    at its first space, and entries without a space are dropped.
    """
    writes = context().get("endpoints", {}).get("writes", [])
    pieces = (entry.split(" ", 1) for entry in writes)
    return [(pair[0], pair[1]) for pair in pieces if len(pair) == 2]
85
+
86
+
87
def save(name: str, findings: list) -> Path:
    """Write ``findings`` as indented JSON to ``<name>-findings.json`` beside this module and return that path."""
    target = _HERE / f"{name}-findings.json"
    payload = json.dumps(findings, indent=2)
    target.write_text(payload + "\n")
    return target
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env bash
2
+ # BOLA / cross-tenant READ probe — FACTS-driven. Role A uses its OWN token against
3
+ # tenant B's id (and B→A), on this app's tenant-scoped routes (from probe-context.json).
4
+ # Expect 401/403/404. A 200 that returns the OTHER tenant's data = cross-tenant BOLA
5
+ # (OWASP API #1) — the thing an automated scanner can't tell from "just another 200".
6
+ #
7
+ # Env: TARGET, TOKEN_A, TOKEN_B (two accounts in DIFFERENT tenants), GROUP_A, GROUP_B
8
+ # (each account's tenant/group id). Bearer auth; cookie users: swap the -H below.
9
+ # Run only against a TEST instance you're authorized to probe.
10
set -uo pipefail
# Work from the script's own directory so the relative probe-context.json path
# resolves; stop if that fails rather than reading a file from the caller's cwd.
cd "$(dirname "$0")" || { echo "cannot cd to the probe directory" >&2; exit 2; }
ctx=probe-context.json

# TARGET wins; otherwise fall back to target_base_url from probe-context.json.
BASE="${TARGET:-$(python3 -c "import json;print(json.load(open('$ctx'))['target_base_url'])" 2>/dev/null)}"
# Drop trailing slashes so "$BASE$path" never turns into "host//path".
while [ "${BASE%/}" != "$BASE" ]; do BASE="${BASE%/}"; done
if [ -z "${BASE:-}" ] || [ "${BASE#FILL}" != "$BASE" ]; then echo "Set TARGET=http://host:port (or fill probe-context.json)"; exit 2; fi
: "${TOKEN_A:?set TOKEN_A=<jwt for an account in tenant A>}"
: "${TOKEN_B:?set TOKEN_B=<jwt for an account in a DIFFERENT tenant>}"
: "${GROUP_A:?set GROUP_A=<tenant/group id of account A>}"
: "${GROUP_B:?set GROUP_B=<tenant/group id of account B>}"

# The route list can only come from probe-context.json; say so explicitly when
# the file is absent instead of reporting "no routes".
[ -f "$ctx" ] || { echo "probe-context.json not found next to this probe — run \`websec run <repo>\` and use the probes/ it stages." >&2; exit 2; }

# Collect candidate paths: idor_candidates if present, else the path part of
# each "METHOD path" write entry. A leading upper-case verb is stripped.
PATHS=()  # (portable; macOS bash 3.2 lacks `mapfile`)
while IFS= read -r line; do [ -n "$line" ] && PATHS+=("$line"); done < <(python3 -c "
import json
c = json.load(open('$ctx'))['endpoints']
cand = c.get('idor_candidates') or [w.split(' ',1)[1] for w in c.get('writes',[]) if ' ' in w]
for p in cand:
    print(p.split(' ',1)[1] if (' ' in p and p.split(' ',1)[0].isupper()) else p)
" 2>/dev/null)
[ "${#PATHS[@]}" -eq 0 ] && { echo "No tenant-scoped / IDOR-candidate routes in probe-context.json."; exit 2; }

# Counters updated by attack(): blocked responses and potential leaks.
pass=0; leak=0
32
# Replace every {placeholder} in a route template with a literal value.
# Arguments: $1 - route template, $2 - value to insert
# Outputs:   the filled-in path on stdout
# The value is inserted verbatim (no regex/template interpretation), and an
# empty "{}" is left untouched because it is not a placeholder.
fill_placeholders() {
  local rest=$1 value=$2 filled='' open='{' close='}' head tail inner
  while [[ "$rest" == *"$open"* ]]; do
    head=${rest%%"$open"*}   # text before the first "{"
    tail=${rest#*"$open"}    # text after the first "{"
    # No closing brace left: nothing further can be a placeholder.
    [[ "$tail" == *"$close"* ]] || break
    inner=${tail%%"$close"*}
    if [[ -n "$inner" ]]; then
      filled+="$head$value"
      rest=${tail#*"$close"}
    else
      filled+="$head$open"
      rest=$tail
    fi
  done
  printf '%s\n' "$filled$rest"
}

# Request every candidate path with one tenant's token and another tenant's id.
# Globals:   PATHS, BASE (read); pass, leak (incremented)
# Arguments: $1=token $2=target-group-id $3=label
# Outputs:   one result line per path on stdout
attack() {
  local raw path code
  for raw in "${PATHS[@]}"; do
    path=$(fill_placeholders "$raw" "$2")
    code=$(curl -s -o /dev/null -w '%{http_code}' -m 15 -H "Authorization: Bearer $1" "$BASE$path")
    case "$code" in
      401|403|404) printf ' ok %s %-4s %s\n' "$code" "$3" "$path"; pass=$((pass+1)) ;;
      200|206) printf ' LEAK %s %-4s %s ← returned data for the OTHER tenant? verify\n' "$code" "$3" "$path"; leak=$((leak+1)) ;;
      *) printf ' ?? %s %-4s %s\n' "$code" "$3" "$path" ;;
    esac
  done
}
43
+
44
# Probe in both directions: each account's token against the other tenant's id.
echo "=== cross-tenant BOLA vs $BASE (expect 401/403/404) ==="
echo "--- A → B's tenant ($GROUP_B) ---"; attack "$TOKEN_A" "$GROUP_B" "A→B"
echo "--- B → A's tenant ($GROUP_A) ---"; attack "$TOKEN_B" "$GROUP_A" "B→A"
echo "summary: $pass blocked · $leak potential leak(s)"
[ "$leak" -gt 0 ] && echo "A 200 means the route served the OTHER tenant's id — confirm it's actually their data (not empty / your own), then debate-verify before reporting."
# Exit status is the leak count, capped at 255: the shell keeps only the low
# 8 bits of the status, so an uncapped count of 256 would report success.
[ "$leak" -gt 255 ] && exit 255
exit "$leak"