websec-validator 0.2.1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. {websec_validator-0.2.1/src/websec_validator.egg-info → websec_validator-0.2.3}/PKG-INFO +34 -13
  2. {websec_validator-0.2.1 → websec_validator-0.2.3}/README.md +33 -12
  3. {websec_validator-0.2.1 → websec_validator-0.2.3}/pyproject.toml +1 -1
  4. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/briefing.py +5 -0
  5. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/cli.py +5 -1
  6. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/dynamic.py +26 -2
  7. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/extractors/auth.py +1 -1
  8. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/extractors/authz.py +29 -9
  9. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/findings.py +25 -4
  10. websec_validator-0.2.3/src/websec_validator/probes.py +161 -0
  11. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/report.py +2 -1
  12. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/scanners.py +42 -11
  13. websec_validator-0.2.3/src/websec_validator/templates/probes/_lib.py +90 -0
  14. websec_validator-0.2.3/src/websec_validator/templates/probes/bola-cross-tenant.sh +48 -0
  15. websec_validator-0.2.3/src/websec_validator/templates/probes/bola-write-verbs.py +58 -0
  16. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/templates/probes/dlp-bypass-offline.py +21 -29
  17. websec_validator-0.2.3/src/websec_validator/templates/probes/mass-assignment.py +60 -0
  18. websec_validator-0.2.3/src/websec_validator/templates/probes/race-conditions.py +97 -0
  19. websec_validator-0.2.3/src/websec_validator/templates/probes/unauth-baseline.sh +44 -0
  20. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/templates/probes/webhook-forgery.py +10 -13
  21. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +15 -15
  22. {websec_validator-0.2.1 → websec_validator-0.2.3/src/websec_validator.egg-info}/PKG-INFO +34 -13
  23. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator.egg-info/SOURCES.txt +2 -0
  24. {websec_validator-0.2.1 → websec_validator-0.2.3}/tests/test_recon.py +100 -1
  25. websec_validator-0.2.1/src/websec_validator/probes.py +0 -79
  26. websec_validator-0.2.1/src/websec_validator/templates/probes/bola-cross-tenant.sh +0 -192
  27. websec_validator-0.2.1/src/websec_validator/templates/probes/bola-write-verbs.py +0 -147
  28. websec_validator-0.2.1/src/websec_validator/templates/probes/mass-assignment.py +0 -201
  29. websec_validator-0.2.1/src/websec_validator/templates/probes/race-conditions.py +0 -144
  30. {websec_validator-0.2.1 → websec_validator-0.2.3}/LICENSE +0 -0
  31. {websec_validator-0.2.1 → websec_validator-0.2.3}/setup.cfg +0 -0
  32. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/__init__.py +0 -0
  33. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/calibration.json +0 -0
  34. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/calibration.py +0 -0
  35. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/constitution.py +0 -0
  36. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/corpus.json +0 -0
  37. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/extractors/__init__.py +0 -0
  38. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/extractors/base.py +0 -0
  39. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/extractors/client_exposure.py +0 -0
  40. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/extractors/graphql.py +0 -0
  41. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/extractors/iac_ci.py +0 -0
  42. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/extractors/integrations.py +0 -0
  43. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/extractors/routes.py +0 -0
  44. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/extractors/schemas.py +0 -0
  45. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/extractors/stack.py +0 -0
  46. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/extractors/surface.py +0 -0
  47. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/extractors/tenant.py +0 -0
  48. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/proof.py +0 -0
  49. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/recon.py +0 -0
  50. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/templates/probes/compare-roles.sh +0 -0
  51. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/templates/probes/hs256-brute-force.py +0 -0
  52. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/templates/probes/jwt-attacks.sh +0 -0
  53. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/templates/probes/rate-limit-burst.sh +0 -0
  54. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/templates/probes/s3-assess.sh +0 -0
  55. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/templates/probes/ssrf-probes.sh +0 -0
  56. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/templates/reports/access-control-matrix.md.template +0 -0
  57. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/templates/reports/findings-triage.md.template +0 -0
  58. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/templates/reports/pentest-handover-brief.md.template +0 -0
  59. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator/templates/reports/per-tool-FINDINGS.md.template +0 -0
  60. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator.egg-info/dependency_links.txt +0 -0
  61. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator.egg-info/entry_points.txt +0 -0
  62. {websec_validator-0.2.1 → websec_validator-0.2.3}/src/websec_validator.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: websec-validator
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
5
5
  Author: Ricardo Accioly
6
6
  License: MIT
@@ -21,7 +21,7 @@ Dynamic: license-file
21
21
  It is *not* an autonomous scanner and *not* a SaaS. It's the missing front-half: the thing that
22
22
  turns a repo into a precise, fact-grounded security brief an AI agent (with a human in the loop)
23
23
  can act on — an auto-filled, repo-aware version of a senior pentester's "here's what to test and
24
- how" handoff. Full landscape + why this niche is real: [`MARKET-ANALYSIS-AND-VERDICT.md`](MARKET-ANALYSIS-AND-VERDICT.md).
24
+ how" handoff. How it works + the reasoning behind every check: [`docs/METHODOLOGY.md`](docs/METHODOLOGY.md).
25
25
 
26
26
  ## Quickstart — just point it at your repo
27
27
 
@@ -37,7 +37,7 @@ local. The four ways to get there, all ending in the same `AGENT-BRIEFING.md` yo
37
37
  | **Tell your agent** (simplest) | — | say the line above |
38
38
  | **CLI** (a terminal) | `pipx install websec-validator` | `websec run /path/to/your/app` |
39
39
  | **Claude Code plugin** (slash) | `/plugin marketplace add raccioly/websec-validator` → `/plugin install websec-validator@websec-plugins` | invoke the **security-pass** skill, or just ask |
40
- | **Docker** (no install) | `docker build -t websec-validator .` | `docker run --rm -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out` |
40
+ | **Docker** (no install) | `docker build -t websec-validator .` | `docker run --rm --user "$(id -u):$(id -g)" -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out` |
41
41
 
42
42
  ➡️ **Want the reasoning behind every check?** Read **[docs/METHODOLOGY.md](docs/METHODOLOGY.md)** — what each test does and why.
43
43
 
@@ -63,7 +63,7 @@ No need to install Noir or any scanner — the image bundles them all (arch-awar
63
63
 
64
64
  ```bash
65
65
  docker build -t websec-validator .
66
- docker run --rm -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out
66
+ docker run --rm --user "$(id -u):$(id -g)" -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out
67
67
  ```
68
68
 
69
69
  The image carries Noir + Trivy + Gitleaks + Semgrep + Checkov; mount your repo at `/scan` and the
@@ -171,9 +171,9 @@ the next dynamic probes (explicitly gated — they mutate).
171
171
 
172
172
  ## Validated on
173
173
 
174
- HugoCross (Next.js), `wu-whatsappinbox` (106-service Express/AWS monorepo), VAmPI, NodeGoat, DVGA
175
- independently reproducing a hand-done pentest's findings (tenant boundary, SSO-endpoint SSRF, media
176
- upload, conversation-BOLA routes, roles).
174
+ A production Next.js app, a large Express/AWS monorepo, and the VAmPI / NodeGoat / DVGA vuln-app
175
+ corpus — independently reproducing a hand-done pentest's findings (tenant boundary, SSRF, file
176
+ upload, cross-tenant BOLA, role/authz gaps).
177
177
 
178
178
  ## Tests
179
179
 
@@ -186,10 +186,12 @@ python3 -m unittest discover -s tests # stdlib only, no Noir/network — 23 t
186
186
  Published to PyPI via **Trusted Publishing** (OIDC — no API token in the repo). To cut a release:
187
187
 
188
188
  ```bash
189
- # 1. bump the version in pyproject.toml (e.g. 0.2.0 → 0.2.1)
189
+ # 1. bump the version in pyproject.toml (e.g. 0.2.1 → 0.2.2)
190
190
  # 2. tag it and push — the tag must match pyproject's version (CI verifies):
191
- git tag v0.2.1 && git push origin v0.2.1
192
- # → .github/workflows/publish.yml builds + publishes to PyPI
191
+ git tag v0.2.2 && git push origin v0.2.2
192
+ # → publish.yml builds, INSTALLS + smoke-tests the wheel (version match,
193
+ # calibration ships, a real `websec run`), then publishes. A bad build fails
194
+ # CI instead of reaching PyPI — so you never have to yank after the fact.
193
195
  ```
194
196
 
195
197
  One-time PyPI setup (before the first release): on pypi.org → **Account → Publishing → Add a pending
@@ -225,8 +227,27 @@ lets you just ask, in plain English, for a security pass: it runs `websec`, read
225
227
  works the findings with you. For other agents the universal interface is unchanged: run the CLI, read
226
228
  `AGENT-BRIEFING.md`.
227
229
 
230
+ **Install gotchas (field-tested):**
231
+
232
+ - The install id is `plugin@marketplace` — `websec-validator@websec-plugins` (the marketplace name
233
+ from `.claude-plugin/marketplace.json`), **not** `@websec-validator` (the repo).
234
+ - The plugin only delivers the *instructions*; the actual scanning is a **separate Python CLI**
235
+ (`websec`). The skill's Step 0 installs it (`pipx install websec-validator`) if it's missing.
236
+ - **`/plugin …` only works in the terminal CLI.** In the Claude **app / Agent SDK** (no `/plugin`),
237
+ configure it in `.claude/settings.json` instead:
238
+ ```json
239
+ {
240
+ "extraKnownMarketplaces": {
241
+ "websec-plugins": { "source": { "source": "github", "repo": "raccioly/websec-validator" } }
242
+ },
243
+ "enabledPlugins": { "websec-validator@websec-plugins": true }
244
+ }
245
+ ```
246
+ This **registers + enables** the plugin but does **not** auto-fetch it — the first download still
247
+ needs the CLI (`/plugin install websec-validator@websec-plugins`) once. (Project `.claude/settings.json`
248
+ for a team; `~/.claude/settings.json` for just you.)
249
+
228
250
  ## Credits
229
251
 
230
- Methodology + probe library come from a real authenticated pentest pass
231
- ([`base-research/REPLICATION-PLAYBOOK.md`](base-research/REPLICATION-PLAYBOOK.md), not committed).
232
- This tool productizes that hand-written pass into something an AI agent can run on any repo.
252
+ Methodology + probe library are distilled from a real authenticated penetration-testing pass.
253
+ This tool productizes that hand-written methodology into something an AI agent can run on any repo.
@@ -9,7 +9,7 @@
9
9
  It is *not* an autonomous scanner and *not* a SaaS. It's the missing front-half: the thing that
10
10
  turns a repo into a precise, fact-grounded security brief an AI agent (with a human in the loop)
11
11
  can act on — an auto-filled, repo-aware version of a senior pentester's "here's what to test and
12
- how" handoff. Full landscape + why this niche is real: [`MARKET-ANALYSIS-AND-VERDICT.md`](MARKET-ANALYSIS-AND-VERDICT.md).
12
+ how" handoff. How it works + the reasoning behind every check: [`docs/METHODOLOGY.md`](docs/METHODOLOGY.md).
13
13
 
14
14
  ## Quickstart — just point it at your repo
15
15
 
@@ -25,7 +25,7 @@ local. The four ways to get there, all ending in the same `AGENT-BRIEFING.md` yo
25
25
  | **Tell your agent** (simplest) | — | say the line above |
26
26
  | **CLI** (a terminal) | `pipx install websec-validator` | `websec run /path/to/your/app` |
27
27
  | **Claude Code plugin** (slash) | `/plugin marketplace add raccioly/websec-validator` → `/plugin install websec-validator@websec-plugins` | invoke the **security-pass** skill, or just ask |
28
- | **Docker** (no install) | `docker build -t websec-validator .` | `docker run --rm -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out` |
28
+ | **Docker** (no install) | `docker build -t websec-validator .` | `docker run --rm --user "$(id -u):$(id -g)" -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out` |
29
29
 
30
30
  ➡️ **Want the reasoning behind every check?** Read **[docs/METHODOLOGY.md](docs/METHODOLOGY.md)** — what each test does and why.
31
31
 
@@ -51,7 +51,7 @@ No need to install Noir or any scanner — the image bundles them all (arch-awar
51
51
 
52
52
  ```bash
53
53
  docker build -t websec-validator .
54
- docker run --rm -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out
54
+ docker run --rm --user "$(id -u):$(id -g)" -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out
55
55
  ```
56
56
 
57
57
  The image carries Noir + Trivy + Gitleaks + Semgrep + Checkov; mount your repo at `/scan` and the
@@ -159,9 +159,9 @@ the next dynamic probes (explicitly gated — they mutate).
159
159
 
160
160
  ## Validated on
161
161
 
162
- HugoCross (Next.js), `wu-whatsappinbox` (106-service Express/AWS monorepo), VAmPI, NodeGoat, DVGA
163
- independently reproducing a hand-done pentest's findings (tenant boundary, SSO-endpoint SSRF, media
164
- upload, conversation-BOLA routes, roles).
162
+ A production Next.js app, a large Express/AWS monorepo, and the VAmPI / NodeGoat / DVGA vuln-app
163
+ corpus — independently reproducing a hand-done pentest's findings (tenant boundary, SSRF, file
164
+ upload, cross-tenant BOLA, role/authz gaps).
165
165
 
166
166
  ## Tests
167
167
 
@@ -174,10 +174,12 @@ python3 -m unittest discover -s tests # stdlib only, no Noir/network — 23 t
174
174
  Published to PyPI via **Trusted Publishing** (OIDC — no API token in the repo). To cut a release:
175
175
 
176
176
  ```bash
177
- # 1. bump the version in pyproject.toml (e.g. 0.2.0 → 0.2.1)
177
+ # 1. bump the version in pyproject.toml (e.g. 0.2.1 → 0.2.2)
178
178
  # 2. tag it and push — the tag must match pyproject's version (CI verifies):
179
- git tag v0.2.1 && git push origin v0.2.1
180
- # → .github/workflows/publish.yml builds + publishes to PyPI
179
+ git tag v0.2.2 && git push origin v0.2.2
180
+ # → publish.yml builds, INSTALLS + smoke-tests the wheel (version match,
181
+ # calibration ships, a real `websec run`), then publishes. A bad build fails
182
+ # CI instead of reaching PyPI — so you never have to yank after the fact.
181
183
  ```
182
184
 
183
185
  One-time PyPI setup (before the first release): on pypi.org → **Account → Publishing → Add a pending
@@ -213,8 +215,27 @@ lets you just ask, in plain English, for a security pass: it runs `websec`, read
213
215
  works the findings with you. For other agents the universal interface is unchanged: run the CLI, read
214
216
  `AGENT-BRIEFING.md`.
215
217
 
218
+ **Install gotchas (field-tested):**
219
+
220
+ - The install id is `plugin@marketplace` — `websec-validator@websec-plugins` (the marketplace name
221
+ from `.claude-plugin/marketplace.json`), **not** `@websec-validator` (the repo).
222
+ - The plugin only delivers the *instructions*; the actual scanning is a **separate Python CLI**
223
+ (`websec`). The skill's Step 0 installs it (`pipx install websec-validator`) if it's missing.
224
+ - **`/plugin …` only works in the terminal CLI.** In the Claude **app / Agent SDK** (no `/plugin`),
225
+ configure it in `.claude/settings.json` instead:
226
+ ```json
227
+ {
228
+ "extraKnownMarketplaces": {
229
+ "websec-plugins": { "source": { "source": "github", "repo": "raccioly/websec-validator" } }
230
+ },
231
+ "enabledPlugins": { "websec-validator@websec-plugins": true }
232
+ }
233
+ ```
234
+ This **registers + enables** the plugin but does **not** auto-fetch it — the first download still
235
+ needs the CLI (`/plugin install websec-validator@websec-plugins`) once. (Project `.claude/settings.json`
236
+ for a team; `~/.claude/settings.json` for just you.)
237
+
216
238
  ## Credits
217
239
 
218
- Methodology + probe library come from a real authenticated pentest pass
219
- ([`base-research/REPLICATION-PLAYBOOK.md`](base-research/REPLICATION-PLAYBOOK.md), not committed).
220
- This tool productizes that hand-written pass into something an AI agent can run on any repo.
240
+ Methodology + probe library are distilled from a real authenticated penetration-testing pass.
241
+ This tool productizes that hand-written methodology into something an AI agent can run on any repo.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "websec-validator"
7
- version = "0.2.1"
7
+ version = "0.2.3"
8
8
  description = "Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -164,6 +164,11 @@ Production source maps exposed: {client.get("production_source_maps", False)}
164
164
 
165
165
  Scanners available: {avail}
166
166
 
167
+ > ⚠️ The count below is **raw scanner output (pre-triage)** — expect mostly noise (vulnerable-looking
168
+ > patterns that are guarded, intended-public, or not exploitable). The **triaged, calibrated view** is the
169
+ > findings ledger in `REPORT.md` / `findings-ledger.json` — each finding there carries a `P(real)`. Start
170
+ > from the ledger and debate-verify; don't report these raw counts as vulnerabilities.
171
+
167
172
  {findings_block}
168
173
 
169
174
  Install for fuller coverage:
@@ -111,7 +111,7 @@ def cmd_run(args) -> int:
111
111
 
112
112
  # 3. probes: choose + stage
113
113
  chosen = probes.applicable(facts)
114
- manifest = probes.stage(chosen, out)
114
+ manifest = probes.stage(chosen, out, facts)
115
115
  print(f"\n staged {len([m for m in manifest if 'attack_class' in m])} tailored probe template(s) → {out / 'probes'}")
116
116
 
117
117
  # 4. traceable findings ledger (recon + static; dynamic merges in via `websec dynamic`)
@@ -156,12 +156,16 @@ def cmd_dynamic(args) -> int:
156
156
  dyn = dynamic.run_unauth(args.target, facts_path, out, probe_writes=args.probe_writes)
157
157
  u = dyn["unauth_reachability"]
158
158
  print(f" target: {u['target']} · → {u['summary']}")
159
+ if u.get("warning"):
160
+ print(f"\n {u['warning']}\n")
159
161
  for r in u["results"]:
160
162
  mark = "🔓" if r["verdict"] == "OPEN-no-auth" else (" ·" if r["verdict"] == "protected" else " ")
161
163
  print(f" {mark} {str(r['status']):>4} {r['verdict']:26} {r['path']}")
162
164
  if args.probe_writes:
163
165
  w = dyn["write_auth_enforcement"]
164
166
  print(f"\n write-verb auth enforcement → {w['summary']}")
167
+ if w.get("warning"):
168
+ print(f"\n {w['warning']}\n")
165
169
  for r in w["results"]:
166
170
  mark = "🔓" if r["verdict"] != "auth-enforced" and not r["verdict"].startswith("http-") else " ·"
167
171
  print(f" {mark} {str(r['status']):>4} {r['verdict']:42} {r['method']} {r['path']}")
@@ -138,6 +138,19 @@ SIDE_EFFECTING = re.compile(
138
138
  r"sponsor-post|upload|/refresh|/rebuild|/process|/dispatch|/import|/export|/scrape(?![\w-])", re.I)
139
139
 
140
140
 
141
+ # When NOTHING enforces auth, the likeliest cause in a test env is a fail-OPEN auth
142
+ # provider (unconfigured/erroring), not "the app has no auth". Say so loudly — a naive
143
+ # read of all-200s as "wide open" is a catastrophic false positive.
144
+ FAIL_OPEN_WARNING = (
145
+ "⚠ NO endpoint enforced auth (none returned 401/403). Before concluding authentication is missing, "
146
+ "RULE OUT a fail-OPEN test environment: an unconfigured or erroring auth provider "
147
+ "(Cognito/Auth0/NextAuth/…) can let every request through. Configure a valid (even dummy) provider, or "
148
+ "mock a session, and RE-RUN — if these flip to 401, the app is fine and the env was the bug. Until an "
149
+ "auth-enforced response is observed, treat ALL authN/authZ results here as UNTRUSTWORTHY. (If it stays "
150
+ "open WITH a working provider, that's a real finding: the middleware should fail CLOSED — deny on auth error.)"
151
+ )
152
+
153
+
141
154
  def unauth_reachability(target: str, facts: dict, max_endpoints: int = 50) -> dict:
142
155
  """STRICT read-only: GET each genuine data-read endpoint with NO auth, to see
143
156
  which are reachable unauthenticated. Skips side-effecting GETs and any path
@@ -170,6 +183,8 @@ def unauth_reachability(target: str, facts: dict, max_endpoints: int = 50) -> di
170
183
  results.append({"path": path, "status": code, "bytes": n, "verdict": verdict})
171
184
 
172
185
  openish = [r for r in results if r["verdict"] == "OPEN-no-auth"]
186
+ protected = [r for r in results if r["verdict"] in ("protected", "redirect (likely to login)")]
187
+ fail_open = len(results) >= 3 and not protected and bool(openish)
173
188
  return {
174
189
  "target": target,
175
190
  "mode": "STRICT read-only · unauthenticated · GET-only · side-effecting paths skipped",
@@ -177,8 +192,12 @@ def unauth_reachability(target: str, facts: dict, max_endpoints: int = 50) -> di
177
192
  "skipped_side_effecting": sorted(set(skipped)),
178
193
  "open_no_auth": openish,
179
194
  "results": results,
195
+ "fail_open_suspected": fail_open,
196
+ "authn_trustworthy": not fail_open,
197
+ "warning": FAIL_OPEN_WARNING if fail_open else "",
180
198
  "summary": f"{len(openish)}/{len(results)} data-read GET endpoints reachable WITHOUT auth"
181
- + (" — review whether these should be public" if openish else " — all gated"),
199
+ + (" — review whether these should be public" if openish else " — all gated")
200
+ + (" · ⚠ FAIL-OPEN SUSPECTED (nothing enforced auth — results untrustworthy)" if fail_open else ""),
182
201
  }
183
202
 
184
203
 
@@ -216,6 +235,7 @@ def write_auth_enforcement(target: str, facts: dict, max_endpoints: int = 80) ->
216
235
  missing = [r for r in results if r["verdict"] != "auth-enforced" and not r["verdict"].startswith("http-")]
217
236
  executed = [r for r in results if r["verdict"] == "EXECUTED-UNAUTH"]
218
237
  enforced = sum(1 for r in results if r["verdict"] == "auth-enforced")
238
+ fail_open = len(results) >= 3 and enforced == 0
219
239
  return {
220
240
  "note": "Heuristic: a protected route returns 401/403 BEFORE validation; a 400/404 unauth means "
221
241
  "the request reached the handler with no auth gate. VERIFY each — but inconsistency vs "
@@ -225,8 +245,12 @@ def write_auth_enforcement(target: str, facts: dict, max_endpoints: int = 80) ->
225
245
  "no_auth_gate": missing,
226
246
  "executed_unauth": executed,
227
247
  "results": results,
248
+ "fail_open_suspected": fail_open,
249
+ "authn_trustworthy": not fail_open,
250
+ "warning": FAIL_OPEN_WARNING if fail_open else "",
228
251
  "summary": f"{enforced}/{len(results)} write endpoints enforce auth · "
229
- f"{len(missing)} reached with no auth gate · {len(executed)} executed unauthenticated",
252
+ f"{len(missing)} reached with no auth gate · {len(executed)} executed unauthenticated"
253
+ + (" · ⚠ FAIL-OPEN SUSPECTED — results untrustworthy" if fail_open else ""),
230
254
  }
231
255
 
232
256
 
@@ -47,7 +47,7 @@ class AuthExtractor(Extractor):
47
47
 
48
48
  # Detect ALL schemes present, then pick a primary by priority. A JWT app
49
49
  # that also wires Passport for SSO must read as primary=jwt, not passport
50
- # (the bug the WhatsApp app exposed). Priority: nextauth > jwt > session > passport > api-key.
50
+ # (Passport is often SSO-only). Priority: nextauth > jwt > session > passport > api-key.
51
51
  detected = []
52
52
  if nextauth:
53
53
  detected.append("nextauth (session JWT in cookie)")
@@ -34,6 +34,14 @@ GLOBAL_AUTH = re.compile(
34
34
  r"app\.use\s*\(\s*[\w.]*(?:authenticate|requireAuth|authMiddleware|verifyToken|"
35
35
  r"isAuthenticated|jwtMiddleware|ensureAuth)\w*\s*\)", re.I)
36
36
 
37
+ # Does a Next.js middleware/proxy file actually enforce AUTH (vs. i18n/headers only)?
38
+ # `auth((req)=>…)` / `withAuth` / `req.auth` / getToken / getServerSession / redirect-to-login /
39
+ # a 401 / Clerk / Supabase updateSession all signal a global auth gate.
40
+ MW_AUTH = re.compile(
41
+ r"\bauth\s*\(|withAuth\b|req\.auth\b|getToken\s*\(|getServerSession\s*\(|clerkMiddleware|"
42
+ r"updateSession\s*\(|NextResponse\.redirect\([^)]*(?:login|signin)|status:\s*401|"
43
+ r"['\"]Authentication required['\"]", re.I)
44
+
37
45
  PUBLIC_HINT = re.compile(
38
46
  r"/(login|logout|register|signup|signin|health|healthz|ping|status|webhooks?|"
39
47
  r"public|\.well-known|robots|favicon|sitemap|callback|refresh|csrf|metrics)\b", re.I)
@@ -46,15 +54,20 @@ ROLE = re.compile(
46
54
 
47
55
 
48
56
  def _parse_next_middleware(ctx: RepoContext) -> dict:
49
- for cand in ("middleware.ts", "middleware.js", "src/middleware.ts", "src/middleware.js"):
57
+ # Next 15.5+/16 renamed `middleware.ts` → `proxy.ts` (both filenames are valid; the
58
+ # framework recognizes either). Missing this made the tool report "no global auth" on
59
+ # Next 16 apps and flag every handler — the single biggest false-positive cluster.
60
+ for cand in ("middleware.ts", "middleware.js", "src/middleware.ts", "src/middleware.js",
61
+ "proxy.ts", "proxy.js", "src/proxy.ts", "src/proxy.js"):
50
62
  txt = ctx.manifest(cand)
51
63
  if not txt:
52
64
  continue
53
65
  matchers = re.findall(r"matcher\s*:\s*\[([^\]]*)\]", txt)
54
66
  patterns = re.findall(r"['\"]([^'\"]+)['\"]", matchers[0]) if matchers else []
55
67
  roles = [m for grp in ROLE.findall(txt) for m in grp if m]
56
- return {"present": True, "file": cand, "matchers": patterns, "role_checks": roles}
57
- return {"present": False, "matchers": []}
68
+ return {"present": True, "file": cand, "matchers": patterns,
69
+ "is_auth": bool(MW_AUTH.search(txt)), "role_checks": roles}
70
+ return {"present": False, "matchers": [], "is_auth": False}
58
71
 
59
72
 
60
73
  def _matcher_covers(path: str, matchers: list) -> bool:
@@ -85,8 +98,10 @@ class AuthzExtractor(Extractor):
85
98
  def extract(self, ctx: RepoContext, facts: dict) -> dict:
86
99
  endpoints = (facts.get("routes") or {}).get("endpoints", [])
87
100
  mw = _parse_next_middleware(ctx)
101
+ mw_auth = mw.get("is_auth", False)
88
102
 
89
- global_auth = any(GLOBAL_AUTH.search(t) for _p, _r, t in ctx.iter_code())
103
+ # global auth = an Express path-less auth middleware OR a Next auth middleware/proxy
104
+ global_auth = mw_auth or any(GLOBAL_AUTH.search(t) for _p, _r, t in ctx.iter_code())
90
105
  roles: set = set(mw.get("role_checks", []))
91
106
  protected = no_guard = unknown = 0
92
107
  no_guard_writes, egs = [], []
@@ -95,7 +110,10 @@ class AuthzExtractor(Extractor):
95
110
  cp = e.get("code_path", "")
96
111
  text = ctx.text(Path(cp)) if cp else ""
97
112
  _collect_roles(text, roles)
98
- guarded = bool(text and GUARD.search(text)) or _matcher_covers(e.get("path", ""), mw.get("matchers", []))
113
+ # a matcher only counts as a guard when the middleware actually does auth — a
114
+ # non-auth middleware.ts (i18n/headers) must NOT mark routes protected.
115
+ guarded = bool(text and GUARD.search(text)) or \
116
+ (mw_auth and _matcher_covers(e.get("path", ""), mw.get("matchers", [])))
99
117
  relcp = ctx.rel(Path(cp)) if cp else ""
100
118
  egs.append({"method": e.get("method"), "path": e.get("path"), "code_path": relcp,
101
119
  "guarded": bool(guarded), "analyzed": bool(text),
@@ -110,10 +128,12 @@ class AuthzExtractor(Extractor):
110
128
  no_guard_writes.append(f"{e['method']} {e['path']} ({relcp or '?'})")
111
129
 
112
130
  if global_auth:
113
- note = ("A GLOBAL auth middleware (`app.use(<auth>)`) was detected — most routes are likely "
114
- "protected by default. The list below is write endpoints with NO guard visible in their "
115
- "own handler file; they MAY be covered globally. Verify each is either covered or an "
116
- intentional public exemption — don't assume they're vulnerable.")
131
+ where = f"`{mw['file']}` (matcher {mw.get('matchers') or '—'})" if mw_auth else "`app.use(<auth>)`"
132
+ note = (f"A GLOBAL auth middleware ({where}) was detected — most routes are protected by default. "
133
+ "Endpoints its matcher covers are reported as guarded (defense-in-depth handled centrally). "
134
+ "Any list below is write endpoints with NO guard visible in their own handler file AND not "
135
+ "covered by the matcher; verify each is either covered or an intentional public exemption — "
136
+ "don't assume they're vulnerable.")
117
137
  else:
118
138
  note = ("No global auth middleware detected. Write endpoints with no visible guard are "
119
139
  "high-signal missing-authz leads — verify each.")
@@ -111,6 +111,11 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
111
111
  ((dynamic or {}).get("write_auth_enforcement", {}) or {}).get("results", [])}
112
112
  dyn_get = {r["path"]: r for r in
113
113
  ((dynamic or {}).get("unauth_reachability", {}) or {}).get("results", [])}
114
+ # If the dynamic run suspects a fail-OPEN test env, its unauth "successes" are untrustworthy —
115
+ # do NOT escalate them to CRITICAL (the catastrophic-false-positive trap). Fall back to the
116
+ # recon-level hypothesis with a caveat until the operator re-runs with auth resolving.
117
+ dyn_fail_open = bool(((dynamic or {}).get("write_auth_enforcement", {}) or {}).get("fail_open_suspected")
118
+ or ((dynamic or {}).get("unauth_reachability", {}) or {}).get("fail_open_suspected"))
114
119
  for eg in authz.get("endpoint_guards", []):
115
120
  if eg.get("guarded") or eg.get("public_hint") or not eg.get("analyzed"):
116
121
  continue
@@ -121,7 +126,12 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
121
126
  dv = dyn_write.get((m, p)) or dyn_get.get(p)
122
127
  if dv:
123
128
  verdict = dv.get("verdict", "")
124
- if "EXECUTED-UNAUTH" in verdict:
129
+ if dyn_fail_open and verdict not in ("auth-enforced", "protected"):
130
+ ev.append({"layer": "dynamic", "detail": f"reached unauthenticated (HTTP {dv.get('status')}) — "
131
+ "BUT fail-open suspected (auth not resolving in the test env); UNTRUSTWORTHY, "
132
+ "re-run with a working auth provider before trusting this"})
133
+ # keep recon-level conf/sev; do not escalate
134
+ elif "EXECUTED-UNAUTH" in verdict:
125
135
  ev.append({"layer": "dynamic", "detail": f"{m} executed UNAUTHENTICATED (HTTP {dv.get('status')})"})
126
136
  conf, sev = "HIGH", "CRITICAL"
127
137
  elif "no-auth-gate" in verdict or verdict == "OPEN-no-auth":
@@ -151,11 +161,22 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
151
161
  [{"layer": "static", "detail": f"{'+'.join(t.get('tools', []))}: {t.get('title','')}"}]))
152
162
 
153
163
  # ---- 3. Attack-surface sinks (recon hypotheses) ----
164
+ # On a purely-NoSQL datastore, classic SQL-injection alerts are almost always FPs —
165
+ # down-rank them (the inflation the field test flagged) rather than ranking them MEDIUM.
166
+ _ds = {d.lower() for d in (facts.get("stack", {}).get("datastores") or [])}
167
+ _nosql = {"dynamodb", "dynamo", "mongodb", "mongo", "firestore", "cosmos", "cosmosdb", "couchdb", "cassandra"}
168
+ _sql = {"postgres", "postgresql", "mysql", "mariadb", "sqlite", "mssql", "sqlserver", "aurora", "oracle", "cockroach"}
169
+ is_nosql_only = bool(_ds & _nosql) and not (_ds & _sql)
154
170
  for cls, info in (facts.get("surface", {}).get("sinks", {}) or {}).items():
171
+ sev = "MEDIUM"
172
+ ev = [{"layer": "recon", "detail": f"user-input-gated {cls} in {info.get('count')} file(s)"}]
173
+ if cls in ("sqli", "sql-injection") and is_nosql_only:
174
+ sev = "LOW"
175
+ ev.append({"layer": "recon", "detail": f"datastore is {', '.join(sorted(_ds)) or 'NoSQL'} — "
176
+ "classic SQLi is unlikely here; check for NoSQL injection instead (usually a false positive)"})
155
177
  out.append(_f(f"{cls} sink ({info.get('count')} site(s))", "attack-surface",
156
- cls if cls in STANDARDS else "sast", "MEDIUM", "LOW",
157
- (info.get("files") or ["?"])[0],
158
- [{"layer": "recon", "detail": f"user-input-gated {cls} in {info.get('count')} file(s)"}]))
178
+ cls if cls in STANDARDS else "sast", sev, "LOW",
179
+ (info.get("files") or ["?"])[0], ev))
159
180
 
160
181
  # ---- 4. Client-side secret exposure (HIGH — ships to browser) ----
161
182
  for leak in (facts.get("client_exposure", {}).get("public_secret_leaks", []) +
@@ -0,0 +1,161 @@
1
+ """Stage the probe library, tailored to the extracted attack surface.
2
+
3
+ Probe selection is driven by the real recon facts. Staging now also writes a
4
+ `probe-context.json` (the target's REAL routes/auth/sensitive-fields/tenant key,
5
+ from FACTS) next to the probes, prepends a "this is a draft — your surface is in
6
+ probe-context.json" banner to each, and records the real per-probe target endpoints
7
+ in the manifest — so the staged probes describe *this* app, not the reference app
8
+ the templates were authored against.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ from importlib import resources
15
+ from pathlib import Path
16
+
17
+ WRITE_VERBS = ("POST", "PUT", "PATCH", "DELETE")
18
+
19
+ # label -> (filename, attack class, what the agent must supply)
20
+ PROBES = {
21
+ "unauth-baseline": ("unauth-baseline.sh", "Missing authentication (no-creds baseline)",
22
+ "just the target base URL — it reads the routes from probe-context.json"),
23
+ "bola-cross-tenant": ("bola-cross-tenant.sh", "BOLA / cross-tenant read (OWASP API #1)",
24
+ "two role tokens in different tenants + the IDOR-candidate routes"),
25
+ "bola-write-verbs": ("bola-write-verbs.py", "BOLA on PATCH/PUT/POST/DELETE",
26
+ "two role tokens + the write endpoints + a sample object id per tenant"),
27
+ "mass-assignment": ("mass-assignment.py", "BOPLA / mass assignment (OWASP API #3)",
28
+ "a low-priv token + a write endpoint that updates a record"),
29
+ "jwt-attacks": ("jwt-attacks.sh", "JWT: alg:none, tamper, expiry, replay",
30
+ "a valid token + the login + a protected endpoint"),
31
+ "hs256-brute-force": ("hs256-brute-force.py", "Offline HS256 weak-secret brute",
32
+ "one HS256 JWT (offline — no live app needed)"),
33
+ "ssrf-probes": ("ssrf-probes.sh", "SSRF: IMDS / RFC1918 / file://",
34
+ "an authorized token + the SSRF-candidate endpoints/params"),
35
+ "race-conditions": ("race-conditions.py", "Race / claim-collision invariants",
36
+ "a token + an endpoint with a single-winner invariant + an idempotency key"),
37
+ "webhook-forgery": ("webhook-forgery.py", "Inbound webhook signature/replay",
38
+ "the webhook path + signature header name + scheme"),
39
+ "rate-limit-burst": ("rate-limit-burst.sh", "Rate-limit + X-Forwarded-For bypass",
40
+ "the login + a rate-limited endpoint"),
41
+ "compare-roles": ("compare-roles.sh", "Two-role DAST surface diff",
42
+ "two SARIF reports from a role-A and role-B scan (dynamic phase)"),
43
+ "dlp-bypass-offline": ("dlp-bypass-offline.py", "DLP/detection regex encoding bypass",
44
+ "your DLP/redaction regexes (offline)"),
45
+ "s3-assess": ("s3-assess.sh", "S3 bucket posture", "a bucket name + AWS creds"),
46
+ }
47
+
48
+ # unauth-baseline is ALWAYS staged: it's the cheapest probe and directly exercises the
49
+ # #1 lead class (missing authentication) — the one a no-creds run can confirm immediately.
50
+ ALWAYS = ["unauth-baseline", "jwt-attacks", "hs256-brute-force", "rate-limit-burst"]
51
+
52
+ # which targeting bucket each probe should be pointed at (for the manifest's real targets)
53
+ _TARGET_KEYS = {
54
+ "unauth-baseline": "write_endpoints",
55
+ "bola-write-verbs": "write_endpoints",
56
+ "mass-assignment": "write_endpoints",
57
+ "bola-cross-tenant": "idor_candidates",
58
+ "ssrf-probes": "ssrf_candidates",
59
+ "webhook-forgery": "write_endpoints",
60
+ }
61
+
62
+ _BANNER = (
63
+ "# ─────────────────────────────────────────────────────────────────────────────\n"
64
+ "# websec-validator — DRAFT probe. Any example endpoints / auth / login below are\n"
65
+ "# PLACEHOLDERS from the template. THIS target's real surface — routes, auth scheme\n"
66
+ "# + token location, sensitive fields, tenant key — is in ./probe-context.json\n"
67
+ "# (generated from FACTS.json for this app). Use those values before running; the\n"
68
+ "# agent should finalize this draft against probe-context.json, then fill secrets.\n"
69
+ "# ─────────────────────────────────────────────────────────────────────────────\n"
70
+ )
71
+
72
+
73
def applicable(facts: dict) -> list:
    """Pick probes the extracted surface actually justifies.

    Starts from the always-staged probes in ``ALWAYS`` and adds further
    probes depending on what the recon facts contain:

    * write endpoints      -> mass-assignment, webhook-forgery, race-conditions
    * tenant candidates    -> bola-cross-tenant, bola-write-verbs, compare-roles
    * SSRF candidates/sink -> ssrf-probes

    Args:
        facts: The recon facts dict. Only ``routes.targeting``,
            ``tenant.candidates`` and ``surface.sinks`` are read. Missing
            keys and keys explicitly set to ``None`` are both treated as
            "nothing found".

    Returns:
        Probe keys in selection order, de-duplicated, restricted to keys
        present in ``PROBES``.
    """
    # `or {}` (rather than a .get default) so a key that is present but null
    # does not blow up on the chained .get() below.
    targeting = (facts.get("routes") or {}).get("targeting") or {}
    tenant = (facts.get("tenant") or {}).get("candidates")
    sinks = (facts.get("surface") or {}).get("sinks") or {}
    has_writes = bool(targeting.get("write_endpoints"))

    chosen = list(ALWAYS)
    if has_writes:
        chosen.append("mass-assignment")
    if tenant:
        chosen += ["bola-cross-tenant", "bola-write-verbs", "compare-roles"]
    if targeting.get("ssrf_candidates") or sinks.get("ssrf-outbound-http"):
        chosen.append("ssrf-probes")
    # Kept as a second step (not merged with the first write check) so these
    # probes stay after the tenant/SSRF ones in the staged order.
    if has_writes:
        chosen += ["webhook-forgery", "race-conditions"]

    # dict.fromkeys drops duplicates while keeping first-seen order.
    return [key for key in dict.fromkeys(chosen) if key in PROBES]
94
+
95
+
96
def build_context(facts: dict) -> dict:
    """Build the target's real, probe-ready surface (written to probe-context.json).

    Args:
        facts: The recon facts dict. Reads ``routes`` (``endpoints`` and
            ``targeting``), ``auth``, ``schemas``, ``tenant`` and ``surface``.
            Missing sections and sections/lists explicitly set to ``None``
            are treated as empty.

    Returns:
        A JSON-serialisable dict with a placeholder base URL, the auth
        scheme/token location, capped endpoint lists per targeting bucket,
        sensitive fields, up to five tenant keys, the datastore class and a
        usage note.
    """
    routes = facts.get("routes") or {}
    tgt = routes.get("targeting") or {}
    auth = facts.get("auth") or {}
    tenant_candidates = (facts.get("tenant") or {}).get("candidates") or []

    def _bucket(name: str, limit: int) -> list:
        # A bucket may be absent or null in FACTS; both mean "no candidates".
        return list(tgt.get(name) or [])[:limit]

    writes = [
        f"{e.get('method')} {e.get('path')}"
        for e in (routes.get("endpoints") or [])
        if e.get("method") in WRITE_VERBS
    ][:80]

    return {
        "target_base_url": "FILL_ME (e.g. http://localhost:3000)",
        "auth": {
            "scheme": auth.get("scheme"),
            "token_location": auth.get("token_location"),
            "login_endpoints": _bucket("auth_endpoints", 10),
            "how_to_authenticate": "cookie-session (e.g. NextAuth) → send the session cookie; "
                                   "bearer → Authorization: Bearer <jwt>; api-key → the documented key header",
        },
        "endpoints": {
            "writes": writes,
            "idor_candidates": _bucket("idor_candidates", 60),
            "ssrf_candidates": _bucket("ssrf_candidates", 40),
            "upload_candidates": _bucket("upload_candidates", 40),
            "auth_endpoints": _bucket("auth_endpoints", 20),
        },
        "sensitive_fields": (facts.get("schemas") or {}).get("sensitive_fields") or [],
        "tenant_keys": [c.get("key") for c in tenant_candidates][:5],
        "datastore_class": (facts.get("surface") or {}).get("datastore_class"),
        "note": "These are THIS app's real routes/auth (from FACTS.json). Finalize each probe draft "
                "against this file, supply secrets/tokens, then run against a TEST instance only.",
    }
125
+
126
+
127
def stage(chosen: list, outdir: Path, facts: dict | None = None) -> list:
    """Copy the chosen probe templates into ``outdir/probes`` and return a manifest.

    Writes ``probe-context.json`` (from ``build_context``), the shared
    ``_lib.py`` helper, and one banner-prefixed draft per probe key.

    Args:
        chosen: Probe keys to stage; each must be a key of ``PROBES``.
        outdir: Directory under which the ``probes`` folder is created.
        facts: Recon facts used for the context file and per-probe targets.
            ``None`` is treated as an empty dict.

    Returns:
        A list of manifest entries. The first describes the context file;
        each staged probe adds an entry with its file, attack class, required
        inputs and up to 15 real targets. A template (or the shared helper)
        that could not be staged adds an entry with a ``status`` of
        ``"stage-error: ..."`` instead of raising.

    Raises:
        KeyError: If a key in ``chosen`` is not in ``PROBES``.
    """
    dest = outdir / "probes"
    dest.mkdir(parents=True, exist_ok=True)
    facts = facts or {}

    ctx = build_context(facts)
    (dest / "probe-context.json").write_text(json.dumps(ctx, indent=2) + "\n", encoding="utf-8")
    # `or {}` so a null targeting section behaves like a missing one.
    tgt = (facts.get("routes") or {}).get("targeting") or {}

    manifest = [{"key": "_context", "file": "probes/probe-context.json",
                 "note": "the target's real routes/auth/fields — finalize the drafts against this"}]
    src_root = resources.files("websec_validator").joinpath("templates/probes")

    # Always ship the shared helper the Python probes import (load context + env auth).
    # Copied as bytes so the staged file is identical on every platform; a failure is
    # recorded rather than swallowed, because the Python probes cannot run without it.
    try:
        (dest / "_lib.py").write_bytes(src_root.joinpath("_lib.py").read_bytes())
    except Exception as e:
        manifest.append({"key": "_lib", "file": "_lib.py", "status": f"stage-error: {e}"})

    for key in chosen:
        fname, attack, needs = PROBES[key]
        target_key = _TARGET_KEYS.get(key)
        targets = list(tgt.get(target_key) or [])[:15] if target_key else []
        try:
            text = src_root.joinpath(fname).read_bytes().decode("utf-8", "replace")
            # Prepend the draft banner, but keep any shebang as the very first line.
            if text.startswith("#!"):
                shebang, _, rest = text.partition("\n")
                text = f"{shebang}\n{_BANNER}{rest}"
            else:
                text = _BANNER + text
            # Write UTF-8 bytes explicitly: the banner is non-ASCII, and text-mode
            # writing would use the locale codec and rewrite "\n" as "\r\n" on
            # Windows, which breaks the shebang of the shell probes.
            (dest / fname).write_bytes(text.encode("utf-8"))
            manifest.append({"key": key, "file": f"probes/{fname}", "attack_class": attack,
                             "agent_must_supply": needs, "targets": targets})
        except Exception as e:
            manifest.append({"key": key, "file": fname, "status": f"stage-error: {e}"})
    return manifest
@@ -71,7 +71,8 @@ def render(facts: dict, scanners: dict, scan_results: list, unified: dict | None
71
71
  | Endpoints | **{routes.get('count', 0)}** (via {routes.get('engine','?').split(' ')[0]}) |
72
72
  | Auth | {facts.get('auth', {}).get('scheme','?')} · roles: {', '.join(authz.get('roles_detected', [])) or 'none'} |
73
73
  | Access control | {gs.get('with_visible_guard', 0)} guarded · **{gs.get('no_visible_guard', 0)} no visible guard** · global-middleware: {authz.get('global_auth_middleware', False)} |
74
- | Findings (ledger) | {ledger_hdr} |
74
+ | Static scanner (raw, pre-triage) | {sev_line} |
75
+ | **Findings ledger** (triaged + calibrated) | {ledger_hdr} |
75
76
  | Attack surface | IDOR: {len(tgt.get('idor_candidates', []))} · SSRF: {len(tgt.get('ssrf_candidates', []))} · upload: {len(tgt.get('upload_candidates', []))} · writes: {len(tgt.get('write_endpoints', []))} |
76
77
 
77
78
  ## 1. Findings ledger (ranked · evidence chain · standards · confidence)