websec-validator 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {websec_validator-0.2.1/src/websec_validator.egg-info → websec_validator-0.2.2}/PKG-INFO +31 -10
  2. {websec_validator-0.2.1 → websec_validator-0.2.2}/README.md +30 -9
  3. {websec_validator-0.2.1 → websec_validator-0.2.2}/pyproject.toml +1 -1
  4. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/briefing.py +5 -0
  5. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/cli.py +5 -1
  6. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/dynamic.py +26 -2
  7. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/extractors/authz.py +29 -9
  8. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/findings.py +25 -4
  9. websec_validator-0.2.2/src/websec_validator/probes.py +156 -0
  10. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/report.py +2 -1
  11. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/scanners.py +42 -11
  12. websec_validator-0.2.2/src/websec_validator/templates/probes/unauth-baseline.sh +44 -0
  13. {websec_validator-0.2.1 → websec_validator-0.2.2/src/websec_validator.egg-info}/PKG-INFO +31 -10
  14. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator.egg-info/SOURCES.txt +1 -0
  15. {websec_validator-0.2.1 → websec_validator-0.2.2}/tests/test_recon.py +86 -1
  16. websec_validator-0.2.1/src/websec_validator/probes.py +0 -79
  17. {websec_validator-0.2.1 → websec_validator-0.2.2}/LICENSE +0 -0
  18. {websec_validator-0.2.1 → websec_validator-0.2.2}/setup.cfg +0 -0
  19. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/__init__.py +0 -0
  20. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/calibration.json +0 -0
  21. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/calibration.py +0 -0
  22. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/constitution.py +0 -0
  23. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/corpus.json +0 -0
  24. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/extractors/__init__.py +0 -0
  25. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/extractors/auth.py +0 -0
  26. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/extractors/base.py +0 -0
  27. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/extractors/client_exposure.py +0 -0
  28. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/extractors/graphql.py +0 -0
  29. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/extractors/iac_ci.py +0 -0
  30. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/extractors/integrations.py +0 -0
  31. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/extractors/routes.py +0 -0
  32. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/extractors/schemas.py +0 -0
  33. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/extractors/stack.py +0 -0
  34. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/extractors/surface.py +0 -0
  35. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/extractors/tenant.py +0 -0
  36. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/proof.py +0 -0
  37. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/recon.py +0 -0
  38. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/probes/bola-cross-tenant.sh +0 -0
  39. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/probes/bola-write-verbs.py +0 -0
  40. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/probes/compare-roles.sh +0 -0
  41. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/probes/dlp-bypass-offline.py +0 -0
  42. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/probes/hs256-brute-force.py +0 -0
  43. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/probes/jwt-attacks.sh +0 -0
  44. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/probes/mass-assignment.py +0 -0
  45. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/probes/race-conditions.py +0 -0
  46. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/probes/rate-limit-burst.sh +0 -0
  47. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/probes/s3-assess.sh +0 -0
  48. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/probes/ssrf-probes.sh +0 -0
  49. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/probes/webhook-forgery.py +0 -0
  50. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +0 -0
  51. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/reports/access-control-matrix.md.template +0 -0
  52. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/reports/findings-triage.md.template +0 -0
  53. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/reports/pentest-handover-brief.md.template +0 -0
  54. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator/templates/reports/per-tool-FINDINGS.md.template +0 -0
  55. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator.egg-info/dependency_links.txt +0 -0
  56. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator.egg-info/entry_points.txt +0 -0
  57. {websec_validator-0.2.1 → websec_validator-0.2.2}/src/websec_validator.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: websec-validator
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
5
5
  Author: Ricardo Accioly
6
6
  License: MIT
@@ -21,7 +21,7 @@ Dynamic: license-file
21
21
  It is *not* an autonomous scanner and *not* a SaaS. It's the missing front-half: the thing that
22
22
  turns a repo into a precise, fact-grounded security brief an AI agent (with a human in the loop)
23
23
  can act on — an auto-filled, repo-aware version of a senior pentester's "here's what to test and
24
- how" handoff. Full landscape + why this niche is real: [`MARKET-ANALYSIS-AND-VERDICT.md`](MARKET-ANALYSIS-AND-VERDICT.md).
24
+ how" handoff. How it works + the reasoning behind every check: [`docs/METHODOLOGY.md`](docs/METHODOLOGY.md).
25
25
 
26
26
  ## Quickstart — just point it at your repo
27
27
 
@@ -37,7 +37,7 @@ local. The four ways to get there, all ending in the same `AGENT-BRIEFING.md` yo
37
37
  | **Tell your agent** (simplest) | — | say the line above |
38
38
  | **CLI** (a terminal) | `pipx install websec-validator` | `websec run /path/to/your/app` |
39
39
  | **Claude Code plugin** (slash) | `/plugin marketplace add raccioly/websec-validator` → `/plugin install websec-validator@websec-plugins` | invoke the **security-pass** skill, or just ask |
40
- | **Docker** (no install) | `docker build -t websec-validator .` | `docker run --rm -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out` |
40
+ | **Docker** (no install) | `docker build -t websec-validator .` | `docker run --rm --user "$(id -u):$(id -g)" -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out` |
41
41
 
42
42
  ➡️ **Want the reasoning behind every check?** Read **[docs/METHODOLOGY.md](docs/METHODOLOGY.md)** — what each test does and why.
43
43
 
@@ -63,7 +63,7 @@ No need to install Noir or any scanner — the image bundles them all (arch-awar
63
63
 
64
64
  ```bash
65
65
  docker build -t websec-validator .
66
- docker run --rm -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out
66
+ docker run --rm --user "$(id -u):$(id -g)" -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out
67
67
  ```
68
68
 
69
69
  The image carries Noir + Trivy + Gitleaks + Semgrep + Checkov; mount your repo at `/scan` and the
@@ -186,10 +186,12 @@ python3 -m unittest discover -s tests # stdlib only, no Noir/network — 23 t
186
186
  Published to PyPI via **Trusted Publishing** (OIDC — no API token in the repo). To cut a release:
187
187
 
188
188
  ```bash
189
- # 1. bump the version in pyproject.toml (e.g. 0.2.0 → 0.2.1)
189
+ # 1. bump the version in pyproject.toml (e.g. 0.2.1 → 0.2.2)
190
190
  # 2. tag it and push — the tag must match pyproject's version (CI verifies):
191
- git tag v0.2.1 && git push origin v0.2.1
192
- # → .github/workflows/publish.yml builds + publishes to PyPI
191
+ git tag v0.2.2 && git push origin v0.2.2
192
+ # → publish.yml builds, INSTALLS + smoke-tests the wheel (version match,
193
+ # calibration ships, a real `websec run`), then publishes. A bad build fails
194
+ # CI instead of reaching PyPI — so you never have to yank after the fact.
193
195
  ```
194
196
 
195
197
  One-time PyPI setup (before the first release): on pypi.org → **Account → Publishing → Add a pending
@@ -225,8 +227,27 @@ lets you just ask, in plain English, for a security pass: it runs `websec`, read
225
227
  works the findings with you. For other agents the universal interface is unchanged: run the CLI, read
226
228
  `AGENT-BRIEFING.md`.
227
229
 
230
+ **Install gotchas (field-tested):**
231
+
232
+ - The install id is `plugin@marketplace` — `websec-validator@websec-plugins` (the marketplace name
233
+ from `.claude-plugin/marketplace.json`), **not** `@websec-validator` (the repo).
234
+ - The plugin only delivers the *instructions*; the actual scanning is a **separate Python CLI**
235
+ (`websec`). The skill's Step 0 installs it (`pipx install websec-validator`) if it's missing.
236
+ - **`/plugin …` only works in the terminal CLI.** In the Claude **app / Agent SDK** (no `/plugin`),
237
+ configure it in `.claude/settings.json` instead:
238
+ ```json
239
+ {
240
+ "extraKnownMarketplaces": {
241
+ "websec-plugins": { "source": { "source": "github", "repo": "raccioly/websec-validator" } }
242
+ },
243
+ "enabledPlugins": { "websec-validator@websec-plugins": true }
244
+ }
245
+ ```
246
+ This **registers + enables** the plugin but does **not** auto-fetch it — the first download still
247
+ needs the CLI (`/plugin install websec-validator@websec-plugins`) once. (Project `.claude/settings.json`
248
+ for a team; `~/.claude/settings.json` for just you.)
249
+
228
250
  ## Credits
229
251
 
230
- Methodology + probe library come from a real authenticated pentest pass
231
- ([`base-research/REPLICATION-PLAYBOOK.md`](base-research/REPLICATION-PLAYBOOK.md), not committed).
232
- This tool productizes that hand-written pass into something an AI agent can run on any repo.
252
+ Methodology + probe library are distilled from a real authenticated penetration-testing pass.
253
+ This tool productizes that hand-written methodology into something an AI agent can run on any repo.
@@ -9,7 +9,7 @@
9
9
  It is *not* an autonomous scanner and *not* a SaaS. It's the missing front-half: the thing that
10
10
  turns a repo into a precise, fact-grounded security brief an AI agent (with a human in the loop)
11
11
  can act on — an auto-filled, repo-aware version of a senior pentester's "here's what to test and
12
- how" handoff. Full landscape + why this niche is real: [`MARKET-ANALYSIS-AND-VERDICT.md`](MARKET-ANALYSIS-AND-VERDICT.md).
12
+ how" handoff. How it works + the reasoning behind every check: [`docs/METHODOLOGY.md`](docs/METHODOLOGY.md).
13
13
 
14
14
  ## Quickstart — just point it at your repo
15
15
 
@@ -25,7 +25,7 @@ local. The four ways to get there, all ending in the same `AGENT-BRIEFING.md` yo
25
25
  | **Tell your agent** (simplest) | — | say the line above |
26
26
  | **CLI** (a terminal) | `pipx install websec-validator` | `websec run /path/to/your/app` |
27
27
  | **Claude Code plugin** (slash) | `/plugin marketplace add raccioly/websec-validator` → `/plugin install websec-validator@websec-plugins` | invoke the **security-pass** skill, or just ask |
28
- | **Docker** (no install) | `docker build -t websec-validator .` | `docker run --rm -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out` |
28
+ | **Docker** (no install) | `docker build -t websec-validator .` | `docker run --rm --user "$(id -u):$(id -g)" -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out` |
29
29
 
30
30
  ➡️ **Want the reasoning behind every check?** Read **[docs/METHODOLOGY.md](docs/METHODOLOGY.md)** — what each test does and why.
31
31
 
@@ -51,7 +51,7 @@ No need to install Noir or any scanner — the image bundles them all (arch-awar
51
51
 
52
52
  ```bash
53
53
  docker build -t websec-validator .
54
- docker run --rm -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out
54
+ docker run --rm --user "$(id -u):$(id -g)" -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out
55
55
  ```
56
56
 
57
57
  The image carries Noir + Trivy + Gitleaks + Semgrep + Checkov; mount your repo at `/scan` and the
@@ -174,10 +174,12 @@ python3 -m unittest discover -s tests # stdlib only, no Noir/network — 23 t
174
174
  Published to PyPI via **Trusted Publishing** (OIDC — no API token in the repo). To cut a release:
175
175
 
176
176
  ```bash
177
- # 1. bump the version in pyproject.toml (e.g. 0.2.0 → 0.2.1)
177
+ # 1. bump the version in pyproject.toml (e.g. 0.2.1 → 0.2.2)
178
178
  # 2. tag it and push — the tag must match pyproject's version (CI verifies):
179
- git tag v0.2.1 && git push origin v0.2.1
180
- # → .github/workflows/publish.yml builds + publishes to PyPI
179
+ git tag v0.2.2 && git push origin v0.2.2
180
+ # → publish.yml builds, INSTALLS + smoke-tests the wheel (version match,
181
+ # calibration ships, a real `websec run`), then publishes. A bad build fails
182
+ # CI instead of reaching PyPI — so you never have to yank after the fact.
181
183
  ```
182
184
 
183
185
  One-time PyPI setup (before the first release): on pypi.org → **Account → Publishing → Add a pending
@@ -213,8 +215,27 @@ lets you just ask, in plain English, for a security pass: it runs `websec`, read
213
215
  works the findings with you. For other agents the universal interface is unchanged: run the CLI, read
214
216
  `AGENT-BRIEFING.md`.
215
217
 
218
+ **Install gotchas (field-tested):**
219
+
220
+ - The install id is `plugin@marketplace` — `websec-validator@websec-plugins` (the marketplace name
221
+ from `.claude-plugin/marketplace.json`), **not** `@websec-validator` (the repo).
222
+ - The plugin only delivers the *instructions*; the actual scanning is a **separate Python CLI**
223
+ (`websec`). The skill's Step 0 installs it (`pipx install websec-validator`) if it's missing.
224
+ - **`/plugin …` only works in the terminal CLI.** In the Claude **app / Agent SDK** (no `/plugin`),
225
+ configure it in `.claude/settings.json` instead:
226
+ ```json
227
+ {
228
+ "extraKnownMarketplaces": {
229
+ "websec-plugins": { "source": { "source": "github", "repo": "raccioly/websec-validator" } }
230
+ },
231
+ "enabledPlugins": { "websec-validator@websec-plugins": true }
232
+ }
233
+ ```
234
+ This **registers + enables** the plugin but does **not** auto-fetch it — the first download still
235
+ needs the CLI (`/plugin install websec-validator@websec-plugins`) once. (Project `.claude/settings.json`
236
+ for a team; `~/.claude/settings.json` for just you.)
237
+
216
238
  ## Credits
217
239
 
218
- Methodology + probe library come from a real authenticated pentest pass
219
- ([`base-research/REPLICATION-PLAYBOOK.md`](base-research/REPLICATION-PLAYBOOK.md), not committed).
220
- This tool productizes that hand-written pass into something an AI agent can run on any repo.
240
+ Methodology + probe library are distilled from a real authenticated penetration-testing pass.
241
+ This tool productizes that hand-written methodology into something an AI agent can run on any repo.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "websec-validator"
7
- version = "0.2.1"
7
+ version = "0.2.2"
8
8
  description = "Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -164,6 +164,11 @@ Production source maps exposed: {client.get("production_source_maps", False)}
164
164
 
165
165
  Scanners available: {avail}
166
166
 
167
+ > ⚠️ The count below is **raw scanner output (pre-triage)** — expect mostly noise (vulnerable-looking
168
+ > patterns that are guarded, intended-public, or not exploitable). The **triaged, calibrated view** is the
169
+ > findings ledger in `REPORT.md` / `findings-ledger.json` — each finding there carries a `P(real)`. Start
170
+ > from the ledger and debate-verify; don't report these raw counts as vulnerabilities.
171
+
167
172
  {findings_block}
168
173
 
169
174
  Install for fuller coverage:
@@ -111,7 +111,7 @@ def cmd_run(args) -> int:
111
111
 
112
112
  # 3. probes: choose + stage
113
113
  chosen = probes.applicable(facts)
114
- manifest = probes.stage(chosen, out)
114
+ manifest = probes.stage(chosen, out, facts)
115
115
  print(f"\n staged {len([m for m in manifest if 'attack_class' in m])} tailored probe template(s) → {out / 'probes'}")
116
116
 
117
117
  # 4. traceable findings ledger (recon + static; dynamic merges in via `websec dynamic`)
@@ -156,12 +156,16 @@ def cmd_dynamic(args) -> int:
156
156
  dyn = dynamic.run_unauth(args.target, facts_path, out, probe_writes=args.probe_writes)
157
157
  u = dyn["unauth_reachability"]
158
158
  print(f" target: {u['target']} · → {u['summary']}")
159
+ if u.get("warning"):
160
+ print(f"\n {u['warning']}\n")
159
161
  for r in u["results"]:
160
162
  mark = "🔓" if r["verdict"] == "OPEN-no-auth" else (" ·" if r["verdict"] == "protected" else " ")
161
163
  print(f" {mark} {str(r['status']):>4} {r['verdict']:26} {r['path']}")
162
164
  if args.probe_writes:
163
165
  w = dyn["write_auth_enforcement"]
164
166
  print(f"\n write-verb auth enforcement → {w['summary']}")
167
+ if w.get("warning"):
168
+ print(f"\n {w['warning']}\n")
165
169
  for r in w["results"]:
166
170
  mark = "🔓" if r["verdict"] != "auth-enforced" and not r["verdict"].startswith("http-") else " ·"
167
171
  print(f" {mark} {str(r['status']):>4} {r['verdict']:42} {r['method']} {r['path']}")
@@ -138,6 +138,19 @@ SIDE_EFFECTING = re.compile(
138
138
  r"sponsor-post|upload|/refresh|/rebuild|/process|/dispatch|/import|/export|/scrape(?![\w-])", re.I)
139
139
 
140
140
 
141
+ # When NOTHING enforces auth, the likeliest cause in a test env is a fail-OPEN auth
142
+ # provider (unconfigured/erroring), not "the app has no auth". Say so loudly — a naive
143
+ # read of all-200s as "wide open" is a catastrophic false positive.
144
+ FAIL_OPEN_WARNING = (
145
+ "⚠ NO endpoint enforced auth (none returned 401/403). Before concluding authentication is missing, "
146
+ "RULE OUT a fail-OPEN test environment: an unconfigured or erroring auth provider "
147
+ "(Cognito/Auth0/NextAuth/…) can let every request through. Configure a valid (even dummy) provider, or "
148
+ "mock a session, and RE-RUN — if these flip to 401, the app is fine and the env was the bug. Until an "
149
+ "auth-enforced response is observed, treat ALL authN/authZ results here as UNTRUSTWORTHY. (If it stays "
150
+ "open WITH a working provider, that's a real finding: the middleware should fail CLOSED — deny on auth error.)"
151
+ )
152
+
153
+
141
154
  def unauth_reachability(target: str, facts: dict, max_endpoints: int = 50) -> dict:
142
155
  """STRICT read-only: GET each genuine data-read endpoint with NO auth, to see
143
156
  which are reachable unauthenticated. Skips side-effecting GETs and any path
@@ -170,6 +183,8 @@ def unauth_reachability(target: str, facts: dict, max_endpoints: int = 50) -> di
170
183
  results.append({"path": path, "status": code, "bytes": n, "verdict": verdict})
171
184
 
172
185
  openish = [r for r in results if r["verdict"] == "OPEN-no-auth"]
186
+ protected = [r for r in results if r["verdict"] in ("protected", "redirect (likely to login)")]
187
+ fail_open = len(results) >= 3 and not protected and bool(openish)
173
188
  return {
174
189
  "target": target,
175
190
  "mode": "STRICT read-only · unauthenticated · GET-only · side-effecting paths skipped",
@@ -177,8 +192,12 @@ def unauth_reachability(target: str, facts: dict, max_endpoints: int = 50) -> di
177
192
  "skipped_side_effecting": sorted(set(skipped)),
178
193
  "open_no_auth": openish,
179
194
  "results": results,
195
+ "fail_open_suspected": fail_open,
196
+ "authn_trustworthy": not fail_open,
197
+ "warning": FAIL_OPEN_WARNING if fail_open else "",
180
198
  "summary": f"{len(openish)}/{len(results)} data-read GET endpoints reachable WITHOUT auth"
181
- + (" — review whether these should be public" if openish else " — all gated"),
199
+ + (" — review whether these should be public" if openish else " — all gated")
200
+ + (" · ⚠ FAIL-OPEN SUSPECTED (nothing enforced auth — results untrustworthy)" if fail_open else ""),
182
201
  }
183
202
 
184
203
 
@@ -216,6 +235,7 @@ def write_auth_enforcement(target: str, facts: dict, max_endpoints: int = 80) ->
216
235
  missing = [r for r in results if r["verdict"] != "auth-enforced" and not r["verdict"].startswith("http-")]
217
236
  executed = [r for r in results if r["verdict"] == "EXECUTED-UNAUTH"]
218
237
  enforced = sum(1 for r in results if r["verdict"] == "auth-enforced")
238
+ fail_open = len(results) >= 3 and enforced == 0
219
239
  return {
220
240
  "note": "Heuristic: a protected route returns 401/403 BEFORE validation; a 400/404 unauth means "
221
241
  "the request reached the handler with no auth gate. VERIFY each — but inconsistency vs "
@@ -225,8 +245,12 @@ def write_auth_enforcement(target: str, facts: dict, max_endpoints: int = 80) ->
225
245
  "no_auth_gate": missing,
226
246
  "executed_unauth": executed,
227
247
  "results": results,
248
+ "fail_open_suspected": fail_open,
249
+ "authn_trustworthy": not fail_open,
250
+ "warning": FAIL_OPEN_WARNING if fail_open else "",
228
251
  "summary": f"{enforced}/{len(results)} write endpoints enforce auth · "
229
- f"{len(missing)} reached with no auth gate · {len(executed)} executed unauthenticated",
252
+ f"{len(missing)} reached with no auth gate · {len(executed)} executed unauthenticated"
253
+ + (" · ⚠ FAIL-OPEN SUSPECTED — results untrustworthy" if fail_open else ""),
230
254
  }
231
255
 
232
256
 
@@ -34,6 +34,14 @@ GLOBAL_AUTH = re.compile(
34
34
  r"app\.use\s*\(\s*[\w.]*(?:authenticate|requireAuth|authMiddleware|verifyToken|"
35
35
  r"isAuthenticated|jwtMiddleware|ensureAuth)\w*\s*\)", re.I)
36
36
 
37
+ # Does a Next.js middleware/proxy file actually enforce AUTH (vs. i18n/headers only)?
38
+ # `auth((req)=>…)` / `withAuth` / `req.auth` / getToken / getServerSession / redirect-to-login /
39
+ # a 401 / Clerk / Supabase updateSession all signal a global auth gate.
40
+ MW_AUTH = re.compile(
41
+ r"\bauth\s*\(|withAuth\b|req\.auth\b|getToken\s*\(|getServerSession\s*\(|clerkMiddleware|"
42
+ r"updateSession\s*\(|NextResponse\.redirect\([^)]*(?:login|signin)|status:\s*401|"
43
+ r"['\"]Authentication required['\"]", re.I)
44
+
37
45
  PUBLIC_HINT = re.compile(
38
46
  r"/(login|logout|register|signup|signin|health|healthz|ping|status|webhooks?|"
39
47
  r"public|\.well-known|robots|favicon|sitemap|callback|refresh|csrf|metrics)\b", re.I)
@@ -46,15 +54,20 @@ ROLE = re.compile(
46
54
 
47
55
 
48
56
  def _parse_next_middleware(ctx: RepoContext) -> dict:
49
- for cand in ("middleware.ts", "middleware.js", "src/middleware.ts", "src/middleware.js"):
57
+ # Next 15.5+/16 renamed `middleware.ts` → `proxy.ts` (both filenames are valid; the
58
+ # framework recognizes either). Missing this made the tool report "no global auth" on
59
+ # Next 16 apps and flag every handler — the single biggest false-positive cluster.
60
+ for cand in ("middleware.ts", "middleware.js", "src/middleware.ts", "src/middleware.js",
61
+ "proxy.ts", "proxy.js", "src/proxy.ts", "src/proxy.js"):
50
62
  txt = ctx.manifest(cand)
51
63
  if not txt:
52
64
  continue
53
65
  matchers = re.findall(r"matcher\s*:\s*\[([^\]]*)\]", txt)
54
66
  patterns = re.findall(r"['\"]([^'\"]+)['\"]", matchers[0]) if matchers else []
55
67
  roles = [m for grp in ROLE.findall(txt) for m in grp if m]
56
- return {"present": True, "file": cand, "matchers": patterns, "role_checks": roles}
57
- return {"present": False, "matchers": []}
68
+ return {"present": True, "file": cand, "matchers": patterns,
69
+ "is_auth": bool(MW_AUTH.search(txt)), "role_checks": roles}
70
+ return {"present": False, "matchers": [], "is_auth": False}
58
71
 
59
72
 
60
73
  def _matcher_covers(path: str, matchers: list) -> bool:
@@ -85,8 +98,10 @@ class AuthzExtractor(Extractor):
85
98
  def extract(self, ctx: RepoContext, facts: dict) -> dict:
86
99
  endpoints = (facts.get("routes") or {}).get("endpoints", [])
87
100
  mw = _parse_next_middleware(ctx)
101
+ mw_auth = mw.get("is_auth", False)
88
102
 
89
- global_auth = any(GLOBAL_AUTH.search(t) for _p, _r, t in ctx.iter_code())
103
+ # global auth = an Express path-less auth middleware OR a Next auth middleware/proxy
104
+ global_auth = mw_auth or any(GLOBAL_AUTH.search(t) for _p, _r, t in ctx.iter_code())
90
105
  roles: set = set(mw.get("role_checks", []))
91
106
  protected = no_guard = unknown = 0
92
107
  no_guard_writes, egs = [], []
@@ -95,7 +110,10 @@ class AuthzExtractor(Extractor):
95
110
  cp = e.get("code_path", "")
96
111
  text = ctx.text(Path(cp)) if cp else ""
97
112
  _collect_roles(text, roles)
98
- guarded = bool(text and GUARD.search(text)) or _matcher_covers(e.get("path", ""), mw.get("matchers", []))
113
+ # a matcher only counts as a guard when the middleware actually does auth — a
114
+ # non-auth middleware.ts (i18n/headers) must NOT mark routes protected.
115
+ guarded = bool(text and GUARD.search(text)) or \
116
+ (mw_auth and _matcher_covers(e.get("path", ""), mw.get("matchers", [])))
99
117
  relcp = ctx.rel(Path(cp)) if cp else ""
100
118
  egs.append({"method": e.get("method"), "path": e.get("path"), "code_path": relcp,
101
119
  "guarded": bool(guarded), "analyzed": bool(text),
@@ -110,10 +128,12 @@ class AuthzExtractor(Extractor):
110
128
  no_guard_writes.append(f"{e['method']} {e['path']} ({relcp or '?'})")
111
129
 
112
130
  if global_auth:
113
- note = ("A GLOBAL auth middleware (`app.use(<auth>)`) was detected — most routes are likely "
114
- "protected by default. The list below is write endpoints with NO guard visible in their "
115
- "own handler file; they MAY be covered globally. Verify each is either covered or an "
116
- intentional public exemption — don't assume they're vulnerable.")
131
+ where = f"`{mw['file']}` (matcher {mw.get('matchers') or '—'})" if mw_auth else "`app.use(<auth>)`"
132
+ note = (f"A GLOBAL auth middleware ({where}) was detected — most routes are protected by default. "
133
+ "Endpoints its matcher covers are reported as guarded (defense-in-depth handled centrally). "
134
+ "Any list below is write endpoints with NO guard visible in their own handler file AND not "
135
+ "covered by the matcher; verify each is either covered or an intentional public exemption — "
136
+ "don't assume they're vulnerable.")
117
137
  else:
118
138
  note = ("No global auth middleware detected. Write endpoints with no visible guard are "
119
139
  "high-signal missing-authz leads — verify each.")
@@ -111,6 +111,11 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
111
111
  ((dynamic or {}).get("write_auth_enforcement", {}) or {}).get("results", [])}
112
112
  dyn_get = {r["path"]: r for r in
113
113
  ((dynamic or {}).get("unauth_reachability", {}) or {}).get("results", [])}
114
+ # If the dynamic run suspects a fail-OPEN test env, its unauth "successes" are untrustworthy —
115
+ # do NOT escalate them to CRITICAL (the catastrophic-false-positive trap). Fall back to the
116
+ # recon-level hypothesis with a caveat until the operator re-runs with auth resolving.
117
+ dyn_fail_open = bool(((dynamic or {}).get("write_auth_enforcement", {}) or {}).get("fail_open_suspected")
118
+ or ((dynamic or {}).get("unauth_reachability", {}) or {}).get("fail_open_suspected"))
114
119
  for eg in authz.get("endpoint_guards", []):
115
120
  if eg.get("guarded") or eg.get("public_hint") or not eg.get("analyzed"):
116
121
  continue
@@ -121,7 +126,12 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
121
126
  dv = dyn_write.get((m, p)) or dyn_get.get(p)
122
127
  if dv:
123
128
  verdict = dv.get("verdict", "")
124
- if "EXECUTED-UNAUTH" in verdict:
129
+ if dyn_fail_open and verdict not in ("auth-enforced", "protected"):
130
+ ev.append({"layer": "dynamic", "detail": f"reached unauthenticated (HTTP {dv.get('status')}) — "
131
+ "BUT fail-open suspected (auth not resolving in the test env); UNTRUSTWORTHY, "
132
+ "re-run with a working auth provider before trusting this"})
133
+ # keep recon-level conf/sev; do not escalate
134
+ elif "EXECUTED-UNAUTH" in verdict:
125
135
  ev.append({"layer": "dynamic", "detail": f"{m} executed UNAUTHENTICATED (HTTP {dv.get('status')})"})
126
136
  conf, sev = "HIGH", "CRITICAL"
127
137
  elif "no-auth-gate" in verdict or verdict == "OPEN-no-auth":
@@ -151,11 +161,22 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
151
161
  [{"layer": "static", "detail": f"{'+'.join(t.get('tools', []))}: {t.get('title','')}"}]))
152
162
 
153
163
  # ---- 3. Attack-surface sinks (recon hypotheses) ----
164
+ # On a purely-NoSQL datastore, classic SQL-injection alerts are almost always FPs —
165
+ # down-rank them (the inflation the field test flagged) rather than ranking them MEDIUM.
166
+ _ds = {d.lower() for d in (facts.get("stack", {}).get("datastores") or [])}
167
+ _nosql = {"dynamodb", "dynamo", "mongodb", "mongo", "firestore", "cosmos", "cosmosdb", "couchdb", "cassandra"}
168
+ _sql = {"postgres", "postgresql", "mysql", "mariadb", "sqlite", "mssql", "sqlserver", "aurora", "oracle", "cockroach"}
169
+ is_nosql_only = bool(_ds & _nosql) and not (_ds & _sql)
154
170
  for cls, info in (facts.get("surface", {}).get("sinks", {}) or {}).items():
171
+ sev = "MEDIUM"
172
+ ev = [{"layer": "recon", "detail": f"user-input-gated {cls} in {info.get('count')} file(s)"}]
173
+ if cls in ("sqli", "sql-injection") and is_nosql_only:
174
+ sev = "LOW"
175
+ ev.append({"layer": "recon", "detail": f"datastore is {', '.join(sorted(_ds)) or 'NoSQL'} — "
176
+ "classic SQLi is unlikely here; check for NoSQL injection instead (usually a false positive)"})
155
177
  out.append(_f(f"{cls} sink ({info.get('count')} site(s))", "attack-surface",
156
- cls if cls in STANDARDS else "sast", "MEDIUM", "LOW",
157
- (info.get("files") or ["?"])[0],
158
- [{"layer": "recon", "detail": f"user-input-gated {cls} in {info.get('count')} file(s)"}]))
178
+ cls if cls in STANDARDS else "sast", sev, "LOW",
179
+ (info.get("files") or ["?"])[0], ev))
159
180
 
160
181
  # ---- 4. Client-side secret exposure (HIGH — ships to browser) ----
161
182
  for leak in (facts.get("client_exposure", {}).get("public_secret_leaks", []) +
@@ -0,0 +1,156 @@
1
+ """Stage the probe library, tailored to the extracted attack surface.
2
+
3
+ Probe selection is driven by the real recon facts. Staging now also writes a
4
+ `probe-context.json` (the target's REAL routes/auth/sensitive-fields/tenant key,
5
+ from FACTS) next to the probes, prepends a "this is a draft — your surface is in
6
+ probe-context.json" banner to each, and records the real per-probe target endpoints
7
+ in the manifest — so the staged probes describe *this* app, not the reference app
8
+ the templates were authored against.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ from importlib import resources
15
+ from pathlib import Path
16
+
17
+ WRITE_VERBS = ("POST", "PUT", "PATCH", "DELETE")
18
+
19
+ # label -> (filename, attack class, what the agent must supply)
20
+ PROBES = {
21
+ "unauth-baseline": ("unauth-baseline.sh", "Missing authentication (no-creds baseline)",
22
+ "just the target base URL — it reads the routes from probe-context.json"),
23
+ "bola-cross-tenant": ("bola-cross-tenant.sh", "BOLA / cross-tenant read (OWASP API #1)",
24
+ "two role tokens in different tenants + the IDOR-candidate routes"),
25
+ "bola-write-verbs": ("bola-write-verbs.py", "BOLA on PATCH/PUT/POST/DELETE",
26
+ "two role tokens + the write endpoints + a sample object id per tenant"),
27
+ "mass-assignment": ("mass-assignment.py", "BOPLA / mass assignment (OWASP API #3)",
28
+ "a low-priv token + a write endpoint that updates a record"),
29
+ "jwt-attacks": ("jwt-attacks.sh", "JWT: alg:none, tamper, expiry, replay",
30
+ "a valid token + the login + a protected endpoint"),
31
+ "hs256-brute-force": ("hs256-brute-force.py", "Offline HS256 weak-secret brute",
32
+ "one HS256 JWT (offline — no live app needed)"),
33
+ "ssrf-probes": ("ssrf-probes.sh", "SSRF: IMDS / RFC1918 / file://",
34
+ "an authorized token + the SSRF-candidate endpoints/params"),
35
+ "race-conditions": ("race-conditions.py", "Race / claim-collision invariants",
36
+ "a token + an endpoint with a single-winner invariant + an idempotency key"),
37
+ "webhook-forgery": ("webhook-forgery.py", "Inbound webhook signature/replay",
38
+ "the webhook path + signature header name + scheme"),
39
+ "rate-limit-burst": ("rate-limit-burst.sh", "Rate-limit + X-Forwarded-For bypass",
40
+ "the login + a rate-limited endpoint"),
41
+ "compare-roles": ("compare-roles.sh", "Two-role DAST surface diff",
42
+ "two SARIF reports from a role-A and role-B scan (dynamic phase)"),
43
+ "dlp-bypass-offline": ("dlp-bypass-offline.py", "DLP/detection regex encoding bypass",
44
+ "your DLP/redaction regexes (offline)"),
45
+ "s3-assess": ("s3-assess.sh", "S3 bucket posture", "a bucket name + AWS creds"),
46
+ }
47
+
48
+ # unauth-baseline is ALWAYS staged: it's the cheapest probe and directly exercises the
49
+ # #1 lead class (missing authentication) — the one a no-creds run can confirm immediately.
50
+ ALWAYS = ["unauth-baseline", "jwt-attacks", "hs256-brute-force", "rate-limit-burst"]
51
+
52
+ # which targeting bucket each probe should be pointed at (for the manifest's real targets)
53
+ _TARGET_KEYS = {
54
+ "unauth-baseline": "write_endpoints",
55
+ "bola-write-verbs": "write_endpoints",
56
+ "mass-assignment": "write_endpoints",
57
+ "bola-cross-tenant": "idor_candidates",
58
+ "ssrf-probes": "ssrf_candidates",
59
+ "webhook-forgery": "write_endpoints",
60
+ }
61
+
62
+ _BANNER = (
63
+ "# ─────────────────────────────────────────────────────────────────────────────\n"
64
+ "# websec-validator — DRAFT probe. Any example endpoints / auth / login below are\n"
65
+ "# PLACEHOLDERS from the template. THIS target's real surface — routes, auth scheme\n"
66
+ "# + token location, sensitive fields, tenant key — is in ./probe-context.json\n"
67
+ "# (generated from FACTS.json for this app). Use those values before running; the\n"
68
+ "# agent should finalize this draft against probe-context.json, then fill secrets.\n"
69
+ "# ─────────────────────────────────────────────────────────────────────────────\n"
70
+ )
71
+
72
+
73
def applicable(facts: dict) -> list:
    """Pick the probes the extracted surface actually justifies.

    Selection starts from the always-staged set (``ALWAYS``) and grows with what
    the recon facts contain:

    * write endpoints   -> mass-assignment, webhook-forgery, race-conditions
    * tenant candidates -> bola-cross-tenant, bola-write-verbs, compare-roles
    * SSRF candidates or an ``ssrf-outbound-http`` sink -> ssrf-probes

    Args:
        facts: the recon facts dict. Missing sections *and* sections that are
            present but ``None`` are both treated as empty.

    Returns:
        Probe keys (only keys present in ``PROBES``), de-duplicated, in the
        order they were selected.
    """
    # `or {}` rather than a .get() default: a key that exists with a null value
    # would otherwise yield None and crash on the next .get().
    targeting = (facts.get("routes") or {}).get("targeting") or {}
    tenant = (facts.get("tenant") or {}).get("candidates")
    sinks = (facts.get("surface") or {}).get("sinks") or {}
    has_writes = bool(targeting.get("write_endpoints"))

    chosen = list(ALWAYS)
    if has_writes:
        chosen.append("mass-assignment")
    if tenant:
        chosen += ["bola-cross-tenant", "bola-write-verbs", "compare-roles"]
    if targeting.get("ssrf_candidates") or sinks.get("ssrf-outbound-http"):
        chosen.append("ssrf-probes")
    if has_writes:
        # kept after the tenant/SSRF probes so the staged order is unchanged
        chosen += ["webhook-forgery", "race-conditions"]

    # dict.fromkeys de-duplicates while preserving first-seen order
    return [key for key in dict.fromkeys(chosen) if key in PROBES]
94
+
95
+
96
def build_context(facts: dict) -> dict:
    """Build the target's real, probe-ready surface (written to probe-context.json).

    Args:
        facts: the recon facts dict. Every section is optional; a section that is
            missing or present-but-``None`` is treated as empty instead of raising.

    Returns:
        A JSON-serialisable dict with the auth scheme/token location, the capped
        endpoint buckets (writes, IDOR/SSRF/upload candidates, auth endpoints),
        sensitive fields, tenant keys and datastore class, plus a placeholder
        ``target_base_url`` the operator must fill in.
    """
    routes = facts.get("routes") or {}
    tgt = routes.get("targeting") or {}
    auth = facts.get("auth") or {}
    tenant_candidates = (facts.get("tenant") or {}).get("candidates") or []

    def bucket(name: str, limit: int) -> list:
        # a null bucket must behave like an empty one — slicing None raises TypeError
        return list(tgt.get(name) or [])[:limit]

    # "METHOD /path" strings for every mutating endpoint, capped to keep the file readable
    writes = [f"{e.get('method')} {e.get('path')}" for e in (routes.get("endpoints") or [])
              if e.get("method") in WRITE_VERBS][:80]

    return {
        "target_base_url": "FILL_ME (e.g. http://localhost:3000)",
        "auth": {
            "scheme": auth.get("scheme"),
            "token_location": auth.get("token_location"),
            "login_endpoints": bucket("auth_endpoints", 10),
            "how_to_authenticate": "cookie-session (e.g. NextAuth) → send the session cookie; "
                                   "bearer → Authorization: Bearer <jwt>; api-key → the documented key header",
        },
        "endpoints": {
            "writes": writes,
            "idor_candidates": bucket("idor_candidates", 60),
            "ssrf_candidates": bucket("ssrf_candidates", 40),
            "upload_candidates": bucket("upload_candidates", 40),
            "auth_endpoints": bucket("auth_endpoints", 20),
        },
        "sensitive_fields": (facts.get("schemas") or {}).get("sensitive_fields") or [],
        "tenant_keys": [c.get("key") for c in tenant_candidates][:5],
        "datastore_class": (facts.get("surface") or {}).get("datastore_class"),
        "note": "These are THIS app's real routes/auth (from FACTS.json). Finalize each probe draft "
                "against this file, supply secrets/tokens, then run against a TEST instance only.",
    }
125
+
126
+
127
def stage(chosen: list, outdir: Path, facts: dict | None = None) -> list:
    """Copy the chosen probe templates into ``<outdir>/probes`` and describe them.

    Writes ``probe-context.json`` (see ``build_context``) next to the probes, then
    copies each template with the draft banner inserted — after the shebang line
    when there is one, otherwise at the very top.

    Args:
        chosen: probe keys to stage; each must be a key of ``PROBES``.
        outdir: output directory; ``probes/`` is created beneath it if needed.
        facts: recon facts used for the context file and per-probe targets.

    Returns:
        The manifest: a ``_context`` entry followed by one entry per probe. A probe
        that could not be staged gets a ``status: "stage-error: ..."`` entry instead
        of aborting the whole run.
    """
    dest = outdir / "probes"
    dest.mkdir(parents=True, exist_ok=True)
    facts = facts or {}

    ctx = build_context(facts)
    (dest / "probe-context.json").write_text(json.dumps(ctx, indent=2) + "\n", encoding="utf-8")
    tgt = (facts.get("routes") or {}).get("targeting") or {}

    manifest = [{"key": "_context", "file": "probes/probe-context.json",
                 "note": "the target's real routes/auth/fields — finalize the drafts against this"}]
    src_root = resources.files("websec_validator").joinpath("templates/probes")
    for key in chosen:
        fname, attack, needs = PROBES[key]
        bucket = _TARGET_KEYS.get(key)
        targets = list(tgt.get(bucket) or [])[:15] if bucket else []
        try:
            text = src_root.joinpath(fname).read_bytes().decode("utf-8", "replace")
            # prepend the draft banner after any shebang line
            if text.startswith("#!"):
                shebang, _, rest = text.partition("\n")
                text = f"{shebang}\n{_BANNER}{rest}"
            else:
                text = _BANNER + text
            # Write bytes, not text: the banner contains non-ASCII rule characters, so a
            # locale-default encoding (e.g. cp1252) would raise, and text mode would turn
            # LF into CRLF on Windows — which breaks the shebang line of the shell probes.
            (dest / fname).write_bytes(text.encode("utf-8"))
            manifest.append({"key": key, "file": f"probes/{fname}", "attack_class": attack,
                             "agent_must_supply": needs, "targets": targets})
        except Exception as e:
            # best-effort by design: one unreadable template must not block the others
            manifest.append({"key": key, "file": fname, "status": f"stage-error: {e}"})
    return manifest
@@ -71,7 +71,8 @@ def render(facts: dict, scanners: dict, scan_results: list, unified: dict | None
71
71
  | Endpoints | **{routes.get('count', 0)}** (via {routes.get('engine','?').split(' ')[0]}) |
72
72
  | Auth | {facts.get('auth', {}).get('scheme','?')} · roles: {', '.join(authz.get('roles_detected', [])) or 'none'} |
73
73
  | Access control | {gs.get('with_visible_guard', 0)} guarded · **{gs.get('no_visible_guard', 0)} no visible guard** · global-middleware: {authz.get('global_auth_middleware', False)} |
74
- | Findings (ledger) | {ledger_hdr} |
74
+ | Static scanner (raw, pre-triage) | {sev_line} |
75
+ | **Findings ledger** (triaged + calibrated) | {ledger_hdr} |
75
76
  | Attack surface | IDOR: {len(tgt.get('idor_candidates', []))} · SSRF: {len(tgt.get('ssrf_candidates', []))} · upload: {len(tgt.get('upload_candidates', []))} · writes: {len(tgt.get('write_endpoints', []))} |
76
77
 
77
78
  ## 1. Findings ledger (ranked · evidence chain · standards · confidence)
@@ -14,6 +14,7 @@ here — that is the dynamic phase, which v1 leaves to the agent + human.
14
14
  from __future__ import annotations
15
15
 
16
16
  import json
17
+ import re
17
18
  import shutil
18
19
  import subprocess
19
20
  from dataclasses import dataclass
@@ -32,11 +33,19 @@ class Scanner:
32
33
  argv: object = None
33
34
 
34
35
 
36
+ # Never scan the tool's own output, deps, or build artifacts. Scanning `websec-out/`
37
+ # made Semgrep re-flag the AWS keys Gitleaks had just written into the report (and the
38
+ # count compounded across runs). Filesystem scanners get these excluded explicitly.
39
+ EXCLUDE_DIRS = ("websec-out", "node_modules", ".next", "dist", "build", ".git",
40
+ "security", ".venv", "venv", "__pycache__", ".mypy_cache", "coverage")
41
+
42
+
35
43
  def _trivy(target: Path, out: Path) -> list:
36
44
  # SCA + secrets + IaC misconfig in one pass; pinned by the user's install.
37
- return ["trivy", "fs", "--scanners", "vuln,secret,misconfig",
38
- "--skip-dirs", "node_modules", "--skip-dirs", "security",
39
- "--format", "json", "--output", str(out), str(target)]
45
+ cmd = ["trivy", "fs", "--scanners", "vuln,secret,misconfig", "--format", "json", "--output", str(out)]
46
+ for d in EXCLUDE_DIRS:
47
+ cmd += ["--skip-dirs", d]
48
+ return cmd + [str(target)]
40
49
 
41
50
 
42
51
  def _gitleaks(target: Path, out: Path) -> list:
@@ -45,8 +54,10 @@ def _gitleaks(target: Path, out: Path) -> list:
45
54
 
46
55
 
47
56
  def _semgrep(target: Path, out: Path) -> list:
48
- return ["semgrep", "scan", "--config", "auto", "--json",
49
- "--output", str(out), str(target)]
57
+ cmd = ["semgrep", "scan", "--config", "auto", "--json", "--output", str(out)]
58
+ for d in EXCLUDE_DIRS:
59
+ cmd += ["--exclude", d]
60
+ return cmd + [str(target)]
50
61
 
51
62
 
52
63
  def _checkov(target: Path, out: Path) -> list:
@@ -156,6 +167,24 @@ def _sev(s: str) -> str:
156
167
  return s if s in SEV_ORDER else "MEDIUM"
157
168
 
158
169
 
170
+ def _aws_secret_tier(secret: str, match: str):
171
+ """Tier an AWS-credential hit by key type / context → (severity, note) or (None, None).
172
+
173
+ Not every 'AWS key' is a live, long-lived breach risk: presigned-URL creds and ASIA
174
+ short-lived STS tokens are usually scoped + expired. Only AKIA long-lived keys are HIGH.
175
+ """
176
+ blob = f"{secret or ''} {match or ''}"
177
+ if re.search(r"X-Amz-(Signature|Credential|Expires|Security-Token)=", blob, re.I):
178
+ return "LOW", "presigned-URL credential (temporary + scoped, usually already expired)"
179
+ if re.search(r"\bASIA[0-9A-Z]{16}\b", blob):
180
+ return "MEDIUM", "temporary STS token (ASIA — short-lived, likely expired)"
181
+ if re.search(r"\b(?:AROA|AIDA|AGPA|AIPA|ANPA|ANVA)[0-9A-Z]{16}\b", blob):
182
+ return "LOW", "AWS resource/role identifier (not a usable secret)"
183
+ if re.search(r"\bAKIA[0-9A-Z]{16}\b", blob):
184
+ return "HIGH", "long-lived access key (AKIA)"
185
+ return None, None
186
+
187
+
159
188
  def _norm_trivy(data: dict) -> list:
160
189
  out = []
161
190
  for res in (data.get("Results") or []):
@@ -166,10 +195,11 @@ def _norm_trivy(data: dict) -> list:
166
195
  "title": f"{v.get('PkgName')} {v.get('InstalledVersion')} → {v.get('FixedVersion', '(no fix)')}",
167
196
  "fingerprint": f"cve|{v.get('PkgName')}|{v.get('VulnerabilityID')}"})
168
197
  for s in (res.get("Secrets") or []):
169
- out.append({"tool": "trivy", "category": "secret", "severity": _sev(s.get("Severity") or "HIGH"),
198
+ sev, note = _aws_secret_tier(s.get("Match", ""), s.get("Code", "") or "")
199
+ title = f"secret: {s.get('Title') or s.get('RuleID')}" + (f" — {note}" if note else "")
200
+ out.append({"tool": "trivy", "category": "secret", "severity": sev or _sev(s.get("Severity") or "HIGH"),
170
201
  "key": s.get("RuleID", ""), "file": tgt, "line": s.get("StartLine", 0),
171
- "title": f"secret: {s.get('Title') or s.get('RuleID')}",
172
- "fingerprint": f"secret|{tgt}|{s.get('RuleID')}"})
202
+ "title": title, "fingerprint": f"secret|{tgt}|{s.get('RuleID')}"})
173
203
  for m in (res.get("Misconfigurations") or []):
174
204
  out.append({"tool": "trivy", "category": "iac", "severity": _sev(m.get("Severity")),
175
205
  "key": m.get("ID", ""), "file": tgt, "line": 0, "title": (m.get("Title") or "")[:90],
@@ -182,10 +212,11 @@ def _norm_gitleaks(data) -> list:
182
212
  out = []
183
213
  for x in rows:
184
214
  f, rule = x.get("File", ""), x.get("RuleID", "")
185
- out.append({"tool": "gitleaks", "category": "secret", "severity": "HIGH",
215
+ sev, note = _aws_secret_tier(x.get("Secret", ""), x.get("Match", ""))
216
+ title = f"secret: {(x.get('Description') or rule)[:80]}" + (f" — {note}" if note else "")
217
+ out.append({"tool": "gitleaks", "category": "secret", "severity": sev or "HIGH",
186
218
  "key": rule, "file": f, "line": x.get("StartLine", 0),
187
- "title": f"secret: {(x.get('Description') or rule)[:80]}",
188
- "fingerprint": f"secret|{f}|{rule}"})
219
+ "title": title, "fingerprint": f"secret|{f}|{rule}"})
189
220
  return out
190
221
 
191
222
 
@@ -0,0 +1,44 @@
1
#!/usr/bin/env bash
# unauth-baseline — the cheapest, highest-value probe: hit every MUTATING route with
# NO credentials and expect 401/403. Any 2xx (or a non-401 that reached the handler)
# is a missing-authentication lead. Run this FIRST — it confirms the auth model before
# you spend effort on authorization/BOLA probes, and it catches both failure modes:
# a genuinely-open endpoint, AND an app whose auth fails OPEN in the test env (see below).
#
# Reads the target's real write routes from ./probe-context.json (written by websec).
# Usage: TARGET=http://localhost:3000 bash unauth-baseline.sh
# Exit:  0 = no leads · 1 = at least one lead · 2 = no target / no endpoints configured
set -uo pipefail

ctx="$(dirname "$0")/probe-context.json"

# The context path is passed to Python as argv instead of being spliced into the
# source, so a quote or space in the directory name cannot break the snippet.
ctx_base_url() {
  python3 -c 'import json, sys; print(json.load(open(sys.argv[1]))["target_base_url"])' "$ctx" 2>/dev/null
}
ctx_writes() {
  python3 -c 'import json, sys; print("\n".join(json.load(open(sys.argv[1]))["endpoints"]["writes"]))' "$ctx" 2>/dev/null
}

BASE="${TARGET:-$(ctx_base_url)}"
if [ -z "${BASE:-}" ] || [ "${BASE#FILL}" != "$BASE" ]; then
  echo "Set TARGET=http://host:port (or fill target_base_url in probe-context.json)"; exit 2
fi
BASE="${BASE%/}"   # paths already start with '/', avoid 'host//path'

# while-read instead of mapfile: mapfile needs bash >= 4 (stock macOS ships 3.2)
EPS=()
while IFS= read -r line; do
  [ -n "$line" ] && EPS+=("$line")
done < <(ctx_writes)
if [ "${#EPS[@]}" -eq 0 ]; then
  echo "No write endpoints in probe-context.json — add 'METHOD /path' lines under endpoints.writes."; exit 2
fi

echo "unauth baseline vs $BASE (no credentials sent; each SHOULD be 401/403)"
echo "------------------------------------------------------------------------"
leads=0 ok=0 unreachable=0
for ep in "${EPS[@]}"; do
  method="${ep%% *}"; path="${ep#* }"
  # --globoff: route templates contain {id} / [id], which curl would otherwise expand
  # as URL globs (rewriting the path, or failing with a bogus 000 "connection" error).
  code=$(curl -s --globoff -o /dev/null -w '%{http_code}' -X "$method" "$BASE$path" \
    -H 'content-type: application/json' --data '{}' --max-time 15)
  case "$code" in
    401|403) printf ' ok %s %s %s\n' "$code" "$method" "$path"; ok=$((ok+1)) ;;
    000) printf ' ???? conn-fail %s %s (is the app running?)\n' "$method" "$path"; unreachable=$((unreachable+1)) ;;
    *) printf ' LEAD %s %s %s ← reached WITHOUT auth — verify\n' "$code" "$method" "$path"; leads=$((leads+1)) ;;
  esac
done
echo "------------------------------------------------------------------------"
echo "summary: $ok enforced (401/403) · $leads lead(s) reached without auth"
if [ "$unreachable" -gt 0 ]; then
  echo "         $unreachable request(s) got no HTTP response — those routes are UNTESTED"
fi
# Only warn about a fail-open env when routes really answered without auth; a run
# where every request was a connection failure proves nothing about authentication.
if [ "$ok" -eq 0 ] && [ "$leads" -gt 1 ]; then
  echo "⚠ EVERY route was reachable unauthenticated. Before concluding 'no auth', RULE OUT a"
  echo " fail-OPEN test env: if the auth provider (Cognito/Auth0/etc.) isn't configured, the"
  echo " middleware may be erroring through. Configure a valid/dummy provider (or mock a"
  echo " session) and re-run — if these flip to 401, the app is fine and the env was the bug."
fi
# A fixed status instead of the raw count: exit codes wrap modulo 256 (256 leads
# would read as success) and a count of 2 is indistinguishable from the usage error.
if [ "$leads" -gt 0 ]; then
  exit 1
fi
exit 0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: websec-validator
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
5
5
  Author: Ricardo Accioly
6
6
  License: MIT
@@ -21,7 +21,7 @@ Dynamic: license-file
21
21
  It is *not* an autonomous scanner and *not* a SaaS. It's the missing front-half: the thing that
22
22
  turns a repo into a precise, fact-grounded security brief an AI agent (with a human in the loop)
23
23
  can act on — an auto-filled, repo-aware version of a senior pentester's "here's what to test and
24
- how" handoff. Full landscape + why this niche is real: [`MARKET-ANALYSIS-AND-VERDICT.md`](MARKET-ANALYSIS-AND-VERDICT.md).
24
+ how" handoff. How it works + the reasoning behind every check: [`docs/METHODOLOGY.md`](docs/METHODOLOGY.md).
25
25
 
26
26
  ## Quickstart — just point it at your repo
27
27
 
@@ -37,7 +37,7 @@ local. The four ways to get there, all ending in the same `AGENT-BRIEFING.md` yo
37
37
  | **Tell your agent** (simplest) | — | say the line above |
38
38
  | **CLI** (a terminal) | `pipx install websec-validator` | `websec run /path/to/your/app` |
39
39
  | **Claude Code plugin** (slash) | `/plugin marketplace add raccioly/websec-validator` → `/plugin install websec-validator@websec-plugins` | invoke the **security-pass** skill, or just ask |
40
- | **Docker** (no install) | `docker build -t websec-validator .` | `docker run --rm -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out` |
40
+ | **Docker** (no install) | `docker build -t websec-validator .` | `docker run --rm --user "$(id -u):$(id -g)" -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out` |
41
41
 
42
42
  ➡️ **Want the reasoning behind every check?** Read **[docs/METHODOLOGY.md](docs/METHODOLOGY.md)** — what each test does and why.
43
43
 
@@ -63,7 +63,7 @@ No need to install Noir or any scanner — the image bundles them all (arch-awar
63
63
 
64
64
  ```bash
65
65
  docker build -t websec-validator .
66
- docker run --rm -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out
66
+ docker run --rm --user "$(id -u):$(id -g)" -v "$PWD:/scan" websec-validator run /scan --out /scan/websec-out
67
67
  ```
68
68
 
69
69
  The image carries Noir + Trivy + Gitleaks + Semgrep + Checkov; mount your repo at `/scan` and the
@@ -186,10 +186,12 @@ python3 -m unittest discover -s tests # stdlib only, no Noir/network — 23 t
186
186
  Published to PyPI via **Trusted Publishing** (OIDC — no API token in the repo). To cut a release:
187
187
 
188
188
  ```bash
189
- # 1. bump the version in pyproject.toml (e.g. 0.2.0 → 0.2.1)
189
+ # 1. bump the version in pyproject.toml (e.g. 0.2.1 → 0.2.2)
190
190
  # 2. tag it and push — the tag must match pyproject's version (CI verifies):
191
- git tag v0.2.1 && git push origin v0.2.1
192
- # → .github/workflows/publish.yml builds + publishes to PyPI
191
+ git tag v0.2.2 && git push origin v0.2.2
192
+ # → publish.yml builds, INSTALLS + smoke-tests the wheel (version match,
193
+ # calibration ships, a real `websec run`), then publishes. A bad build fails
194
+ # CI instead of reaching PyPI — so you never have to yank after the fact.
193
195
  ```
194
196
 
195
197
  One-time PyPI setup (before the first release): on pypi.org → **Account → Publishing → Add a pending
@@ -225,8 +227,27 @@ lets you just ask, in plain English, for a security pass: it runs `websec`, read
225
227
  works the findings with you. For other agents the universal interface is unchanged: run the CLI, read
226
228
  `AGENT-BRIEFING.md`.
227
229
 
230
+ **Install gotchas (field-tested):**
231
+
232
+ - The install id is `plugin@marketplace` — `websec-validator@websec-plugins` (the marketplace name
233
+ from `.claude-plugin/marketplace.json`), **not** `@websec-validator` (the repo).
234
+ - The plugin only delivers the *instructions*; the actual scanning is a **separate Python CLI**
235
+ (`websec`). The skill's Step 0 installs it (`pipx install websec-validator`) if it's missing.
236
+ - **`/plugin …` only works in the terminal CLI.** In the Claude **app / Agent SDK** (no `/plugin`),
237
+ configure it in `.claude/settings.json` instead:
238
+ ```json
239
+ {
240
+ "extraKnownMarketplaces": {
241
+ "websec-plugins": { "source": { "source": "github", "repo": "raccioly/websec-validator" } }
242
+ },
243
+ "enabledPlugins": { "websec-validator@websec-plugins": true }
244
+ }
245
+ ```
246
+ This **registers + enables** the plugin but does **not** auto-fetch it — the first download still
247
+ needs the CLI (`/plugin install websec-validator@websec-plugins`) once. (Project `.claude/settings.json`
248
+ for a team; `~/.claude/settings.json` for just you.)
249
+
228
250
  ## Credits
229
251
 
230
- Methodology + probe library come from a real authenticated pentest pass
231
- ([`base-research/REPLICATION-PLAYBOOK.md`](base-research/REPLICATION-PLAYBOOK.md), not committed).
232
- This tool productizes that hand-written pass into something an AI agent can run on any repo.
252
+ Methodology + probe library are distilled from a real authenticated penetration-testing pass.
253
+ This tool productizes that hand-written methodology into something an AI agent can run on any repo.
@@ -44,6 +44,7 @@ src/websec_validator/templates/probes/race-conditions.py
44
44
  src/websec_validator/templates/probes/rate-limit-burst.sh
45
45
  src/websec_validator/templates/probes/s3-assess.sh
46
46
  src/websec_validator/templates/probes/ssrf-probes.sh
47
+ src/websec_validator/templates/probes/unauth-baseline.sh
47
48
  src/websec_validator/templates/probes/webhook-forgery.py
48
49
  src/websec_validator/templates/reports/FINDINGS-SUMMARY.md.template
49
50
  src/websec_validator/templates/reports/access-control-matrix.md.template
@@ -15,9 +15,10 @@ from pathlib import Path
15
15
  ROOT = Path(__file__).resolve().parents[1]
16
16
  sys.path.insert(0, str(ROOT / "src"))
17
17
 
18
- from websec_validator import calibration, findings, scanners # noqa: E402
18
+ from websec_validator import calibration, findings, probes, scanners # noqa: E402
19
19
  from websec_validator.extractors import routes # noqa: E402
20
20
  from websec_validator.extractors.auth import AuthExtractor # noqa: E402
21
+ from websec_validator.extractors.authz import AuthzExtractor # noqa: E402
21
22
  from websec_validator.extractors.base import RepoContext # noqa: E402
22
23
  from websec_validator.extractors.stack import StackExtractor # noqa: E402
23
24
  from websec_validator.extractors.schemas import SchemasExtractor # noqa: E402
@@ -168,6 +169,90 @@ class CalibrationTests(unittest.TestCase):
168
169
  calibration.LOCAL_PATH = saved
169
170
 
170
171
 
172
class FieldFeedbackBatch1Tests(unittest.TestCase):
    """Regressions for the HugoCross live-run false positives (proxy.ts, self-scan, ASIA)."""

    def _next_app(self, proxy_body):
        """Run AuthzExtractor over a throwaway Next.js-style repo.

        The repo holds ``src/proxy.ts`` (with *proxy_body*) and one POST route
        handler; the extractor's output dict is returned.
        """
        # TemporaryDirectory + addCleanup (instead of mkdtemp) so every test run
        # removes its scratch repo rather than leaking it into the temp dir.
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        d = Path(tmp.name)
        (d / "src").mkdir()
        (d / "src" / "proxy.ts").write_text(proxy_body)
        (d / "src" / "r.ts").write_text("export async function POST(req){ return Response.json({}) }")
        ctx = RepoContext(d)
        facts = {"routes": {"endpoints": [
            {"method": "POST", "path": "/api/x", "code_path": str(d / "src" / "r.ts")}]}}
        return AuthzExtractor().extract(ctx, facts)

    def test_nextjs_proxy_ts_detected_as_global_auth(self):
        out = self._next_app(
            'export default auth((req) => { if (!req.auth) return NextResponse.json({}, {status: 401}); });\n'
            'export const config = { matcher: ["/((?!_next/static|favicon.ico).*)"] };')
        self.assertTrue(out["global_auth_middleware"])
        self.assertEqual(out["next_middleware"]["file"], "src/proxy.ts")
        self.assertTrue(out["next_middleware"]["is_auth"])
        self.assertEqual(out["write_endpoints_without_visible_guard"], [])  # the 42-HIGH FP cluster, gone

    def test_non_auth_middleware_does_not_falsely_guard(self):
        out = self._next_app('export function proxy(req){ return NextResponse.next(); }\n'
                             'export const config = { matcher: ["/((?!_next).*)"] };')
        self.assertFalse(out["global_auth_middleware"])  # not auth → not a guard
        self.assertEqual(out["guard_summary"]["no_visible_guard"], 1)  # so the route IS still flagged

    def test_scanner_argv_excludes_self_output(self):
        self.assertIn("websec-out", scanners._trivy(Path("/repo"), Path("/o")))
        self.assertIn("websec-out", scanners._semgrep(Path("/repo"), Path("/o")))

    def test_aws_credential_tiering(self):
        self.assertEqual(scanners._aws_secret_tier("AKIAIOSFODNN7EXAMPLE", "")[0], "HIGH")
        self.assertEqual(scanners._aws_secret_tier("ASIAIOSFODNN7EXAMPLE", "")[0], "MEDIUM")
        self.assertEqual(scanners._aws_secret_tier("", "X-Amz-Signature=z&X-Amz-Credential=ASIA")[0], "LOW")
        rows = [{"File": "j.json", "RuleID": "aws", "Description": "AWS key",
                 "Secret": "ASIAEXAMPLE000000000", "Match": "X-Amz-Signature=zzz"}]
        self.assertEqual(scanners._norm_gitleaks(rows)[0]["severity"], "LOW")  # presigned ASIA ≠ HIGH
+
212
+
213
class FailOpenGuardTests(unittest.TestCase):
    """P0-4: a fail-open test env must NOT escalate untrustworthy unauth 'successes' to CRITICAL."""

    def _ledger(self, fail_open):
        """Return the first ledger finding for one unguarded POST that executed unauthenticated."""
        guard = {"method": "POST", "path": "/api/x", "code_path": "r.ts",
                 "guarded": False, "analyzed": True, "public_hint": False}
        executed = {"method": "POST", "path": "/api/x", "status": 201, "verdict": "EXECUTED-UNAUTH"}
        enforcement = {"results": [executed], "fail_open_suspected": fail_open}
        ledger = findings.build_ledger(
            {"authz": {"endpoint_guards": [guard]}},
            None,
            {"write_auth_enforcement": enforcement},
            [],
        )
        return ledger["findings"][0]

    def test_fail_open_not_escalated(self):
        finding = self._ledger(True)
        self.assertNotEqual(finding["severity"], "CRITICAL")  # not escalated
        flagged = any("UNTRUSTWORTHY" in item["detail"] for item in finding["evidence"])
        self.assertTrue(flagged)

    def test_healthy_env_still_escalates(self):
        finding = self._ledger(False)
        self.assertEqual(finding["severity"], "CRITICAL")  # regression guard
+
231
+
232
class ProbeStagingTests(unittest.TestCase):
    """P0-3 / P1-2: probes ship with the target's real surface + an always-on unauth baseline."""

    def test_context_unauth_baseline_and_banner(self):
        # TemporaryDirectory + addCleanup (instead of mkdtemp) so the staged probes
        # are deleted after the test instead of accumulating in the temp dir.
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        d = Path(tmp.name)
        facts = {"routes": {"endpoints": [{"method": "POST", "path": "/api/sponsors"},
                                          {"method": "GET", "path": "/api/health"}],
                            "targeting": {"write_endpoints": ["POST /api/sponsors"]}},
                 "auth": {"scheme": "jwt", "token_location": "bearer"},
                 "tenant": {"candidates": [{"key": "tenantId"}]}}
        chosen = probes.applicable(facts)
        self.assertIn("unauth-baseline", chosen)  # always staged (P1-2)
        man = probes.stage(chosen, d, facts)
        # explicit utf-8: the staged banner contains non-ASCII rule characters, so
        # decoding with the locale default would fail on non-UTF-8 platforms
        ctx = json.loads((d / "probes" / "probe-context.json").read_text(encoding="utf-8"))
        self.assertIn("POST /api/sponsors", ctx["endpoints"]["writes"])  # real route, not template's
        self.assertEqual(ctx["auth"]["scheme"], "jwt")
        body = (d / "probes" / "unauth-baseline.sh").read_text(encoding="utf-8")
        self.assertTrue(body.startswith("#!"))  # shebang preserved
        self.assertIn("DRAFT probe", body)  # banner prepended
        self.assertIn("probe-context.json", body)
        ub = [m for m in man if m.get("key") == "unauth-baseline"][0]
        self.assertEqual(ub["targets"], ["POST /api/sponsors"])  # real per-probe targets
+
255
+
171
256
  class RouteUnitTests(unittest.TestCase):
172
257
  def test_clean_path(self):
173
258
  self.assertEqual(routes._clean_path("/api/users/:id"), "/api/users/{id}")
@@ -1,79 +0,0 @@
1
- """Stage the probe library, tailored to the extracted attack surface.
2
-
3
- Probe selection is now driven by the real recon facts — we only stage what the
4
- surface justifies, and the briefing tells the agent exactly which endpoints to
5
- point each probe at.
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- from importlib import resources
11
- from pathlib import Path
12
-
13
# Probe registry, keyed by probe label. Each value is a 3-tuple:
#   (template filename, attack class, what the agent must supply)
PROBES = {
    "bola-cross-tenant": (
        "bola-cross-tenant.sh",
        "BOLA / cross-tenant read (OWASP API #1)",
        "two role tokens in different tenants + the IDOR-candidate routes",
    ),
    "bola-write-verbs": (
        "bola-write-verbs.py",
        "BOLA on PATCH/PUT/POST/DELETE",
        "two role tokens + the write endpoints + a sample object id per tenant",
    ),
    "mass-assignment": (
        "mass-assignment.py",
        "BOPLA / mass assignment (OWASP API #3)",
        "a low-priv token + a write endpoint that updates a record",
    ),
    "jwt-attacks": (
        "jwt-attacks.sh",
        "JWT: alg:none, tamper, expiry, replay",
        "a valid token + the login + a protected endpoint",
    ),
    "hs256-brute-force": (
        "hs256-brute-force.py",
        "Offline HS256 weak-secret brute",
        "one HS256 JWT (offline — no live app needed)",
    ),
    "ssrf-probes": (
        "ssrf-probes.sh",
        "SSRF: IMDS / RFC1918 / file://",
        "an authorized token + the SSRF-candidate endpoints/params",
    ),
    "race-conditions": (
        "race-conditions.py",
        "Race / claim-collision invariants",
        "a token + an endpoint with a single-winner invariant + an idempotency key",
    ),
    "webhook-forgery": (
        "webhook-forgery.py",
        "Inbound webhook signature/replay",
        "the webhook path + signature header name + scheme",
    ),
    "rate-limit-burst": (
        "rate-limit-burst.sh",
        "Rate-limit + X-Forwarded-For bypass",
        "the login + a rate-limited endpoint",
    ),
    "compare-roles": (
        "compare-roles.sh",
        "Two-role DAST surface diff",
        "two SARIF reports from a role-A and role-B scan (dynamic phase)",
    ),
    "dlp-bypass-offline": (
        "dlp-bypass-offline.py",
        "DLP/detection regex encoding bypass",
        "your DLP/redaction regexes (offline)",
    ),
    "s3-assess": (
        "s3-assess.sh",
        "S3 bucket posture",
        "a bucket name + AWS creds",
    ),
}

# Probe labels selected regardless of what the recon facts contain.
ALWAYS = [
    "jwt-attacks",
    "hs256-brute-force",
    "rate-limit-burst",
]
41
-
42
-
43
def applicable(facts: dict) -> list:
    """Pick probes the extracted surface actually justifies.

    Starts from the ``ALWAYS`` set and adds probes based on the recon facts:
    write endpoints, tenant candidates and SSRF candidates/sinks.

    Args:
        facts: recon facts; the ``routes``, ``tenant`` and ``surface`` sections
            (and their ``targeting`` / ``sinks`` sub-sections) may be missing
            or ``None``.

    Returns:
        Probe labels in selection order, de-duplicated and restricted to keys
        present in ``PROBES``.
    """
    # `or {}` (rather than a .get default) also covers sections that are
    # present but explicitly None, which would otherwise raise AttributeError.
    targeting = (facts.get("routes") or {}).get("targeting") or {}
    tenant = (facts.get("tenant") or {}).get("candidates")
    sinks = (facts.get("surface") or {}).get("sinks") or {}
    has_writes = bool(targeting.get("write_endpoints"))

    chosen = list(ALWAYS)
    if has_writes:
        chosen += ["mass-assignment"]
    if tenant:
        chosen += ["bola-cross-tenant", "bola-write-verbs", "compare-roles"]
    if targeting.get("ssrf_candidates") or sinks.get("ssrf-outbound-http"):
        chosen += ["ssrf-probes"]
    if has_writes:
        # Appended after the SSRF probe to keep the established staging order.
        chosen += ["webhook-forgery", "race-conditions"]

    # dict.fromkeys drops duplicates while preserving first-seen order.
    return [key for key in dict.fromkeys(chosen) if key in PROBES]
64
-
65
-
66
def stage(chosen: list, outdir: Path) -> list:
    """Copy the selected probe templates into ``outdir / "probes"``.

    Args:
        chosen: probe labels; each must be a key of ``PROBES``.
        outdir: output directory; the ``probes`` subdirectory is created if needed.

    Returns:
        One manifest dict per label: on success the staged file path, attack
        class and what the agent must supply; on failure the file name and a
        ``stage-error`` status carrying the exception text.
    """
    probe_dir = outdir / "probes"
    probe_dir.mkdir(parents=True, exist_ok=True)
    template_dir = resources.files("websec_validator").joinpath("templates/probes")

    manifest = []
    for key in chosen:
        fname, attack, needs = PROBES[key]
        try:
            payload = template_dir.joinpath(fname).read_bytes()
            (probe_dir / fname).write_bytes(payload)
        except Exception as e:
            # Best effort: a probe that fails to stage is recorded, not fatal.
            entry = {"key": key, "file": fname, "status": f"stage-error: {e}"}
        else:
            entry = {"key": key, "file": f"probes/{fname}",
                     "attack_class": attack, "agent_must_supply": needs}
        manifest.append(entry)
    return manifest