websec-validator 0.4.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {websec_validator-0.4.0/src/websec_validator.egg-info → websec_validator-0.4.1}/PKG-INFO +1 -1
- {websec_validator-0.4.0 → websec_validator-0.4.1}/pyproject.toml +1 -1
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/__init__.py +8 -4
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/briefing.py +7 -1
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/cli.py +6 -1
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/dynamic.py +25 -6
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/__init__.py +5 -1
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/authz.py +8 -1
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/base.py +28 -1
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/integrations.py +9 -3
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/routes.py +16 -7
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/findings.py +48 -6
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/scanners.py +19 -7
- {websec_validator-0.4.0 → websec_validator-0.4.1/src/websec_validator.egg-info}/PKG-INFO +1 -1
- {websec_validator-0.4.0 → websec_validator-0.4.1}/tests/test_hardening.py +85 -1
- {websec_validator-0.4.0 → websec_validator-0.4.1}/tests/test_recon.py +72 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/LICENSE +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/README.md +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/setup.cfg +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/calibration.json +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/calibration.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/constitution.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/corpus.json +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/auth.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/client_exposure.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/client_integrity.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/graphql.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/iac_ci.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/pii_exposure.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/policy_consistency.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/schemas.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/stack.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/surface.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/tenant.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/upload_security.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/probes.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/proof.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/recon.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/report.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/rules/error-stack-disclosure.yml +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/rules/insecure-default-secret.yml +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/_lib.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/appsync-cswsh.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/appsync-introspection.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/appsync-subscription-bola.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/bola-cross-tenant.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/bola-write-verbs.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/client-integrity-checklist.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/compare-roles.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/dlp-bypass-offline.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/error-disclosure-probe.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/forged-token.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/hs256-brute-force.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/jwt-attacks.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/mass-assignment.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/password-reuse.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/pii-output-diff.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/race-conditions.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/rate-limit-burst.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/s3-assess.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/ssrf-probes.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/unauth-baseline.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/upload-matrix.sh +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/webhook-forgery.py +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/reports/access-control-matrix.md.template +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/reports/findings-triage.md.template +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/reports/pentest-handover-brief.md.template +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/reports/per-tool-FINDINGS.md.template +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator.egg-info/SOURCES.txt +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator.egg-info/dependency_links.txt +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator.egg-info/entry_points.txt +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator.egg-info/top_level.txt +0 -0
- {websec_validator-0.4.0 → websec_validator-0.4.1}/tests/test_pentest_regressions.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: websec-validator
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
|
|
5
5
|
Author: Ricardo Accioly
|
|
6
6
|
License: MIT
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "websec-validator"
|
|
7
|
-
version = "0.4.0"
|
|
7
|
+
version = "0.4.1"
|
|
8
8
|
description = "Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
"""websec-validator — local-first security recon that briefs an AI coding agent.
|
|
2
2
|
|
|
3
3
|
The tool does the deterministic half (read the repo, run the scanners it finds,
|
|
4
|
-
stage the probe library tailored to what it discovered) and emits
|
|
4
|
+
stage the probe library tailored to what it discovered) and emits, per immutable run:
|
|
5
5
|
|
|
6
|
-
1.
|
|
7
|
-
2.
|
|
8
|
-
3.
|
|
6
|
+
1. FACTS.json — stack, routes, auth-model candidates, attack surface
|
|
7
|
+
2. findings.json — de-duplicated static scanner results (when --scan)
|
|
8
|
+
3. findings-ledger.json — ranked, standards-cited, calibrated findings (recon + static + dynamic)
|
|
9
|
+
4. AGENT-BRIEFING.md — marching orders + the per-attack-class targeting
|
|
10
|
+
5. REPORT.md — the human-readable historical record
|
|
11
|
+
6. CONSTITUTION.md — the app's security invariants as checkable Given/When/Then
|
|
12
|
+
7. probes/ — the probe library staged against THIS app's real surface
|
|
9
13
|
|
|
10
14
|
It never calls an LLM, never runs a server, and never needs a running instance of
|
|
11
15
|
the target app. Running the probes and applying fixes is the agent + human's job.
|
|
@@ -127,6 +127,12 @@ def render(facts: dict, scanners: dict, scan_results: list, probe_manifest: list
|
|
|
127
127
|
endpoints = routes.get("endpoints", [])
|
|
128
128
|
inventory = _bullets([f"`{e['method']:6}` {e['path']}" for e in endpoints], cap=80)
|
|
129
129
|
|
|
130
|
+
partial_banner = (
|
|
131
|
+
f"\n> ⚠️ **PARTIAL SCAN** — the walker stopped at the {facts.get('file_cap','?')}-file cap "
|
|
132
|
+
f"({facts.get('files_scanned','?')} files read, filesystem order), so recon may be INCOMPLETE on "
|
|
133
|
+
"this repo. Re-run scoped to a subdirectory or with `--exclude` to cover the rest before trusting "
|
|
134
|
+
"an absence of findings.\n" if facts.get("files_truncated") else "")
|
|
135
|
+
|
|
130
136
|
return f"""# AGENT BRIEFING — security pass for `{facts.get('target','')}`
|
|
131
137
|
|
|
132
138
|
> Generated by **websec-validator v{facts.get('version','')}** — deterministic recon, no LLM.
|
|
@@ -141,7 +147,7 @@ def render(facts: dict, scanners: dict, scan_results: list, probe_manifest: list
|
|
|
141
147
|
|
|
142
148
|
⚠️ Static findings + recon need **no running app**. The probes need a **live test instance + test
|
|
143
149
|
credentials** — ask the human, never fabricate, never hit production.
|
|
144
|
-
|
|
150
|
+
{partial_banner}
|
|
145
151
|
---
|
|
146
152
|
|
|
147
153
|
## 1. What this app is (detected)
|
|
@@ -134,9 +134,11 @@ def cmd_run(args) -> int:
|
|
|
134
134
|
# 5. briefing + comprehensive REPORT.md (immutable run record)
|
|
135
135
|
(out / "AGENT-BRIEFING.md").write_text(briefing.render(facts, det, scan_results, manifest, unified))
|
|
136
136
|
(out / "REPORT.md").write_text(report.render(facts, det, scan_results, unified, manifest, ts, ledger))
|
|
137
|
+
# drop the full `all` finding list from the manifest — it's a duplicate of findings.json
|
|
138
|
+
manifest_summary = {k: v for k, v in unified.items() if k != "all"} if unified else None
|
|
137
139
|
(out / "manifest.json").write_text(json.dumps(
|
|
138
140
|
{"facts": "FACTS.json", "scanners": det, "scan_results": scan_results,
|
|
139
|
-
"findings_summary":
|
|
141
|
+
"findings_summary": manifest_summary, "ledger": {"total": ledger["total"], "by_severity": ledger["by_severity"]},
|
|
140
142
|
"probes": manifest, "timestamp": ts}, indent=2))
|
|
141
143
|
|
|
142
144
|
print(f"\n✓ run {ts} saved (immutable — nothing overwritten):\n {out}")
|
|
@@ -327,6 +329,9 @@ def _which(b):
|
|
|
327
329
|
|
|
328
330
|
|
|
329
331
|
def _print_facts_summary(facts: dict) -> None:
|
|
332
|
+
if facts.get("files_truncated"):
|
|
333
|
+
print(f" ⚠ PARTIAL SCAN — hit the {facts.get('file_cap', '?')}-file cap; recon may be incomplete. "
|
|
334
|
+
"Narrow with --exclude or scan a subdirectory.")
|
|
330
335
|
st = facts.get("stack", {})
|
|
331
336
|
rt = facts.get("routes", {})
|
|
332
337
|
tg = rt.get("targeting", {})
|
|
@@ -106,7 +106,9 @@ def cross_tenant_bola(cfg: dict, facts: dict) -> dict:
|
|
|
106
106
|
for path in endpoints:
|
|
107
107
|
# attacker A tries to read B's tenant data, and vice-versa
|
|
108
108
|
for atk, vic, direction in ((a, b, "A→B"), (b, a, "B→A")):
|
|
109
|
-
|
|
109
|
+
# str(): a tenant id is often numeric (auto-increment) — str.replace's 2nd arg must be a
|
|
110
|
+
# str, so a JSON int would crash this (uncaught) authenticated path.
|
|
111
|
+
url = cfg["target"] + path.replace("{" + param + "}", str(vic["tenant"]))
|
|
110
112
|
code, body = _request("GET", url, atk["token"])
|
|
111
113
|
if code in (401, 403, 404):
|
|
112
114
|
verdict = "blocked"
|
|
@@ -164,7 +166,9 @@ def unauth_reachability(target: str, facts: dict, max_endpoints: int = 50) -> di
|
|
|
164
166
|
if e.get("method") != "GET" or "{" in p or SIDE_EFFECTING.search(p):
|
|
165
167
|
continue
|
|
166
168
|
eps.append(p)
|
|
167
|
-
|
|
169
|
+
_all_eps = sorted(set(eps))
|
|
170
|
+
eps = _all_eps[:max_endpoints]
|
|
171
|
+
over_cap = max(0, len(_all_eps) - max_endpoints) # disclose, don't silently drop (a missed endpoint = a missed lead)
|
|
168
172
|
|
|
169
173
|
results, skipped = [], [e.get("path") for e in (facts.get("routes") or {}).get("endpoints", [])
|
|
170
174
|
if e.get("method") == "GET" and SIDE_EFFECTING.search(e.get("path", ""))]
|
|
@@ -195,11 +199,13 @@ def unauth_reachability(target: str, facts: dict, max_endpoints: int = 50) -> di
|
|
|
195
199
|
"skipped_side_effecting": sorted(set(skipped)),
|
|
196
200
|
"open_no_auth": openish,
|
|
197
201
|
"results": results,
|
|
202
|
+
"endpoints_over_cap": over_cap,
|
|
198
203
|
"fail_open_suspected": fail_open,
|
|
199
204
|
"authn_trustworthy": not fail_open,
|
|
200
205
|
"warning": FAIL_OPEN_WARNING if fail_open else "",
|
|
201
206
|
"summary": f"{len(openish)}/{len(results)} data-read GET endpoints reachable WITHOUT auth"
|
|
202
207
|
+ (" — review whether these should be public" if openish else " — all gated")
|
|
208
|
+
+ (f" · ⚠ {over_cap} more over the {max_endpoints}-endpoint cap NOT tested" if over_cap else "")
|
|
203
209
|
+ (" · ⚠ FAIL-OPEN SUSPECTED (nothing enforced auth — results untrustworthy)" if fail_open else ""),
|
|
204
210
|
}
|
|
205
211
|
|
|
@@ -219,7 +225,9 @@ def write_auth_enforcement(target: str, facts: dict, max_endpoints: int = 80) ->
|
|
|
219
225
|
p = e.get("path", "")
|
|
220
226
|
if e.get("method") in WRITE_VERBS and not SIDE_EFFECTING.search(p):
|
|
221
227
|
eps.append((e["method"], p))
|
|
222
|
-
|
|
228
|
+
_all_eps = sorted(set(eps))
|
|
229
|
+
eps = _all_eps[:max_endpoints]
|
|
230
|
+
over_cap = max(0, len(_all_eps) - max_endpoints)
|
|
223
231
|
|
|
224
232
|
results = []
|
|
225
233
|
for method, path in eps:
|
|
@@ -229,9 +237,14 @@ def write_auth_enforcement(target: str, facts: dict, max_endpoints: int = 80) ->
|
|
|
229
237
|
verdict = "auth-enforced"
|
|
230
238
|
elif code in (200, 201, 204):
|
|
231
239
|
verdict = "EXECUTED-UNAUTH"
|
|
232
|
-
elif code in (400, 422, 404, 405, 409, 415, 500):
|
|
240
|
+
elif code in (400, 422, 404, 405, 409, 415):
|
|
233
241
|
verdict = "no-auth-gate (reached handler/validation)"
|
|
234
242
|
else:
|
|
243
|
+
# 500 (and any other code) is INCONCLUSIVE: a 500 may be the auth layer itself throwing,
|
|
244
|
+
# not the handler running unauthenticated — so it must NOT become a no-auth-gate verdict
|
|
245
|
+
# (which would escalate to a HIGH missing-auth finding AND poison the calibration oracle
|
|
246
|
+
# with a confirmed-real sample). Matches the forged-token engine, which also excludes 500
|
|
247
|
+
# from "reached handler".
|
|
235
248
|
verdict = f"http-{code}"
|
|
236
249
|
results.append({"method": method, "path": path, "status": code, "verdict": verdict})
|
|
237
250
|
|
|
@@ -248,11 +261,13 @@ def write_auth_enforcement(target: str, facts: dict, max_endpoints: int = 80) ->
|
|
|
248
261
|
"no_auth_gate": missing,
|
|
249
262
|
"executed_unauth": executed,
|
|
250
263
|
"results": results,
|
|
264
|
+
"endpoints_over_cap": over_cap,
|
|
251
265
|
"fail_open_suspected": fail_open,
|
|
252
266
|
"authn_trustworthy": not fail_open,
|
|
253
267
|
"warning": FAIL_OPEN_WARNING if fail_open else "",
|
|
254
268
|
"summary": f"{enforced}/{len(results)} write endpoints enforce auth · "
|
|
255
269
|
f"{len(missing)} reached with no auth gate · {len(executed)} executed unauthenticated"
|
|
270
|
+
+ (f" · ⚠ {over_cap} more over the {max_endpoints}-endpoint cap NOT tested" if over_cap else "")
|
|
256
271
|
+ (" · ⚠ FAIL-OPEN SUSPECTED — results untrustworthy" if fail_open else ""),
|
|
257
272
|
}
|
|
258
273
|
|
|
@@ -299,7 +314,9 @@ def forged_token_bypass(target: str, facts: dict, cookie_names=None,
|
|
|
299
314
|
targets += [(e.get("method"), e.get("path", "")) for e in (facts.get("routes") or {}).get("endpoints", [])
|
|
300
315
|
if e.get("method") in WRITE_VERBS and "{" not in e.get("path", "")
|
|
301
316
|
and not SIDE_EFFECTING.search(e.get("path", ""))]
|
|
302
|
-
|
|
317
|
+
_all_targets = sorted(set(targets))
|
|
318
|
+
targets = _all_targets[:max_endpoints]
|
|
319
|
+
over_cap = max(0, len(_all_targets) - max_endpoints)
|
|
303
320
|
|
|
304
321
|
results, bypassed = [], []
|
|
305
322
|
for method, path in targets:
|
|
@@ -335,9 +352,11 @@ def forged_token_bypass(target: str, facts: dict, cookie_names=None,
|
|
|
335
352
|
"tested": len(results),
|
|
336
353
|
"bypassed": bypassed,
|
|
337
354
|
"results": results,
|
|
355
|
+
"endpoints_over_cap": over_cap,
|
|
338
356
|
"summary": f"{len(bypassed)}/{len(results)} gated route(s) accepted a forged unsigned token"
|
|
339
357
|
+ (" — ⚠ SIGNATURE NOT VERIFIED (CWE-347 auth bypass)" if bypassed
|
|
340
|
-
else " — all rejected the forged token")
|
|
358
|
+
else " — all rejected the forged token")
|
|
359
|
+
+ (f" · ⚠ {over_cap} more over the {max_endpoints}-endpoint cap NOT tested" if over_cap else ""),
|
|
341
360
|
}
|
|
342
361
|
|
|
343
362
|
|
{websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/__init__.py
RENAMED
|
@@ -11,7 +11,7 @@ from pathlib import Path
|
|
|
11
11
|
|
|
12
12
|
from .auth import AuthExtractor
|
|
13
13
|
from .authz import AuthzExtractor
|
|
14
|
-
from .base import Extractor, RepoContext
|
|
14
|
+
from .base import MAX_FILES, Extractor, RepoContext
|
|
15
15
|
from .client_exposure import ClientExposureExtractor
|
|
16
16
|
from .client_integrity import ClientIntegrityExtractor
|
|
17
17
|
from .graphql import GraphQLExtractor
|
|
@@ -55,6 +55,10 @@ def run_all(root: Path, version: str, excludes: list | None = None) -> dict:
|
|
|
55
55
|
"version": version,
|
|
56
56
|
"target": str(root.resolve()),
|
|
57
57
|
"files_scanned": len(ctx.code_files),
|
|
58
|
+
# PARTIAL-scan guard: the walker stops at MAX_FILES (filesystem order), so on a very large
|
|
59
|
+
# monorepo recon may miss files. Surface it loudly rather than implying full coverage.
|
|
60
|
+
"files_truncated": bool(getattr(ctx, "truncated", False)),
|
|
61
|
+
"file_cap": MAX_FILES,
|
|
58
62
|
}
|
|
59
63
|
for ext in REGISTRY:
|
|
60
64
|
try:
|
|
@@ -21,6 +21,12 @@ from .base import Extractor, RepoContext
|
|
|
21
21
|
|
|
22
22
|
WRITE_VERBS = {"POST", "PUT", "PATCH", "DELETE"}
|
|
23
23
|
|
|
24
|
+
# endpoint_guards feeds the missing-auth ledger (findings.build_ledger), so capping it low was a
|
|
25
|
+
# silent coverage cliff: a big monorepo's unguarded write #401 never became a finding. Raised to
|
|
26
|
+
# cover realistic monorepos; truncation beyond this is DISCLOSED (endpoint_guards_truncated), never
|
|
27
|
+
# silent — mirrors constitution.py's "…and N more" pattern.
|
|
28
|
+
_MAX_ENDPOINT_GUARDS = 5000
|
|
29
|
+
|
|
24
30
|
GUARD = re.compile(
|
|
25
31
|
r"requireAuth|requirePermission|requireRole|requireGroupAccess|isAuthenticated|"
|
|
26
32
|
r"@login_required|@jwt_required|@permission_required|@roles_required|ensureAuth|"
|
|
@@ -181,7 +187,8 @@ class AuthzExtractor(Extractor):
|
|
|
181
187
|
"roles_detected": sorted(r for r in roles if r),
|
|
182
188
|
"guard_summary": {"with_visible_guard": protected,
|
|
183
189
|
"no_visible_guard": no_guard, "unknown": unknown},
|
|
184
|
-
"endpoint_guards": egs[:
|
|
190
|
+
"endpoint_guards": egs[:_MAX_ENDPOINT_GUARDS],
|
|
191
|
+
"endpoint_guards_truncated": max(0, len(egs) - _MAX_ENDPOINT_GUARDS),
|
|
185
192
|
"write_endpoints_without_visible_guard": sorted(set(no_guard_writes))[:60],
|
|
186
193
|
"unsafe_auth_decoders": unsafe_decoders[:30],
|
|
187
194
|
"unverified_signature_routes": unverified_routes,
|
|
@@ -31,6 +31,31 @@ MAX_FILES = 12000
|
|
|
31
31
|
MAX_BYTES = 2_000_000
|
|
32
32
|
|
|
33
33
|
|
|
34
|
+
def path_in_skip_dir(path: str, root: "Path | str | None" = None) -> bool:
|
|
35
|
+
"""True if `path` lies under a SKIP_DIR segment, measured RELATIVE to the scan root.
|
|
36
|
+
|
|
37
|
+
Checking the ABSOLUTE path's segments is the bug-005/bug-066 trap: when the scanned repo
|
|
38
|
+
itself lives under a skip-named ancestor (e.g. `.claude/worktrees/<id>`, `vendor/`,
|
|
39
|
+
`target/`, `~/.cache`), a segment ABOVE the root matches and the WHOLE tree — every route,
|
|
40
|
+
every finding — is silently dropped. Noir + the static scanners emit ABSOLUTE paths, so any
|
|
41
|
+
traversal that post-filters their output MUST strip the root prefix first (the walker already
|
|
42
|
+
does, via relative_to). Fail OPEN (keep the item) when the path can't be made relative — a
|
|
43
|
+
silent drop is the dangerous direction for a security tool. `root=None` preserves the legacy
|
|
44
|
+
raw-segment behavior for already-relative inputs.
|
|
45
|
+
"""
|
|
46
|
+
p = (path or "").replace("\\", "/")
|
|
47
|
+
if not p:
|
|
48
|
+
return False
|
|
49
|
+
if root is not None:
|
|
50
|
+
try:
|
|
51
|
+
p = Path(path).resolve().relative_to(Path(root).resolve()).as_posix()
|
|
52
|
+
except (ValueError, OSError):
|
|
53
|
+
if Path(p).is_absolute():
|
|
54
|
+
return False # absolute but outside the root → don't risk a false drop
|
|
55
|
+
# else: already a root-relative path → check its segments as-is below
|
|
56
|
+
return any(part in SKIP_DIRS for part in p.split("/"))
|
|
57
|
+
|
|
58
|
+
|
|
34
59
|
class RepoContext:
|
|
35
60
|
"""Walk the tree once; cache file text; serve cheap queries to every extractor."""
|
|
36
61
|
|
|
@@ -47,9 +72,11 @@ class RepoContext:
|
|
|
47
72
|
|
|
48
73
|
def _walk(self) -> None:
|
|
49
74
|
n = 0
|
|
75
|
+
self.truncated = False # set when MAX_FILES is hit → recon is PARTIAL, surface it
|
|
50
76
|
for p in self.root.rglob("*"):
|
|
51
77
|
if n >= MAX_FILES:
|
|
52
|
-
|
|
78
|
+
self.truncated = True # rglob order is filesystem-dependent → which files drop is
|
|
79
|
+
break # nondeterministic; the consumer MUST know coverage is partial
|
|
53
80
|
# match SKIP_DIRS against parts RELATIVE to the scan root — otherwise a
|
|
54
81
|
# repo located under e.g. ~/.cache or any dir named like a skip-dir would
|
|
55
82
|
# have its whole tree skipped.
|
{websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/integrations.py
RENAMED
|
@@ -14,10 +14,16 @@ from pathlib import Path
|
|
|
14
14
|
from .base import Extractor, RepoContext
|
|
15
15
|
|
|
16
16
|
WEBHOOK_PATH = re.compile(r"webhook|/hook|/callback|/inbound", re.I)
|
|
17
|
+
# Signals that a handler ACTUALLY verifies an inbound signature. The bare word `signature` used to
|
|
18
|
+
# be here and was over-broad: a comment like "no signature verification" — or any stray mention —
|
|
19
|
+
# SUPPRESSED the finding (a false negative, the worst failure for a security tool). Keep crypto
|
|
20
|
+
# primitives, known signature HEADER names (reading one implies verification intent), webhook
|
|
21
|
+
# libraries, and VERB-prefixed signature idioms (verify/check/validate/compute…Signature) — drop
|
|
22
|
+
# the standalone word. Erring toward MORE flagging is the safe direction; the human verifies.
|
|
17
23
|
SIG_VERIFY = re.compile(
|
|
18
|
-
r"createHmac|\bhmac\b|timingSafeEqual|
|
|
19
|
-
r"
|
|
20
|
-
r"
|
|
24
|
+
r"createHmac|\bhmac\b|timingSafeEqual|X-Hub-Signature|X-Signature|Stripe-Signature|"
|
|
25
|
+
r"\bsvix\b|constant_time_compare|compare_digest|verifyWebhook|webhookSecret|"
|
|
26
|
+
r"(?:verif|check|validate|assert|compute|expected|valid)\w*[_-]?[Ss]ignature", re.I)
|
|
21
27
|
|
|
22
28
|
SDKS = {"stripe": "Stripe", "twilio": "Twilio", "@sendgrid": "SendGrid", "messagebird": "MessageBird/Bird",
|
|
23
29
|
"@slack": "Slack", "openai": "OpenAI", "@anthropic": "Anthropic", "octokit": "GitHub",
|
|
@@ -25,7 +25,7 @@ import subprocess
|
|
|
25
25
|
import tempfile
|
|
26
26
|
from pathlib import Path
|
|
27
27
|
|
|
28
|
-
from .base import SKIP_DIRS, Extractor, RepoContext
|
|
28
|
+
from .base import SKIP_DIRS, Extractor, RepoContext, path_in_skip_dir
|
|
29
29
|
|
|
30
30
|
# Noir is a subprocess that scans the raw tree — it does NOT know the walker's SKIP_DIRS,
|
|
31
31
|
# so without this it grinds through (and emits routes from) build output (.next, cdk.out,
|
|
@@ -35,8 +35,11 @@ from .base import SKIP_DIRS, Extractor, RepoContext
|
|
|
35
35
|
_NOIR_SKIP_GLOBS = ",".join(f"**/{d}/**" for d in sorted(SKIP_DIRS))
|
|
36
36
|
|
|
37
37
|
|
|
38
|
-
def _in_skip_dir(code_path: str) -> bool:
|
|
39
|
-
|
|
38
|
+
def _in_skip_dir(code_path: str, root=None) -> bool:
|
|
39
|
+
# Delegates to the shared, root-relative helper. Noir emits ABSOLUTE code_paths, so we MUST
|
|
40
|
+
# pass the scan root — otherwise a repo under a skip-named ancestor (e.g. .claude/worktrees,
|
|
41
|
+
# vendor/, target/) has EVERY route dropped (bug-005 recurrence; proven on a `target/` path).
|
|
42
|
+
return path_in_skip_dir(code_path, root)
|
|
40
43
|
|
|
41
44
|
WRITE_VERBS = {"POST", "PUT", "PATCH", "DELETE"}
|
|
42
45
|
EXCLUDE_GLOBS = "*.test.ts,*.test.tsx,*.spec.ts,*.test.js,*.spec.js,*_test.go,*_test.py,test_*.py,*.stories.tsx"
|
|
@@ -223,10 +226,14 @@ class RoutesExtractor(Extractor):
|
|
|
223
226
|
|
|
224
227
|
def extract(self, ctx: RepoContext, facts: dict) -> dict:
|
|
225
228
|
eps = _noir_scan(ctx.root, getattr(ctx, "excludes", None))
|
|
226
|
-
if eps is not None:
|
|
229
|
+
if eps: # noir ran AND found routes
|
|
227
230
|
routes, spec_derived = _normalize_noir(eps)
|
|
228
231
|
engine = "noir"
|
|
229
|
-
|
|
232
|
+
elif eps is not None: # noir ran but found ZERO — back it up with the regex
|
|
233
|
+
fb = _fallback(ctx) # pass so a framework noir can't parse doesn't become a
|
|
234
|
+
routes, spec_derived = fb, [] # silent blind spot (0 routes → no authz, no probes)
|
|
235
|
+
engine = "noir (0 routes) → regex-fallback backstop" if fb else "noir (0 routes)"
|
|
236
|
+
else: # noir absent
|
|
230
237
|
routes, spec_derived = _fallback(ctx), []
|
|
231
238
|
engine = "regex-fallback (install OWASP Noir for full coverage: brew install noir)"
|
|
232
239
|
# honor user --exclude against route code_paths too (Noir's own --exclude-path glob is
|
|
@@ -234,8 +241,10 @@ class RoutesExtractor(Extractor):
|
|
|
234
241
|
if getattr(ctx, "excludes", None):
|
|
235
242
|
routes = [r for r in routes if not ctx._excluded(r.get("code_path", ""))]
|
|
236
243
|
# Noir doesn't honor SKIP_DIRS — drop any route it found under build output / deps /
|
|
237
|
-
# nested worktrees (e.g. .claude/worktrees/* doubling the whole app).
|
|
238
|
-
|
|
244
|
+
# nested worktrees (e.g. .claude/worktrees/* doubling the whole app). Pass ctx.root so
|
|
245
|
+
# SKIP_DIRS is matched RELATIVE to the scan root (a skip-named ANCESTOR must not nuke
|
|
246
|
+
# the whole route list).
|
|
247
|
+
routes = [r for r in routes if not _in_skip_dir(r.get("code_path", ""), ctx.root)]
|
|
239
248
|
by_method: dict = {}
|
|
240
249
|
by_tech: dict = {}
|
|
241
250
|
for r in routes:
|
|
@@ -33,6 +33,9 @@ STANDARDS = {
|
|
|
33
33
|
"ssrf": (["CWE-918 SSRF"], "ASVS V12.6", ["API7:2023 SSRF"]),
|
|
34
34
|
"secret": (["CWE-798 Hard-coded Credentials"], "ASVS V2.10", ["API8:2023 Misconfiguration"]),
|
|
35
35
|
"sqli": (["CWE-89 SQL Injection"], "ASVS V5.3.4", ["API8:2023"]),
|
|
36
|
+
"nosql-injection": (["CWE-943 Improper Neutralization of Data within a Query"], "ASVS V5.3.4", ["API8:2023"]),
|
|
37
|
+
"redos": (["CWE-1333 Inefficient Regular Expression Complexity (ReDoS)"], "ASVS V5.2.4", []),
|
|
38
|
+
"eval-injection": (["CWE-95 Eval Injection", "CWE-94 Code Injection"], "ASVS V5.2.4", []),
|
|
36
39
|
"command-injection": (["CWE-78 OS Command Injection"], "ASVS V5.3.8", []),
|
|
37
40
|
"path-traversal": (["CWE-22 Path Traversal"], "ASVS V12.3", []),
|
|
38
41
|
"ssti": (["CWE-1336 SSTI"], "ASVS V5.2.5", []),
|
|
@@ -41,6 +44,9 @@ STANDARDS = {
|
|
|
41
44
|
"xxe": (["CWE-611 XXE"], "ASVS V5.5.2", []),
|
|
42
45
|
"prototype-pollution": (["CWE-1321 Prototype Pollution"], "ASVS V5.1", []),
|
|
43
46
|
"mass-assignment": (["CWE-915 Mass Assignment"], "ASVS V5.1.2", ["API3:2023 BOPLA"]),
|
|
47
|
+
"webhook-forgery": (["CWE-345 Insufficient Verification of Data Authenticity",
|
|
48
|
+
"CWE-347 Improper Verification of Cryptographic Signature"],
|
|
49
|
+
"ASVS V13.4", ["API8:2023 Misconfiguration"]),
|
|
44
50
|
"cve": (["CWE-1395 Vulnerable Dependency"], "ASVS V14.2.1", ["API8:2023"]),
|
|
45
51
|
"iac": (["CWE-1188 Insecure Default"], "ASVS V14.1", []),
|
|
46
52
|
"client-exposure": (["CWE-200 Information Exposure"], "ASVS V14.3", []),
|
|
@@ -69,10 +75,19 @@ REMEDIATION = {
|
|
|
69
75
|
"missing-auth": "Add an auth guard to the handler (e.g. requireAuth()/getServerSession()), or a "
|
|
70
76
|
"middleware matcher over /api/(.*) with an explicit public allowlist so it can't be forgotten.",
|
|
71
77
|
"bola": "Enforce object ownership: verify the authenticated principal owns/can access the resource id (tenant scope).",
|
|
78
|
+
"webhook-forgery": "Verify the provider's signature (HMAC over the RAW body, constant-time compare) before "
|
|
79
|
+
"processing, reject stale timestamps / replays, and fail closed when the signature header "
|
|
80
|
+
"is absent — don't trust an unsigned inbound webhook.",
|
|
72
81
|
"unsafe-auth-decoder": "Verify the token/signature before trusting it for an auth/identity decision — use a "
|
|
73
82
|
"verifying decode (e.g. jwt.verify with the key / a checked session), never an *Unsafe* "
|
|
74
83
|
"or decode-only path whose output then feeds requireAuth/requireAdmin.",
|
|
75
84
|
"ssrf": "Validate + allowlist outbound URLs; block RFC1918/IMDS/file://; never fetch a raw user-supplied URL.",
|
|
85
|
+
"nosql-injection": "Never pass raw req.body into a query/operator position; reject $-prefixed keys, use a typed "
|
|
86
|
+
"query builder or schema validation, and cast expected types before querying.",
|
|
87
|
+
"redos": "Bound the regex (no nested/ambiguous quantifiers), cap input length, or use a linear-time engine "
|
|
88
|
+
"(RE2) — and never build a pattern from unsanitized user input.",
|
|
89
|
+
"eval-injection": "Remove eval()/new Function()/exec on user input; use a safe parser, a typed dispatch table, "
|
|
90
|
+
"or an explicit allowlist of operations instead.",
|
|
76
91
|
"secret": "Rotate the credential, remove from code/history, load from a secrets manager.",
|
|
77
92
|
"cve": "Upgrade the dependency to the fixed version.",
|
|
78
93
|
"iac": "Apply the hardening (non-root user, pin actions to a SHA, enforce TLS, etc.).",
|
|
@@ -105,6 +120,12 @@ SEV_RANK = {"CRITICAL": 4, "HIGH": 3, "MEDIUM": 2, "LOW": 1, "INFO": 0}
|
|
|
105
120
|
CONF_RANK = {"HIGH": 2, "MEDIUM": 1, "LOW": 0}
|
|
106
121
|
WRITE_VERBS = {"POST", "PUT", "PATCH", "DELETE"}
|
|
107
122
|
|
|
123
|
+
# surface.py sink keys → STANDARDS/attack-class keys where they differ, so a sink cites its SPECIFIC
|
|
124
|
+
# CWE instead of falling back to the generic "sast" (CWE-710). sql-injection is the high-value case
|
|
125
|
+
# (surface.py emits `sql-injection`; STANDARDS keys it `sqli`). nosql-injection/redos/eval-injection
|
|
126
|
+
# now have their own STANDARDS entries, so they resolve directly.
|
|
127
|
+
_SINK_ATTACK = {"sql-injection": "sqli"}
|
|
128
|
+
|
|
108
129
|
|
|
109
130
|
def _cite(cls):
|
|
110
131
|
cwe, asvs, api = STANDARDS.get(cls, ([], "", []))
|
|
@@ -241,8 +262,11 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
|
|
|
241
262
|
+ " Confirm reachability with the forged-token / hs256 probe (it seeds this literal)."}]))
|
|
242
263
|
|
|
243
264
|
# ---- 2. Static scanner findings (de-duplicated `unified`) ----
|
|
265
|
+
# Consume the FULL ranked set (`all`), not the briefing's short `top` slice — else a
|
|
266
|
+
# HIGH/CRITICAL CVE/secret ranked #16+ never reaches the ledger/REPORT/calibration. Falls
|
|
267
|
+
# back to `top` for older callers/tests that only pass that key.
|
|
244
268
|
cat_to_class = {"sca": "cve", "secret": "secret", "iac": "iac", "sast": "sast"}
|
|
245
|
-
for t in (unified or {}).get("top", []):
|
|
269
|
+
for t in ((unified or {}).get("all") or (unified or {}).get("top", [])):
|
|
246
270
|
cat = t.get("category", "")
|
|
247
271
|
cls = cat_to_class.get(cat, "sast")
|
|
248
272
|
sev = t.get("severity", "MEDIUM")
|
|
@@ -257,8 +281,13 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
|
|
|
257
281
|
# down-rank them (the inflation the field test flagged) rather than ranking them MEDIUM.
|
|
258
282
|
_ds = {d.lower() for d in (facts.get("stack", {}).get("datastores") or [])}
|
|
259
283
|
_nosql = {"dynamodb", "dynamo", "mongodb", "mongo", "firestore", "cosmos", "cosmosdb", "couchdb", "cassandra"}
|
|
260
|
-
|
|
261
|
-
|
|
284
|
+
# Include the ORM-ish labels stack.py actually emits (prisma(sql)/sql-orm) — and treat any label
|
|
285
|
+
# CONTAINING "sql" (but not "nosql") as SQL — so a SQL-ORM app + Mongo isn't misread as nosql-only
|
|
286
|
+
# and its SQLi findings wrongly down-ranked.
|
|
287
|
+
_sql = {"postgres", "postgresql", "mysql", "mariadb", "sqlite", "mssql", "sqlserver", "aurora",
|
|
288
|
+
"oracle", "cockroach", "prisma(sql)", "sql-orm"}
|
|
289
|
+
has_sql = bool(_ds & _sql) or any("sql" in d and "nosql" not in d for d in _ds)
|
|
290
|
+
is_nosql_only = bool(_ds & _nosql) and not has_sql
|
|
262
291
|
for cls, info in (facts.get("surface", {}).get("sinks", {}) or {}).items():
|
|
263
292
|
sev = "MEDIUM"
|
|
264
293
|
if cls == "error-disclosure":
|
|
@@ -275,7 +304,8 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
|
|
|
275
304
|
if cls == "ssrf-outbound-http":
|
|
276
305
|
sev = "LOW" # var-arg only — weaker than the user-gated `ssrf` class
|
|
277
306
|
else:
|
|
278
|
-
|
|
307
|
+
_acls = _SINK_ATTACK.get(cls, cls)
|
|
308
|
+
attack = _acls if _acls in STANDARDS else "sast"
|
|
279
309
|
ev = [{"layer": "recon", "detail": f"user-input-gated {cls} in {info.get('count')} file(s)"}]
|
|
280
310
|
if cls in ("sqli", "sql-injection") and is_nosql_only:
|
|
281
311
|
sev = "LOW"
|
|
@@ -348,7 +378,19 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
|
|
|
348
378
|
(_ci.get("sensitive_display") or ["client"])[0],
|
|
349
379
|
[{"layer": "recon", "detail": fnd.get("detail", "")}]))
|
|
350
380
|
|
|
351
|
-
# ---- 9. Upload security — polyglot / MIME-spoof / serve-side stored XSS (PTREQ0013000 #2b) ----
|
|
381
|
+
# ---- 9. Inbound webhooks with no signature verification (forgery / replay) ----
|
|
382
|
+
# Recon found webhook handlers with no HMAC/signature check. This was surfaced in the briefing
|
|
383
|
+
# but — alone among the recon signals — never entered the ranked, calibrated ledger. Wire it in
|
|
384
|
+
# for parity (MEDIUM: heuristic — the check may live in middleware, so verify).
|
|
385
|
+
for wh in (facts.get("integrations", {}) or {}).get("webhooks_without_sig_verification", []):
|
|
386
|
+
out.append(_f(f"Webhook without signature verification: {wh}", "integrations",
|
|
387
|
+
"webhook-forgery", "MEDIUM", "MEDIUM", wh,
|
|
388
|
+
[{"layer": "recon", "detail": "no signature-verification code (HMAC / timingSafeEqual / "
|
|
389
|
+
"Stripe-Signature / svix / compare_digest) found in this webhook handler — a forged or "
|
|
390
|
+
"replayed request could be processed as authentic. Confirm it isn't handled in middleware, "
|
|
391
|
+
"then run the webhook-forgery probe."}]))
|
|
392
|
+
|
|
393
|
+
# ---- 10. Upload security — polyglot / MIME-spoof / serve-side stored XSS (PTREQ0013000 #2b) ----
|
|
352
394
|
for fnd in (facts.get("upload_security", {}) or {}).get("findings", []):
|
|
353
395
|
kind = fnd.get("kind", "")
|
|
354
396
|
cls = "content-sniffing" if kind == "serve-no-nosniff" else "unrestricted-upload"
|
|
@@ -356,7 +398,7 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
|
|
|
356
398
|
fnd.get("severity", "MEDIUM"), "MEDIUM", fnd.get("file", ""),
|
|
357
399
|
[{"layer": "recon", "detail": fnd.get("detail", "")}]))
|
|
358
400
|
|
|
359
|
-
# ---- 10. PII output-boundary — unmasked customer data + dead masking controls (#8) ----
|
|
401
|
+
# ---- 11. PII output-boundary — unmasked customer data + dead masking controls (#8) ----
|
|
360
402
|
for fnd in (facts.get("pii_exposure", {}) or {}).get("findings", []):
|
|
361
403
|
out.append(_f(f"{fnd.get('kind')}: {fnd.get('file')}", "pii", "pii-exposure",
|
|
362
404
|
fnd.get("severity", "MEDIUM"), "MEDIUM", fnd.get("file", ""),
|
|
@@ -20,7 +20,7 @@ import subprocess
|
|
|
20
20
|
from dataclasses import dataclass
|
|
21
21
|
from pathlib import Path
|
|
22
22
|
|
|
23
|
-
from .extractors.base import SKIP_DIRS
|
|
23
|
+
from .extractors.base import SKIP_DIRS, path_in_skip_dir
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
@dataclass(frozen=True)
|
|
@@ -45,9 +45,14 @@ class Scanner:
|
|
|
45
45
|
EXCLUDE_DIRS = tuple(sorted(SKIP_DIRS))
|
|
46
46
|
|
|
47
47
|
|
|
48
|
-
def _in_skip_dir(path: str) -> bool:
|
|
49
|
-
"""True if
|
|
50
|
-
|
|
48
|
+
def _in_skip_dir(path: str, root=None) -> bool:
|
|
49
|
+
"""True if `path` is under a SKIP_DIR, measured RELATIVE to the scan `root` when given.
|
|
50
|
+
|
|
51
|
+
Delegates to the shared helper. Trivy/Semgrep can emit ABSOLUTE paths, so pass `target`
|
|
52
|
+
(the scanned repo) or a repo living under a skip-named ancestor has its real findings
|
|
53
|
+
dropped as 'contamination' (bug-005/066 recurrence). `root=None` keeps the legacy
|
|
54
|
+
raw-segment behavior for relative inputs (and the existing single-arg unit test)."""
|
|
55
|
+
return path_in_skip_dir(path, root)
|
|
51
56
|
|
|
52
57
|
|
|
53
58
|
def _trivy(target: Path, out: Path, excludes=()) -> list:
|
|
@@ -355,7 +360,7 @@ def normalize_findings(scan_results: list, outdir: Path, target: Path | None = N
|
|
|
355
360
|
# build output, the tool's own websec-out) → drop anything under a SKIP_DIR. The
|
|
356
361
|
# correctness guarantee behind the best-effort flags; also catches gitleaks (no skip flag).
|
|
357
362
|
before = len(raw)
|
|
358
|
-
raw = [f for f in raw if not _in_skip_dir(f.get("file", ""))]
|
|
363
|
+
raw = [f for f in raw if not _in_skip_dir(f.get("file", ""), target)]
|
|
359
364
|
contamination_dropped = before - len(raw)
|
|
360
365
|
|
|
361
366
|
# bug-066 (b): working-tree secrets (trivy fs) in GITIGNORED files are local-only / never
|
|
@@ -392,11 +397,18 @@ def normalize_findings(scan_results: list, outdir: Path, target: Path | None = N
|
|
|
392
397
|
for f in deduped:
|
|
393
398
|
by_sev[f["severity"]] = by_sev.get(f["severity"], 0) + 1
|
|
394
399
|
by_cat[f["category"]] = by_cat.get(f["category"], 0) + 1
|
|
400
|
+
summaries = [{"severity": f["severity"], "category": f["category"], "title": f["title"],
|
|
401
|
+
"file": f["file"], "tools": f["tools"]} for f in deduped]
|
|
395
402
|
return {"total_raw": len(raw), "total": len(deduped),
|
|
396
403
|
"cross_tool_or_dup_merged": len(raw) - len(deduped),
|
|
397
404
|
"contamination_dropped": contamination_dropped,
|
|
398
405
|
"local_only_downgraded": local_only_downgraded,
|
|
399
406
|
"by_severity": by_sev, "by_category": by_cat,
|
|
400
|
-
|
|
401
|
-
|
|
407
|
+
# `top` = a short slice for the human briefing; `all` = the FULL ranked set the
|
|
408
|
+
# findings ledger consumes. The ledger must NOT silently drop a HIGH/CRITICAL static
|
|
409
|
+
# finding ranked #16+ — that undercounted the ledger + calibration on scan-heavy repos
|
|
410
|
+
# while the CLI printed ledger.total as if complete. (cli excludes `all` from manifest
|
|
411
|
+
# to avoid duplicating findings.json.)
|
|
412
|
+
"top": summaries[:15],
|
|
413
|
+
"all": summaries}
|
|
402
414
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: websec-validator
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
|
|
5
5
|
Author: Ricardo Accioly
|
|
6
6
|
License: MIT
|
|
@@ -16,7 +16,7 @@ from unittest import mock
|
|
|
16
16
|
ROOT = Path(__file__).resolve().parents[1]
|
|
17
17
|
sys.path.insert(0, str(ROOT / "src"))
|
|
18
18
|
|
|
19
|
-
from websec_validator import dynamic, findings, probes, scanners # noqa: E402
|
|
19
|
+
from websec_validator import calibration, dynamic, findings, probes, scanners # noqa: E402
|
|
20
20
|
from websec_validator.extractors.auth import AuthExtractor # noqa: E402
|
|
21
21
|
from websec_validator.extractors.authz import AuthzExtractor # noqa: E402
|
|
22
22
|
from websec_validator.extractors.base import RepoContext # noqa: E402
|
|
@@ -79,6 +79,45 @@ class ScannerHygieneTests(unittest.TestCase):
|
|
|
79
79
|
self.assertTrue(scanners._in_skip_dir("node_modules/dep/a.js"))
|
|
80
80
|
self.assertFalse(scanners._in_skip_dir("src/app/api/route.ts"))
|
|
81
81
|
|
|
82
|
+
def test_skipdir_matched_relative_to_root_not_absolute(self):
|
|
83
|
+
# Regression: a repo living UNDER a skip-named ANCESTOR (.claude/worktrees, vendor/,
|
|
84
|
+
# target/) had every absolute-path route/finding silently dropped, because SKIP_DIRS
|
|
85
|
+
# was matched against the ABSOLUTE path's segments (bug-005 recurrence). Match relative
|
|
86
|
+
# to the scan root instead. Proven empirically: identical fixture → 2 routes at a clean
|
|
87
|
+
# path, 0 routes under a `target/` ancestor.
|
|
88
|
+
from websec_validator.extractors.base import path_in_skip_dir
|
|
89
|
+
with tempfile.TemporaryDirectory() as d:
|
|
90
|
+
root = Path(d) / "target" / "app" # 'target' is a SKIP_DIR — but it's an ANCESTOR
|
|
91
|
+
(root / "src").mkdir(parents=True)
|
|
92
|
+
real = root / "src" / "routes.js"
|
|
93
|
+
real.write_text("x")
|
|
94
|
+
self.assertIn("target", str(real).split("/")) # the trap segment is present...
|
|
95
|
+
self.assertFalse(path_in_skip_dir(str(real), root)) # ...but NOT below the root → keep it
|
|
96
|
+
nm = root / "node_modules" / "dep.js" # a genuine skip-dir BELOW the root
|
|
97
|
+
nm.parent.mkdir(parents=True)
|
|
98
|
+
nm.write_text("x")
|
|
99
|
+
self.assertTrue(path_in_skip_dir(str(nm), root)) # still correctly skipped
|
|
100
|
+
# backward-compat: no root → legacy raw-segment behavior (single-arg call sites/tests)
|
|
101
|
+
self.assertTrue(path_in_skip_dir("node_modules/dep/a.js"))
|
|
102
|
+
self.assertFalse(path_in_skip_dir("src/app/api/route.ts"))
|
|
103
|
+
|
|
104
|
+
def test_normalize_keeps_findings_when_repo_under_skipdir_ancestor(self):
|
|
105
|
+
# End-to-end consequence: a trivy finding with an ABSOLUTE path under a skip-named
|
|
106
|
+
# ancestor must SURVIVE when `target` is that repo root (else real secrets vanish on
|
|
107
|
+
# anyone whose repo lives under e.g. ~/dev/vendor-portal/ or a .claude worktree).
|
|
108
|
+
with tempfile.TemporaryDirectory() as d:
|
|
109
|
+
root = Path(d) / "vendor" / "app" # 'vendor' ancestor
|
|
110
|
+
(root / "src").mkdir(parents=True)
|
|
111
|
+
abs_file = str(root / "src" / "config.ts")
|
|
112
|
+
trivy = {"Results": [{"Target": abs_file, "Secrets": [
|
|
113
|
+
{"RuleID": "private-key", "Title": "k", "Match": "-----BEGIN", "StartLine": 1}]}]}
|
|
114
|
+
(root / "trivy.json").write_text(json.dumps(trivy))
|
|
115
|
+
res = [{"key": "trivy", "output": str(root / "trivy.json"), "name": "Trivy", "category": "sca"}]
|
|
116
|
+
summary = scanners.normalize_findings(res, root, target=root)
|
|
117
|
+
files = [f["file"] for f in json.loads((root / "findings.json").read_text())]
|
|
118
|
+
self.assertIn(abs_file, files) # NOT dropped despite the 'vendor' ancestor
|
|
119
|
+
self.assertEqual(summary["contamination_dropped"], 0)
|
|
120
|
+
|
|
82
121
|
def test_exclude_dirs_includes_agent_tooling(self):
|
|
83
122
|
self.assertIn(".claude", scanners.EXCLUDE_DIRS)
|
|
84
123
|
self.assertIn(".worktrees", scanners.EXCLUDE_DIRS)
|
|
@@ -124,6 +163,51 @@ class ScannerHygieneTests(unittest.TestCase):
|
|
|
124
163
|
self.assertEqual(summary["local_only_downgraded"], 1)
|
|
125
164
|
|
|
126
165
|
|
|
166
|
+
class CrossTenantNumericIdTests(unittest.TestCase):
|
|
167
|
+
def test_numeric_tenant_id_does_not_crash(self):
|
|
168
|
+
# fix #6: tenant ids are often numeric (auto-increment); str.replace's 2nd arg must be a str,
|
|
169
|
+
# so an int tenant would crash this authenticated path uncaught. Coerce with str().
|
|
170
|
+
cfg = {"target": "http://t", "tenant_path_param": "groupId", "roles": {}}
|
|
171
|
+
facts = {"routes": {"endpoints": [{"method": "GET", "path": "/api/groups/{groupId}/items"}]}}
|
|
172
|
+
captured = []
|
|
173
|
+
|
|
174
|
+
def fake_mint(c, role):
|
|
175
|
+
return {"token": f"tok-{role}", "tenant": 1 if role == "agentA" else 2, "email": f"{role}@x"}
|
|
176
|
+
|
|
177
|
+
def fake_request(method, url, token=None, timeout=20, data=None, cookie=None):
|
|
178
|
+
captured.append(url)
|
|
179
|
+
return 403, "x"
|
|
180
|
+
with mock.patch.object(dynamic, "mint", fake_mint), mock.patch.object(dynamic, "_request", fake_request):
|
|
181
|
+
r = dynamic.cross_tenant_bola(cfg, facts)
|
|
182
|
+
self.assertNotIn("error", r) # numeric ids didn't crash the replace
|
|
183
|
+
self.assertTrue(any(u.endswith("/api/groups/2/items") for u in captured)) # int coerced into the path
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class WriteAuthEnforcement500Tests(unittest.TestCase):
|
|
187
|
+
def test_500_is_inconclusive_not_no_auth_gate(self):
|
|
188
|
+
# a 500 may be the AUTH layer throwing, not the handler running unauth — must NOT become a
|
|
189
|
+
# no-auth-gate verdict (would escalate to a HIGH missing-auth finding AND poison the
|
|
190
|
+
# calibration oracle with a confirmed-real sample). Matches the forged-token engine.
|
|
191
|
+
facts = {"routes": {"endpoints": [{"method": "POST", "path": "/api/x"}]}}
|
|
192
|
+
|
|
193
|
+
def fake(method, url, token=None, timeout=20, data=None, cookie=None):
|
|
194
|
+
return 500, "err"
|
|
195
|
+
with mock.patch.object(dynamic, "_request", fake):
|
|
196
|
+
r = dynamic.write_auth_enforcement("http://t", facts)
|
|
197
|
+
self.assertEqual(r["results"][0]["verdict"], "http-500") # inconclusive, not no-auth-gate
|
|
198
|
+
self.assertEqual(r["no_auth_gate"], []) # so it feeds no missing-auth finding
|
|
199
|
+
self.assertEqual(calibration.samples_from_dynamic({"write_auth_enforcement": r}), []) # oracle clean
|
|
200
|
+
|
|
201
|
+
def test_400_still_no_auth_gate(self): # regression guard: real reached-handler codes unaffected
|
|
202
|
+
facts = {"routes": {"endpoints": [{"method": "POST", "path": "/api/y"}]}}
|
|
203
|
+
|
|
204
|
+
def fake(method, url, token=None, timeout=20, data=None, cookie=None):
|
|
205
|
+
return 400, "bad"
|
|
206
|
+
with mock.patch.object(dynamic, "_request", fake):
|
|
207
|
+
r = dynamic.write_auth_enforcement("http://t", facts)
|
|
208
|
+
self.assertTrue(r["results"][0]["verdict"].startswith("no-auth-gate"))
|
|
209
|
+
|
|
210
|
+
|
|
127
211
|
class ProbeRegistrationTests(unittest.TestCase):
|
|
128
212
|
def test_forged_token_always_staged(self):
|
|
129
213
|
self.assertIn("forged-token", probes.ALWAYS)
|
|
@@ -22,6 +22,7 @@ from websec_validator.extractors.authz import AuthzExtractor # noqa: E
|
|
|
22
22
|
from websec_validator.extractors.base import RepoContext # noqa: E402
|
|
23
23
|
from websec_validator.extractors.stack import StackExtractor # noqa: E402
|
|
24
24
|
from websec_validator.extractors.schemas import SchemasExtractor # noqa: E402
|
|
25
|
+
from websec_validator.extractors.integrations import IntegrationsExtractor # noqa: E402
|
|
25
26
|
from websec_validator.extractors.surface import SINKS, SurfaceExtractor # noqa: E402
|
|
26
27
|
from websec_validator.extractors.tenant import TenantExtractor # noqa: E402
|
|
27
28
|
|
|
@@ -95,6 +96,29 @@ class SchemasTests(unittest.TestCase):
|
|
|
95
96
|
self.assertEqual(out["orms"], [])
|
|
96
97
|
|
|
97
98
|
|
|
99
|
+
class IntegrationsTests(unittest.TestCase):
|
|
100
|
+
def _run(self, handler_src):
|
|
101
|
+
d = Path(tempfile.mkdtemp())
|
|
102
|
+
(d / "h.js").write_text(handler_src)
|
|
103
|
+
facts = {"routes": {"endpoints": [
|
|
104
|
+
{"method": "POST", "path": "/webhooks/stripe", "code_path": str(d / "h.js")}]}}
|
|
105
|
+
return IntegrationsExtractor().extract(RepoContext(d), facts)
|
|
106
|
+
|
|
107
|
+
def test_unverified_webhook_flagged_despite_signature_word_in_comment(self):
|
|
108
|
+
# the bare-word `signature` SIG_VERIFY alternative used to SUPPRESS the finding when a
|
|
109
|
+
# comment merely mentioned signatures — a false negative. Only real verification counts now.
|
|
110
|
+
out = self._run("// no signature verification here\n"
|
|
111
|
+
"router.post('/webhooks/stripe', (req,res)=>res.json({ok:1}));\n")
|
|
112
|
+
self.assertEqual(len(out["webhooks_without_sig_verification"]), 1)
|
|
113
|
+
|
|
114
|
+
def test_genuinely_verified_webhook_not_flagged(self):
|
|
115
|
+
out = self._run("const crypto=require('crypto');\n"
|
|
116
|
+
"router.post('/webhooks/stripe', (req,res)=>{\n"
|
|
117
|
+
" const h=crypto.createHmac('sha256',k).update(req.body).digest('hex');\n"
|
|
118
|
+
" if(h!==req.headers['stripe-signature']) return res.status(401).end();\n});\n")
|
|
119
|
+
self.assertEqual(out["webhooks_without_sig_verification"], [])
|
|
120
|
+
|
|
121
|
+
|
|
98
122
|
class CalibrationTests(unittest.TestCase):
|
|
99
123
|
def test_wilson_interval(self):
|
|
100
124
|
self.assertEqual(calibration.wilson(0, 0), (0.0, 1.0)) # no data → full ignorance
|
|
@@ -392,6 +416,54 @@ class LedgerTests(unittest.TestCase):
|
|
|
392
416
|
self.assertEqual(led["total"], 0)
|
|
393
417
|
self.assertEqual(led["suppressed"], 1)
|
|
394
418
|
|
|
419
|
+
def test_webhook_without_sig_enters_ledger(self):
|
|
420
|
+
# parity fix: unverified webhooks were surfaced in the briefing but never ranked/calibrated.
|
|
421
|
+
facts = {"integrations": {"webhooks_without_sig_verification": ["POST /webhooks/stripe (h.ts)"]}}
|
|
422
|
+
led = findings.build_ledger(facts, None, None, [])
|
|
423
|
+
hit = [f for f in led["findings"] if f["attack_class"] == "webhook-forgery"]
|
|
424
|
+
self.assertEqual(len(hit), 1)
|
|
425
|
+
self.assertEqual(hit[0]["severity"], "MEDIUM")
|
|
426
|
+
self.assertIn("CWE-345 Insufficient Verification of Data Authenticity", hit[0]["standards"]["cwe"])
|
|
427
|
+
self.assertTrue(hit[0]["remediation"])
|
|
428
|
+
|
|
429
|
+
def test_sink_attack_class_maps_to_specific_cwe(self):
|
|
430
|
+
# surface.py emits `sql-injection`/`nosql-injection`/`redos`/`eval-injection`; each must cite
|
|
431
|
+
# its SPECIFIC CWE, not fall back to the generic "sast" (CWE-710).
|
|
432
|
+
facts = {"surface": {"sinks": {
|
|
433
|
+
"sql-injection": {"count": 1, "files": ["a.ts"]},
|
|
434
|
+
"nosql-injection": {"count": 1, "files": ["b.ts"]},
|
|
435
|
+
"redos": {"count": 1, "files": ["c.ts"]},
|
|
436
|
+
"eval-injection": {"count": 1, "files": ["d.ts"]}}},
|
|
437
|
+
"stack": {"datastores": ["postgres"]}}
|
|
438
|
+
by = {f["title"]: f for f in findings.build_ledger(facts, None, None, [])["findings"]}
|
|
439
|
+
self.assertEqual(by["sql-injection sink (1 site(s))"]["attack_class"], "sqli")
|
|
440
|
+
self.assertIn("CWE-89 SQL Injection", by["sql-injection sink (1 site(s))"]["standards"]["cwe"])
|
|
441
|
+
self.assertEqual(by["nosql-injection sink (1 site(s))"]["attack_class"], "nosql-injection")
|
|
442
|
+
self.assertTrue(by["nosql-injection sink (1 site(s))"]["standards"]["cwe"][0].startswith("CWE-943"))
|
|
443
|
+
self.assertEqual(by["eval-injection sink (1 site(s))"]["attack_class"], "eval-injection")
|
|
444
|
+
# a specific remediation, not the generic default
|
|
445
|
+
self.assertNotEqual(by["redos sink (1 site(s))"]["remediation"], "Review and remediate per the cited standard.")
|
|
446
|
+
|
|
447
|
+
def test_sqli_not_downranked_when_sql_orm_present(self):
|
|
448
|
+
# fix #9: stack.py emits `sql-orm`/`prisma(sql)` labels; findings._sql must count them as SQL
|
|
449
|
+
# so a SQL-ORM + Mongo app isn't misread as nosql-only and its SQLi wrongly cut to LOW.
|
|
450
|
+
sinks = {"surface": {"sinks": {"sql-injection": {"count": 1, "files": ["db.ts"]}}}}
|
|
451
|
+
led = findings.build_ledger({**sinks, "stack": {"datastores": ["sql-orm", "mongo"]}}, None, None, [])
|
|
452
|
+
self.assertEqual([f for f in led["findings"] if "sql-injection" in f["title"]][0]["severity"], "MEDIUM")
|
|
453
|
+
led2 = findings.build_ledger({**sinks, "stack": {"datastores": ["mongo"]}}, None, None, [])
|
|
454
|
+
self.assertEqual([f for f in led2["findings"] if "sql-injection" in f["title"]][0]["severity"], "LOW")
|
|
455
|
+
|
|
456
|
+
def test_ledger_consumes_full_static_set_not_just_top15(self):
|
|
457
|
+
# 20 HIGH static findings → all 20 must reach the ledger (was silently capped at top-15,
|
|
458
|
+
# dropping HIGH/CRITICAL CVEs/secrets ranked #16+ from the ledger + calibration).
|
|
459
|
+
allf = [{"severity": "HIGH", "category": "sca", "title": f"CVE-{i}", "file": f"p{i}", "tools": ["trivy"]}
|
|
460
|
+
for i in range(20)]
|
|
461
|
+
led = findings.build_ledger({}, {"top": allf[:15], "all": allf}, None, [])
|
|
462
|
+
self.assertEqual(len([f for f in led["findings"] if f["attack_class"] == "cve"]), 20)
|
|
463
|
+
# back-compat: a caller passing only `top` still works
|
|
464
|
+
led2 = findings.build_ledger({}, {"top": allf[:15]}, None, [])
|
|
465
|
+
self.assertEqual(len([f for f in led2["findings"] if f["attack_class"] == "cve"]), 15)
|
|
466
|
+
|
|
395
467
|
|
|
396
468
|
if __name__ == "__main__":
|
|
397
469
|
unittest.main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/client_exposure.py
RENAMED
|
File without changes
|
|
File without changes
|
{websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/graphql.py
RENAMED
|
File without changes
|
|
File without changes
|
{websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/pii_exposure.py
RENAMED
|
File without changes
|
|
File without changes
|
{websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/schemas.py
RENAMED
|
File without changes
|
|
File without changes
|
{websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/surface.py
RENAMED
|
File without changes
|
|
File without changes
|
{websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/extractors/upload_security.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/_lib.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/s3-assess.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{websec_validator-0.4.0 → websec_validator-0.4.1}/src/websec_validator.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|