websec-validator 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {websec_validator-0.3.0/src/websec_validator.egg-info → websec_validator-0.4.0}/PKG-INFO +18 -14
- {websec_validator-0.3.0 → websec_validator-0.4.0}/README.md +17 -13
- {websec_validator-0.3.0 → websec_validator-0.4.0}/pyproject.toml +1 -1
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/briefing.py +22 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/__init__.py +4 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/client_integrity.py +32 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/graphql.py +26 -9
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/iac_ci.py +21 -6
- websec_validator-0.4.0/src/websec_validator/extractors/pii_exposure.py +98 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/policy_consistency.py +38 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/surface.py +13 -0
- websec_validator-0.4.0/src/websec_validator/extractors/upload_security.py +89 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/findings.py +48 -2
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/probes.py +14 -0
- websec_validator-0.4.0/src/websec_validator/templates/probes/password-reuse.sh +40 -0
- websec_validator-0.4.0/src/websec_validator/templates/probes/pii-output-diff.sh +48 -0
- websec_validator-0.4.0/src/websec_validator/templates/probes/upload-matrix.sh +44 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0/src/websec_validator.egg-info}/PKG-INFO +18 -14
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator.egg-info/SOURCES.txt +5 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/tests/test_pentest_regressions.py +126 -2
- {websec_validator-0.3.0 → websec_validator-0.4.0}/LICENSE +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/setup.cfg +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/__init__.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/calibration.json +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/calibration.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/cli.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/constitution.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/corpus.json +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/dynamic.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/auth.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/authz.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/base.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/client_exposure.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/integrations.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/routes.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/schemas.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/stack.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/tenant.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/proof.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/recon.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/report.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/rules/error-stack-disclosure.yml +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/rules/insecure-default-secret.yml +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/scanners.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/_lib.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/appsync-cswsh.sh +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/appsync-introspection.sh +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/appsync-subscription-bola.sh +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/bola-cross-tenant.sh +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/bola-write-verbs.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/client-integrity-checklist.sh +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/compare-roles.sh +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/dlp-bypass-offline.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/error-disclosure-probe.sh +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/forged-token.sh +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/hs256-brute-force.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/jwt-attacks.sh +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/mass-assignment.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/race-conditions.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/rate-limit-burst.sh +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/s3-assess.sh +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/ssrf-probes.sh +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/unauth-baseline.sh +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/webhook-forgery.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/reports/access-control-matrix.md.template +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/reports/findings-triage.md.template +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/reports/pentest-handover-brief.md.template +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/reports/per-tool-FINDINGS.md.template +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator.egg-info/dependency_links.txt +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator.egg-info/entry_points.txt +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator.egg-info/top_level.txt +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/tests/test_hardening.py +0 -0
- {websec_validator-0.3.0 → websec_validator-0.4.0}/tests/test_recon.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: websec-validator
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
|
|
5
5
|
Author: Ricardo Accioly
|
|
6
6
|
License: MIT
|
|
@@ -82,7 +82,7 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
|
|
|
82
82
|
|
|
83
83
|
> That's the whole user surface: **`run`** (plus the optional, advanced **`dynamic`** live-probing step below). `recon`/`proof`/`calibrate` exist for developing the tool itself and are hidden from `--help` — you never need them.
|
|
84
84
|
|
|
85
|
-
## What it extracts (
|
|
85
|
+
## What it extracts (15 deterministic extractors, no LLM)
|
|
86
86
|
|
|
87
87
|
| | Dimension | Notable output |
|
|
88
88
|
|---|---|---|
|
|
@@ -91,13 +91,15 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
|
|
|
91
91
|
| auth | scheme + login surface + **insecure-default signing secrets** | multi-scheme; flags a hard-coded `JWT_SECRET \|\| 'dev-secret'` fallback (forgeable JWT) |
|
|
92
92
|
| **authz** | access-control map | guard coverage + **write endpoints with no visible guard** + roles |
|
|
93
93
|
| tenant | multi-tenancy key candidates | the BOLA boundary, by frequency |
|
|
94
|
-
| **password_policy** | cross-route
|
|
95
|
-
| surface | 14 sink classes |
|
|
94
|
+
| **password_policy** | cross-route consistency **+ reuse/history** | complexity drift across routes **+ a set-password path that hashes without a reuse check** |
|
|
95
|
+
| surface | 14 sink classes **+ redirect-SSRF** | user-input-gated sinks + var-arg SSRF + error-disclosure **+ follows-redirects-without-per-hop-guard** |
|
|
96
|
+
| **upload_security** | unrestricted upload + unsafe serve | deny-list-only, stored-name-from-filename, trust-client-MIME, accept-SVG, **serve without `nosniff`** |
|
|
96
97
|
| schemas | data models + **privileged fields** | Pydantic/SQLAlchemy/Django/Prisma/Mongoose/TypeORM/Zod → `role`/`isAdmin`/`groupId` for mass-assignment targeting |
|
|
97
|
-
| iac_ci | IaC + CI/CD | GHA injection, unpinned actions,
|
|
98
|
+
| iac_ci | IaC + CI/CD | GHA injection, unpinned actions, tfstate, **CDK AppSync `API_KEY` anonymous-default-auth + WAF-as-control smell** |
|
|
98
99
|
| client_exposure | browser leakage | public-var secrets by **name + value-shape (`da2-…`) + CDK build-injection**, server-secret-in-client, source maps |
|
|
99
|
-
| **client_integrity** | tamperable display
|
|
100
|
-
|
|
|
100
|
+
| **client_integrity** | tamperable display + **WS auth model** | wallet value without strict CSP / out-of-band anchor **+ the CSWSH determinant (ambient-cookie WS auth)** |
|
|
101
|
+
| **pii_exposure** | unmasked PII at the output boundary | `res.json(rawEntity)` with PII + **a masking control defined but with zero live call sites** (value-shape, not field-name) |
|
|
102
|
+
| graphql | GraphQL surface | introspection (**AppSync `introspectionConfig: DISABLED`-aware**) / playground / depth-limit **+ AppSync subscription-authz (cross-group BOLA)** |
|
|
101
103
|
| integrations | third-party + webhooks | webhooks missing signature verification |
|
|
102
104
|
|
|
103
105
|
Plus **derived targeting** — IDOR / SSRF / open-redirect / upload / write / auth-endpoint
|
|
@@ -206,13 +208,15 @@ publisher** with project `websec-validator`, owner `raccioly`, repo `websec-vali
|
|
|
206
208
|
|
|
207
209
|
## Status / roadmap
|
|
208
210
|
|
|
209
|
-
**Done:**
|
|
210
|
-
managed-AppSync / VTL boundary**
|
|
211
|
-
|
|
212
|
-
Semgrep rules**, tailored probe staging, agent briefing, traceable findings ledger
|
|
213
|
-
confidence (CJE — Wilson CIs)**, proof harness, test suite, **Docker bundle** (all
|
|
214
|
-
arch-aware), **dynamic phase v1** (authenticated read-only cross-tenant BOLA —
|
|
215
|
-
reproduced a hand-pentest's 14/14).
|
|
211
|
+
**Done:** 15-extractor recon (incl. schema/entity → mass-assignment targeting, the **AWS-CDK /
|
|
212
|
+
managed-AppSync / VTL boundary**, **upload-security** + **PII-output-boundary** + **redirect-SSRF**
|
|
213
|
+
+ **password-reuse** classes, and a **man-in-the-browser / tamperable-display** class), cross-tool
|
|
214
|
+
de-dup + **bundled Semgrep rules**, tailored probe staging, agent briefing, traceable findings ledger
|
|
215
|
+
with **calibrated confidence (CJE — Wilson CIs)**, proof harness, test suite, **Docker bundle** (all
|
|
216
|
+
scanners + Noir, arch-aware), **dynamic phase v1** (authenticated read-only cross-tenant BOLA —
|
|
217
|
+
validated live, reproduced a hand-pentest's 14/14). Validated against the **PTREQ0013000 pen test +
|
|
218
|
+
retest** (incl. correcting two findings the retest disproved: AppSync introspection *is* disablable
|
|
219
|
+
engine-level, and API_KEY-default is anonymous-auth, not CSWSH).
|
|
216
220
|
**Next:** dynamic write-verb BOLA + JWT/auth probes + ZAP/Nuclei two-role diff (gated, they mutate),
|
|
217
221
|
calibration on hand-labeled real repos (more representative base rate), ASVS index lookup, optional
|
|
218
222
|
model-SDK adapters for no-agent fallback.
|
|
@@ -70,7 +70,7 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
|
|
|
70
70
|
|
|
71
71
|
> That's the whole user surface: **`run`** (plus the optional, advanced **`dynamic`** live-probing step below). `recon`/`proof`/`calibrate` exist for developing the tool itself and are hidden from `--help` — you never need them.
|
|
72
72
|
|
|
73
|
-
## What it extracts (
|
|
73
|
+
## What it extracts (15 deterministic extractors, no LLM)
|
|
74
74
|
|
|
75
75
|
| | Dimension | Notable output |
|
|
76
76
|
|---|---|---|
|
|
@@ -79,13 +79,15 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
|
|
|
79
79
|
| auth | scheme + login surface + **insecure-default signing secrets** | multi-scheme; flags a hard-coded `JWT_SECRET \|\| 'dev-secret'` fallback (forgeable JWT) |
|
|
80
80
|
| **authz** | access-control map | guard coverage + **write endpoints with no visible guard** + roles |
|
|
81
81
|
| tenant | multi-tenancy key candidates | the BOLA boundary, by frequency |
|
|
82
|
-
| **password_policy** | cross-route
|
|
83
|
-
| surface | 14 sink classes |
|
|
82
|
+
| **password_policy** | cross-route consistency **+ reuse/history** | complexity drift across routes **+ a set-password path that hashes without a reuse check** |
|
|
83
|
+
| surface | 14 sink classes **+ redirect-SSRF** | user-input-gated sinks + var-arg SSRF + error-disclosure **+ follows-redirects-without-per-hop-guard** |
|
|
84
|
+
| **upload_security** | unrestricted upload + unsafe serve | deny-list-only, stored-name-from-filename, trust-client-MIME, accept-SVG, **serve without `nosniff`** |
|
|
84
85
|
| schemas | data models + **privileged fields** | Pydantic/SQLAlchemy/Django/Prisma/Mongoose/TypeORM/Zod → `role`/`isAdmin`/`groupId` for mass-assignment targeting |
|
|
85
|
-
| iac_ci | IaC + CI/CD | GHA injection, unpinned actions,
|
|
86
|
+
| iac_ci | IaC + CI/CD | GHA injection, unpinned actions, tfstate, **CDK AppSync `API_KEY` anonymous-default-auth + WAF-as-control smell** |
|
|
86
87
|
| client_exposure | browser leakage | public-var secrets by **name + value-shape (`da2-…`) + CDK build-injection**, server-secret-in-client, source maps |
|
|
87
|
-
| **client_integrity** | tamperable display
|
|
88
|
-
|
|
|
88
|
+
| **client_integrity** | tamperable display + **WS auth model** | wallet value without strict CSP / out-of-band anchor **+ the CSWSH determinant (ambient-cookie WS auth)** |
|
|
89
|
+
| **pii_exposure** | unmasked PII at the output boundary | `res.json(rawEntity)` with PII + **a masking control defined but with zero live call sites** (value-shape, not field-name) |
|
|
90
|
+
| graphql | GraphQL surface | introspection (**AppSync `introspectionConfig: DISABLED`-aware**) / playground / depth-limit **+ AppSync subscription-authz (cross-group BOLA)** |
|
|
89
91
|
| integrations | third-party + webhooks | webhooks missing signature verification |
|
|
90
92
|
|
|
91
93
|
Plus **derived targeting** — IDOR / SSRF / open-redirect / upload / write / auth-endpoint
|
|
@@ -194,13 +196,15 @@ publisher** with project `websec-validator`, owner `raccioly`, repo `websec-vali
|
|
|
194
196
|
|
|
195
197
|
## Status / roadmap
|
|
196
198
|
|
|
197
|
-
**Done:**
|
|
198
|
-
managed-AppSync / VTL boundary**
|
|
199
|
-
|
|
200
|
-
Semgrep rules**, tailored probe staging, agent briefing, traceable findings ledger
|
|
201
|
-
confidence (CJE — Wilson CIs)**, proof harness, test suite, **Docker bundle** (all
|
|
202
|
-
arch-aware), **dynamic phase v1** (authenticated read-only cross-tenant BOLA —
|
|
203
|
-
reproduced a hand-pentest's 14/14).
|
|
199
|
+
**Done:** 15-extractor recon (incl. schema/entity → mass-assignment targeting, the **AWS-CDK /
|
|
200
|
+
managed-AppSync / VTL boundary**, **upload-security** + **PII-output-boundary** + **redirect-SSRF**
|
|
201
|
+
+ **password-reuse** classes, and a **man-in-the-browser / tamperable-display** class), cross-tool
|
|
202
|
+
de-dup + **bundled Semgrep rules**, tailored probe staging, agent briefing, traceable findings ledger
|
|
203
|
+
with **calibrated confidence (CJE — Wilson CIs)**, proof harness, test suite, **Docker bundle** (all
|
|
204
|
+
scanners + Noir, arch-aware), **dynamic phase v1** (authenticated read-only cross-tenant BOLA —
|
|
205
|
+
validated live, reproduced a hand-pentest's 14/14). Validated against the **PTREQ0013000 pen test +
|
|
206
|
+
retest** (incl. correcting two findings the retest disproved: AppSync introspection *is* disablable
|
|
207
|
+
engine-level, and API_KEY-default is anonymous-auth, not CSWSH).
|
|
204
208
|
**Next:** dynamic write-verb BOLA + JWT/auth probes + ZAP/Nuclei two-role diff (gated, they mutate),
|
|
205
209
|
calibration on hand-labeled real repos (more representative base rate), ASVS index lookup, optional
|
|
206
210
|
model-SDK adapters for no-agent fallback.
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "websec-validator"
|
|
7
|
-
version = "0.3.0"
|
|
7
|
+
version = "0.4.0"
|
|
8
8
|
description = "Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -69,6 +69,20 @@ def render(facts: dict, scanners: dict, scan_results: list, probe_manifest: list
|
|
|
69
69
|
pp_line = f"looks consistent across {len(pp['password_blocks'])} validator block(s)"
|
|
70
70
|
else:
|
|
71
71
|
pp_line = "_no password validators detected_"
|
|
72
|
+
if ((pp.get("password_reuse") or {}).get("gap")):
|
|
73
|
+
pp_line += " · ⚠ NO reuse/history control (#6)"
|
|
74
|
+
|
|
75
|
+
up = facts.get("upload_security", {})
|
|
76
|
+
up_findings = up.get("findings", [])
|
|
77
|
+
up_section = ("\n".join(f"- **{f.get('severity')}** {f.get('kind')} — `{f.get('file')}`" for f in up_findings[:20])
|
|
78
|
+
if up_findings else
|
|
79
|
+
("_upload handler(s) present; allow-list + nosniff look ok — spot-check_" if up.get("upload_handlers")
|
|
80
|
+
else "_no upload handlers detected_"))
|
|
81
|
+
pii = facts.get("pii_exposure", {})
|
|
82
|
+
pii_findings = pii.get("findings", [])
|
|
83
|
+
pii_section = ("\n".join(f"- **{f.get('severity')}** {f.get('kind')} — `{f.get('file')}`" for f in pii_findings[:20])
|
|
84
|
+
if pii_findings else "_no obvious raw-PII responses / dead masking controls_")
|
|
85
|
+
ws_line = (facts.get("client_integrity", {}) or {}).get("websocket_auth", "no websocket detected")
|
|
72
86
|
|
|
73
87
|
gql = facts.get("graphql", {})
|
|
74
88
|
if gql.get("present"):
|
|
@@ -181,6 +195,14 @@ Production source maps exposed: {client.get("production_source_maps", False)}
|
|
|
181
195
|
**Client integrity — man-in-the-browser / tamperable display:**
|
|
182
196
|
{ci_section}
|
|
183
197
|
|
|
198
|
+
**WebSocket auth model (CSWSH determinant — is it an ambient cookie?):** {ws_line}
|
|
199
|
+
|
|
200
|
+
**File-upload security (#2b — sniff bytes, derive stored name, nosniff on serve):**
|
|
201
|
+
{up_section}
|
|
202
|
+
|
|
203
|
+
**PII output boundary (#8 — verify by VALUE SHAPE, not field name):**
|
|
204
|
+
{pii_section}
|
|
205
|
+
|
|
184
206
|
**Third-party integrations:** {integ_line}
|
|
185
207
|
{wh_line}
|
|
186
208
|
|
{websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/__init__.py
RENAMED
|
@@ -17,12 +17,14 @@ from .client_integrity import ClientIntegrityExtractor
|
|
|
17
17
|
from .graphql import GraphQLExtractor
|
|
18
18
|
from .iac_ci import IacCiExtractor
|
|
19
19
|
from .integrations import IntegrationsExtractor
|
|
20
|
+
from .pii_exposure import PiiExposureExtractor
|
|
20
21
|
from .policy_consistency import PolicyConsistencyExtractor
|
|
21
22
|
from .routes import RoutesExtractor
|
|
22
23
|
from .schemas import SchemasExtractor
|
|
23
24
|
from .stack import StackExtractor
|
|
24
25
|
from .surface import SurfaceExtractor
|
|
25
26
|
from .tenant import TenantExtractor
|
|
27
|
+
from .upload_security import UploadSecurityExtractor
|
|
26
28
|
|
|
27
29
|
# Order matters: stack first (others read facts['stack']); authz after routes
|
|
28
30
|
# (reads facts['routes']).
|
|
@@ -34,10 +36,12 @@ REGISTRY: list[Extractor] = [
|
|
|
34
36
|
TenantExtractor(),
|
|
35
37
|
PolicyConsistencyExtractor(),
|
|
36
38
|
SurfaceExtractor(),
|
|
39
|
+
UploadSecurityExtractor(),
|
|
37
40
|
SchemasExtractor(),
|
|
38
41
|
IacCiExtractor(),
|
|
39
42
|
ClientExposureExtractor(),
|
|
40
43
|
ClientIntegrityExtractor(),
|
|
44
|
+
PiiExposureExtractor(),
|
|
41
45
|
GraphQLExtractor(),
|
|
42
46
|
IntegrationsExtractor(),
|
|
43
47
|
]
|
|
@@ -48,6 +48,16 @@ OOB_ANCHOR = re.compile(
|
|
|
48
48
|
r"|out[_-]of[_-]band|toChecksumAddress|getAddress\(|checksumAddress|\beip[_-]?55\b|verifyAddress"
|
|
49
49
|
r"|address[_-]?verif|verif\w*[_-]?address|sendVerificationEmail|canonical[_-]?address", re.I)
|
|
50
50
|
|
|
51
|
+
# WebSocket / realtime auth model — the CSWSH determinant (PTREQ0013000 #4). CSWSH is only
|
|
52
|
+
# exploitable when the socket authenticates via an AMBIENT COOKIE the browser auto-attaches
|
|
53
|
+
# cross-origin. A token placed in the connection payload / subprotocol and stored origin-scoped is
|
|
54
|
+
# NOT exploitable (SOP blocks a cross-origin page from reading it). This lets us ANSWER a CSWSH
|
|
55
|
+
# scanner flag instead of guessing — the retest pushed back on exactly this and won.
|
|
56
|
+
WS_USAGE = re.compile(r"new\s+WebSocket\(|socket\.io|graphql-ws|subscriptions-transport-ws|appsync-realtime"
|
|
57
|
+
r"|\bwss?://", re.I)
|
|
58
|
+
WS_COOKIE_AUTH = re.compile(r"withCredentials\s*:\s*true|credentials\s*:\s*['\"]include['\"]"
|
|
59
|
+
r"|document\.cookie[\s\S]{0,80}?(?:socket|ws\b|websocket)", re.I)
|
|
60
|
+
|
|
51
61
|
|
|
52
62
|
class ClientIntegrityExtractor(Extractor):
|
|
53
63
|
name = "client_integrity"
|
|
@@ -57,6 +67,7 @@ class ClientIntegrityExtractor(Extractor):
|
|
|
57
67
|
sensitive, qr_files, clip_files = [], [], []
|
|
58
68
|
csp_present = csp_self = csp_nonce = csp_unsafe = False
|
|
59
69
|
oob = []
|
|
70
|
+
ws_usage = ws_cookie = False
|
|
60
71
|
for _p, rel, text in ctx.iter_code():
|
|
61
72
|
if SENSITIVE_VALUE.search(text):
|
|
62
73
|
if len(sensitive) < 30:
|
|
@@ -75,10 +86,15 @@ class ClientIntegrityExtractor(Extractor):
|
|
|
75
86
|
csp_unsafe = True
|
|
76
87
|
if OOB_ANCHOR.search(text) and len(oob) < 20:
|
|
77
88
|
oob.append(rel)
|
|
89
|
+
if WS_USAGE.search(text):
|
|
90
|
+
ws_usage = True
|
|
91
|
+
if WS_COOKIE_AUTH.search(text):
|
|
92
|
+
ws_cookie = True
|
|
78
93
|
|
|
79
94
|
# strict = a real `script-src 'self'` (+ a nonce / strict-dynamic) with NO unsafe-inline/eval
|
|
80
95
|
strict_csp = bool(csp_present and csp_self and csp_nonce and not csp_unsafe)
|
|
81
96
|
out_of_band = bool(oob)
|
|
97
|
+
ws_cookie_auth = bool(ws_usage and ws_cookie) # the CSWSH determinant (ambient-cookie WS auth)
|
|
82
98
|
|
|
83
99
|
findings = []
|
|
84
100
|
present = bool(sensitive)
|
|
@@ -109,8 +125,24 @@ class ClientIntegrityExtractor(Extractor):
|
|
|
109
125
|
"cryptographically tamper-proof on the web — the goal is detectable, not "
|
|
110
126
|
"impossible (the limit that hardware wallets exist to solve)."})
|
|
111
127
|
|
|
128
|
+
# CSWSH is ONLY real when the WS auth is an ambient cookie (PTREQ0013000 #4). This lets us
|
|
129
|
+
# answer a CSWSH scanner flag instead of guessing — a bearer token in the payload is not it.
|
|
130
|
+
if ws_cookie_auth:
|
|
131
|
+
findings.append({
|
|
132
|
+
"severity": "MEDIUM", "confidence": "LOW", "attack_class": "cswsh",
|
|
133
|
+
"issue": "WebSocket authenticated via an ambient cookie (Cross-Site WebSocket Hijacking)",
|
|
134
|
+
"detail": "A WebSocket/realtime connection appears to authenticate via a cookie "
|
|
135
|
+
"(withCredentials / credentials:'include'), which the browser auto-attaches "
|
|
136
|
+
"cross-origin — so a page on any origin can open an authenticated socket (CSWSH, #4). "
|
|
137
|
+
"Validate the Origin on the handshake, or move the credential into the connection "
|
|
138
|
+
"payload / subprotocol and store it origin-scoped (not a cookie). If WS auth is "
|
|
139
|
+
"already a token in the payload, CSWSH is NOT exploitable."})
|
|
140
|
+
|
|
112
141
|
return {
|
|
113
142
|
"sensitive_display": sorted(set(sensitive)),
|
|
143
|
+
"websocket_auth": ("cookie (CSWSH-exposed — validate Origin)" if ws_cookie_auth
|
|
144
|
+
else "token-or-none (CSWSH not exploitable)" if ws_usage
|
|
145
|
+
else "no websocket detected"),
|
|
114
146
|
"qr_generation": sorted(set(qr_files)),
|
|
115
147
|
"clipboard_copy": sorted(set(clip_files)),
|
|
116
148
|
"strict_csp": strict_csp,
|
{websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/graphql.py
RENAMED
|
@@ -33,6 +33,11 @@ TENANT_ARG = re.compile(r"\b(\w+)\s*\(([^)]*\b(?:groupId|group_id|orgId|org_id|t
|
|
|
33
33
|
# Identity-binding signals in a VTL resolver — the field is tied to the CALLER, not a free arg.
|
|
34
34
|
VTL_AUTHZ = re.compile(r"\$ctx(?:tx)?\.identity|\$context\.identity|identity\.(?:sub|username|claims|resolverContext)"
|
|
35
35
|
r"|util\.unauthorized|\bgroupIds?\b[\s\S]{0,80}?\bcontains\b|#if\s*\(\s*!?\s*\$ctx\.identity")
|
|
36
|
+
# Engine-level introspection disable on aws-cdk-lib appsync.GraphqlApi. The PTREQ0013000 RETEST
|
|
37
|
+
# proved this IS available and un-bypassable (unlike a WAF string-match) — so a correctly-configured
|
|
38
|
+
# AppSync API must NOT be flagged. This corrects the 0.3.0 false positive that always cried wolf.
|
|
39
|
+
APPSYNC_INTROSPECTION_OFF = re.compile(r"introspectionConfig\s*:\s*[\w.]*\bDISABLED\b")
|
|
40
|
+
APPSYNC_LIMITING = re.compile(r"\bqueryDepthLimit\b|\bresolverCountLimit\b")
|
|
36
41
|
|
|
37
42
|
|
|
38
43
|
class GraphQLExtractor(Extractor):
|
|
@@ -47,10 +52,15 @@ class GraphQLExtractor(Extractor):
|
|
|
47
52
|
|
|
48
53
|
introspection, playground, limiting, code_hit = "unknown", False, False, False
|
|
49
54
|
appsync, aws_directives = False, False
|
|
55
|
+
appsync_introspection_off = appsync_limiting = False
|
|
50
56
|
schema_texts = [] # (rel, text) for SDL files — parsed for Subscription authz
|
|
51
57
|
for _p, rel, text in ctx.iter_code():
|
|
52
58
|
if APPSYNC_MARK.search(text):
|
|
53
59
|
appsync = True
|
|
60
|
+
if APPSYNC_INTROSPECTION_OFF.search(text):
|
|
61
|
+
appsync_introspection_off = True
|
|
62
|
+
if APPSYNC_LIMITING.search(text):
|
|
63
|
+
appsync_limiting = True
|
|
54
64
|
if rel.endswith((".graphql", ".gql")):
|
|
55
65
|
schema_texts.append((rel, text))
|
|
56
66
|
if AWS_AUTH_DIRECTIVE.search(text):
|
|
@@ -74,14 +84,20 @@ class GraphQLExtractor(Extractor):
|
|
|
74
84
|
findings = []
|
|
75
85
|
sub_authz = []
|
|
76
86
|
if managed:
|
|
77
|
-
# AppSync
|
|
78
|
-
# `
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
87
|
+
# AppSync introspection CAN be disabled engine-level via
|
|
88
|
+
# `introspectionConfig: IntrospectionConfig.DISABLED` (aws-cdk-lib) — un-bypassable, unlike
|
|
89
|
+
# a WAF byte-match. Only flag when it is NOT disabled (retest correction to the 0.3.0 FP).
|
|
90
|
+
if not appsync_introspection_off:
|
|
91
|
+
findings.append({"severity": "MEDIUM", "issue": "AppSync GraphQL introspection not disabled engine-level",
|
|
92
|
+
"attack_class": "graphql",
|
|
93
|
+
"detail": "Set `introspectionConfig: appsync.IntrospectionConfig.DISABLED` so the engine "
|
|
94
|
+
"rejects __schema/__type regardless of encoding. A WAF byte-match on `__schema` "
|
|
95
|
+
"is NOT sufficient — bypassable via Unicode/JSON escapes and it only fronts one "
|
|
96
|
+
"endpoint (PTREQ0013000 #2). Run the appsync-introspection probe to confirm."})
|
|
97
|
+
if not (appsync_limiting or limiting):
|
|
98
|
+
findings.append({"severity": "LOW", "issue": "AppSync has no query depth / resolver-count limit",
|
|
99
|
+
"attack_class": "graphql",
|
|
100
|
+
"detail": "add `queryDepthLimit` + `resolverCountLimit` (alias / deep-query DoS guard)."})
|
|
85
101
|
sub_authz = self._subscription_authz(ctx, schema_texts, findings)
|
|
86
102
|
else:
|
|
87
103
|
if introspection in ("enabled", "unknown"):
|
|
@@ -103,7 +119,8 @@ class GraphQLExtractor(Extractor):
|
|
|
103
119
|
or (["AppSync GraphQL API (HTTP + realtime WebSocket)"] if managed
|
|
104
120
|
else ["(server detected; endpoint not routed by Noir)"]),
|
|
105
121
|
"schema_files": schema_files[:20],
|
|
106
|
-
"introspection": "appsync-
|
|
122
|
+
"introspection": (("appsync-disabled" if appsync_introspection_off else "appsync-reachable")
|
|
123
|
+
if managed else introspection),
|
|
107
124
|
"playground_enabled": playground, "query_limiting_detected": limiting,
|
|
108
125
|
"subscription_authz": sub_authz,
|
|
109
126
|
"findings": findings,
|
|
@@ -29,6 +29,12 @@ APPSYNC_DEFAULT_APIKEY = re.compile(
|
|
|
29
29
|
APPSYNC_APIKEY_MODE = re.compile(r"AuthorizationType\.API_KEY|authorizationType\s*:\s*['\"]?API_KEY")
|
|
30
30
|
WAFV2 = re.compile(r"wafv2\.CfnWebACL|\bCfnWebACL\b|aws_wafv2|wafv2\.CfnWebACLAssociation")
|
|
31
31
|
WAF_ASSOC = re.compile(r"CfnWebACLAssociation|WebACLAssociation")
|
|
32
|
+
# WAF used as the PRIMARY control for an app-layer flaw — a bypassable band-aid, not a remediation
|
|
33
|
+
# (PTREQ0013000 #2/#11). A byteMatchStatement/regex matching `__schema`, SQL keywords or `<script`
|
|
34
|
+
# means the app-layer bug is still there; the string-match is evadable via encoding + only one door.
|
|
35
|
+
WAF_APPLAYER_MATCH = re.compile(
|
|
36
|
+
r"(?:byteMatchStatement|searchString|RegexPatternSet|regexString)[\s\S]{0,220}?"
|
|
37
|
+
r"(__schema|__type|UNION\s+SELECT|information_schema|<script|onerror=|\bor\s+1\s*=\s*1\b|sleep\s*\()", re.I)
|
|
32
38
|
|
|
33
39
|
|
|
34
40
|
class IacCiExtractor(Extractor):
|
|
@@ -69,7 +75,7 @@ class IacCiExtractor(Extractor):
|
|
|
69
75
|
findings.append({"severity": "HIGH", "kind": "terraform-state-committed", "file": ctx.rel(tf),
|
|
70
76
|
"detail": "tfstate may contain plaintext secrets (DB passwords, keys) — must not be committed"})
|
|
71
77
|
|
|
72
|
-
# --- CDK / managed-AppSync auth (#4
|
|
78
|
+
# --- CDK / managed-AppSync auth (#4 anonymous default-auth; WAF-as-control smell #2) ---
|
|
73
79
|
appsync_files, waf_present, waf_assoc = [], False, False
|
|
74
80
|
for _p, rel, text in ctx.iter_code():
|
|
75
81
|
if not rel.endswith((".ts", ".js", ".mjs", ".cjs")):
|
|
@@ -78,15 +84,24 @@ class IacCiExtractor(Extractor):
|
|
|
78
84
|
waf_present = True
|
|
79
85
|
if WAF_ASSOC.search(text):
|
|
80
86
|
waf_assoc = True
|
|
87
|
+
if WAF_APPLAYER_MATCH.search(text):
|
|
88
|
+
tok = (WAF_APPLAYER_MATCH.search(text).group(1) or "").strip()
|
|
89
|
+
findings.append({"severity": "MEDIUM", "kind": "waf-as-app-control", "file": rel,
|
|
90
|
+
"detail": f"A WAF string/regex match on an app-layer attack token ({tok!r}) is used as a "
|
|
91
|
+
"control. A WAF is a bypassable compensating control, never the remediation: "
|
|
92
|
+
"string-matches are evaded by encoding (the retest bypassed `__schema` with a "
|
|
93
|
+
"Unicode escape) and only cover one endpoint. Fix at the app/engine layer "
|
|
94
|
+
"(disable introspection, parametrize queries) and keep the WAF as defense-in-depth."})
|
|
81
95
|
if APPSYNC_API.search(text):
|
|
82
96
|
appsync_files.append(rel)
|
|
83
97
|
if APPSYNC_DEFAULT_APIKEY.search(text):
|
|
84
98
|
findings.append({"severity": "HIGH", "kind": "appsync-apikey-default", "file": rel,
|
|
85
|
-
"detail": "AppSync defaultAuthorization is API_KEY — the realtime
|
|
86
|
-
"
|
|
87
|
-
"
|
|
88
|
-
"USER_POOL/OIDC/IAM/LAMBDA; keep API_KEY (if needed)
|
|
89
|
-
"
|
|
99
|
+
"detail": "AppSync defaultAuthorization is API_KEY — the API (HTTP + realtime) accepts "
|
|
100
|
+
"a static key by default, and that key typically ships to the browser, so "
|
|
101
|
+
"this is effectively ANONYMOUS/unauthenticated access. Make the default "
|
|
102
|
+
"USER_POOL/OIDC/IAM/LAMBDA; keep API_KEY (if needed) to a scoped additional "
|
|
103
|
+
"mode. (NB: this is NOT in itself CSWSH — that needs cookie-based WS auth; "
|
|
104
|
+
"see the client_integrity websocket-auth check.)"})
|
|
90
105
|
elif APPSYNC_APIKEY_MODE.search(text):
|
|
91
106
|
findings.append({"severity": "MEDIUM", "kind": "appsync-apikey-mode", "file": rel,
|
|
92
107
|
"detail": "AppSync accepts an API_KEY authorization mode — confirm it is NOT the "
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""PII output-boundary extractor — unmasked customer data in API responses (PTREQ0013000 #8).
|
|
2
|
+
|
|
3
|
+
Two high-signal static tells the retest taught us:
|
|
4
|
+
|
|
5
|
+
1. **Dead security control.** A masking helper / `view_full`-style permission EXISTS in the codebase
|
|
6
|
+
but has ZERO call sites in the live request handlers — it was wired only into offline export paths.
|
|
7
|
+
A control defined-but-never-called is worse than none (it reads as "handled"). This is very
|
|
8
|
+
distinctive and cheap to find: collect `mask*/redact*/canViewFull*` definitions, count live (non-
|
|
9
|
+
test) call sites, flag the ones with none.
|
|
10
|
+
|
|
11
|
+
2. **Raw entity to the client.** A controller does `res.json(entity)` on a raw ORM/repo object that
|
|
12
|
+
carries PII fields, with no DTO/serializer/masker — so phone/email ship in cleartext, *including*
|
|
13
|
+
indirect carriers (a phone embedded in a composed `messageBirdId`, a denormalized `lastMessage`).
|
|
14
|
+
The decisive verification is **value-shape, not field-name** — a field allow-list misses the
|
|
15
|
+
indirect carriers — so the probe asserts no phone/email *value* reaches a non-privileged caller.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import re
|
|
21
|
+
|
|
22
|
+
from .base import Extractor, RepoContext
|
|
23
|
+
|
|
24
|
+
# helper/permission DEFINITIONS (function/arrow/def) — not variable assignments to a call result
|
|
25
|
+
MASK_DEF = re.compile(
|
|
26
|
+
r"(?:function\s+|export\s+(?:async\s+)?function\s+|def\s+)"
|
|
27
|
+
r"(mask\w+|redact\w+|canViewFull\w+|scrub\w+|anonymi[sz]e\w+|toPublic\w+|sanitize\w*Pii)\b"
|
|
28
|
+
r"|(?:const|let|export\s+const)\s+(mask\w+|redact\w+|canViewFull\w+|toPublic\w+)\s*=\s*(?:async\s*)?\(", re.I)
|
|
29
|
+
PII_FIELD = re.compile(r"\b(?:phone|phoneNumber|msisdn|mobile|email|emailAddress|ssn|socialSecurity"
|
|
30
|
+
r"|dob|dateOfBirth|birthDate|creditCard|cardNumber|taxId|nationalId)\b", re.I)
|
|
31
|
+
# returning a raw variable / a fresh ORM read straight to the client
|
|
32
|
+
RES_RAW = re.compile(r"res\.(?:json|send)\s*\(\s*(?:await\s+)?[A-Za-z_$][\w$]*\s*\)"
|
|
33
|
+
r"|res\.(?:json|send)\s*\(\s*await\s+[\w.]+\.(?:find|findOne|findById|findAll|get|query)\s*\(")
|
|
34
|
+
MASK_CALL_NEAR = re.compile(r"mask\w+\(|redact\w+\(|toPublic\w+\(|canViewFull\w+\(|\.serialize\(|toDto\(|\bDTO\b|pick\(", re.I)
|
|
35
|
+
TESTFILE = re.compile(r"(?:^|/)(?:tests?|__tests__|spec)/|\.(?:test|spec)\.", re.I)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class PiiExposureExtractor(Extractor):
|
|
39
|
+
name = "pii_exposure"
|
|
40
|
+
category = "exposure"
|
|
41
|
+
|
|
42
|
+
def extract(self, ctx: RepoContext, facts: dict) -> dict:
|
|
43
|
+
texts = []
|
|
44
|
+
helpers: dict = {} # name -> def file
|
|
45
|
+
for _p, rel, text in ctx.iter_code():
|
|
46
|
+
texts.append((rel, text))
|
|
47
|
+
for m in MASK_DEF.finditer(text):
|
|
48
|
+
nm = m.group(1) or m.group(2)
|
|
49
|
+
if nm and len(nm) > 4 and nm not in helpers:
|
|
50
|
+
helpers[nm] = rel
|
|
51
|
+
|
|
52
|
+
findings = []
|
|
53
|
+
|
|
54
|
+
# 1. dead masking/permission control — defined but no LIVE (non-test) call site
|
|
55
|
+
dead = []
|
|
56
|
+
for nm, deffile in helpers.items():
|
|
57
|
+
callrx = re.compile(r"\b" + re.escape(nm) + r"\s*\(")
|
|
58
|
+
live = sum(1 for rel, text in texts
|
|
59
|
+
if rel != deffile and not TESTFILE.search(rel) and callrx.search(text))
|
|
60
|
+
if live == 0:
|
|
61
|
+
dead.append(nm)
|
|
62
|
+
findings.append({"severity": "HIGH", "kind": "dead-pii-control", "file": deffile,
|
|
63
|
+
"detail": f"`{nm}` (a masking/PII-permission control) is defined but has NO live "
|
|
64
|
+
"call site outside its own file/tests — a security control that exists but "
|
|
65
|
+
"isn't wired into the request handlers (it was likely only on export/report "
|
|
66
|
+
"paths). Apply it at the live API output boundary, or remove the false "
|
|
67
|
+
"sense of safety (PTREQ0013000 #8)."})
|
|
68
|
+
|
|
69
|
+
# 2. raw entity with PII to the client, no masker/DTO in the handler
|
|
70
|
+
raw_leaks = []
|
|
71
|
+
for rel, text in texts:
|
|
72
|
+
if TESTFILE.search(rel):
|
|
73
|
+
continue
|
|
74
|
+
if PII_FIELD.search(text) and RES_RAW.search(text) and not MASK_CALL_NEAR.search(text):
|
|
75
|
+
if len(raw_leaks) < 30:
|
|
76
|
+
raw_leaks.append(rel)
|
|
77
|
+
findings.append({"severity": "MEDIUM", "kind": "raw-entity-pii-response", "file": rel,
|
|
78
|
+
"detail": "A handler returns a raw entity (`res.json(entity)`) in a file that "
|
|
79
|
+
"handles PII fields, with no DTO/serializer/masker — phone/email likely "
|
|
80
|
+
"ship in cleartext. Mask at ONE output boundary (a DTO), gated by a "
|
|
81
|
+
"permission. VERIFY BY VALUE SHAPE (no phone/email value in the JSON), "
|
|
82
|
+
"not field name — indirect carriers (composed IDs, denormalized fields) "
|
|
83
|
+
"leak too (the `messageBirdId`-embeds-the-phone class, #8)."})
|
|
84
|
+
|
|
85
|
+
by_sev: dict = {}
|
|
86
|
+
for f in findings:
|
|
87
|
+
by_sev[f["severity"]] = by_sev.get(f["severity"], 0) + 1
|
|
88
|
+
return {
|
|
89
|
+
"findings": findings,
|
|
90
|
+
"dead_controls": dead,
|
|
91
|
+
"raw_pii_responses": raw_leaks,
|
|
92
|
+
"masking_helpers": sorted(helpers.keys())[:20],
|
|
93
|
+
"by_severity": by_sev,
|
|
94
|
+
"note": ("PII output-boundary review: " + (f"{len(dead)} masking control(s) defined but unused; " if dead else "")
|
|
95
|
+
+ (f"{len(raw_leaks)} handler(s) return a raw PII entity. " if raw_leaks else "no obvious raw-PII responses. ")
|
|
96
|
+
+ "Probe with a per-role response diff asserting NO phone/email VALUE (/\\+?\\d{7,}/ or an email "
|
|
97
|
+
"regex) reaches a non-privileged caller — across nested objects, IDs, and exports (#8)."),
|
|
98
|
+
}
|
|
@@ -42,6 +42,17 @@ _RE_STRONG = re.compile(r"isStrongPassword", re.I)
|
|
|
42
42
|
|
|
43
43
|
_ALL = ("min", "upper", "lower", "digit", "special")
|
|
44
44
|
|
|
45
|
+
# Password REUSE / history — a DIFFERENT control from complexity (PTREQ0013000 #6, which we initially
|
|
46
|
+
# misread as complexity). A set-password path that hashes a new password with no comparison to the
|
|
47
|
+
# current / previous hashes lets a user re-set the same password. Signals:
|
|
48
|
+
HASH_NEW = re.compile(r"bcrypt(?:js)?\.hash|argon2\.hash|\bscrypt\b|pbkdf2|hashPassword\(|\.setPassword\(", re.I)
|
|
49
|
+
REUSE_CHECK = re.compile(r"isPasswordReused|passwordHistory|password_history|previousPasswords|prior[_-]?hashes"
|
|
50
|
+
r"|bcrypt(?:js)?\.compare[\s\S]{0,200}?(?:history|previous|current|old)", re.I)
|
|
51
|
+
PW_HASH_FIELD = re.compile(r"\b(?:passwordHash|password_hash|hashedPassword|pwdHash|passwordDigest)\b")
|
|
52
|
+
PW_HISTORY_FIELD = re.compile(r"\b(?:passwordHistory|password_history|previousPasswords|passwordHistoryHashes|priorPasswords)\b")
|
|
53
|
+
SET_PW_CTX = re.compile(r"changePassword|updatePassword|setPassword|resetPassword|updateProfile|adminUpdate"
|
|
54
|
+
r"|set[_-]?password|change[_-]?password", re.I)
|
|
55
|
+
|
|
45
56
|
|
|
46
57
|
def _classes(window: str) -> set:
|
|
47
58
|
"""The character-class requirement set enforced in one validation window."""
|
|
@@ -68,7 +79,23 @@ class PolicyConsistencyExtractor(Extractor):
|
|
|
68
79
|
def extract(self, ctx: RepoContext, facts: dict) -> dict:
|
|
69
80
|
blocks = [] # (file, frozenset(classes))
|
|
70
81
|
seen = set()
|
|
82
|
+
hashes = reuse_check = set_ctx = model_pwhash = model_history = False
|
|
71
83
|
for _p, rel, text in ctx.iter_code():
|
|
84
|
+
# Reuse signals live in camelCase compounds (changePassword/passwordHash) that PW_FIELD's
|
|
85
|
+
# \bword\b boundaries miss — so track them on a cheap substring pre-check, NOT behind the
|
|
86
|
+
# complexity gate below (that bug initially made the reuse check silently never fire).
|
|
87
|
+
low = text.lower()
|
|
88
|
+
if "password" in low or "bcrypt" in low or "argon2" in low or "scrypt" in low or "pbkdf2" in low:
|
|
89
|
+
if HASH_NEW.search(text):
|
|
90
|
+
hashes = True
|
|
91
|
+
if REUSE_CHECK.search(text):
|
|
92
|
+
reuse_check = True
|
|
93
|
+
if SET_PW_CTX.search(text):
|
|
94
|
+
set_ctx = True
|
|
95
|
+
if PW_HASH_FIELD.search(text):
|
|
96
|
+
model_pwhash = True
|
|
97
|
+
if PW_HISTORY_FIELD.search(text):
|
|
98
|
+
model_history = True
|
|
72
99
|
if not PW_FIELD.search(text):
|
|
73
100
|
continue
|
|
74
101
|
# FORWARD-only window, capped at the next password field — validation follows the field
|
|
@@ -110,11 +137,22 @@ class PolicyConsistencyExtractor(Extractor):
|
|
|
110
137
|
if len(smax) < 3:
|
|
111
138
|
weak_policy = strongest
|
|
112
139
|
|
|
140
|
+
# Password REUSE / history (#6) — the DIFFERENT control: a set-password path that hashes a new
|
|
141
|
+
# password with no reuse comparison, and/or a passwordHash model with no history field, lets a
|
|
142
|
+
# user re-set the same/old password. (Complexity is the drift check above; this is reuse.)
|
|
143
|
+
reuse_gap = bool(hashes and (set_ctx or model_pwhash) and not reuse_check and not model_history)
|
|
144
|
+
password_reuse = {
|
|
145
|
+
"hashes_passwords": hashes, "has_set_password_path": set_ctx,
|
|
146
|
+
"has_reuse_check": reuse_check, "model_has_passwordHash": model_pwhash,
|
|
147
|
+
"model_has_history": model_history, "gap": reuse_gap,
|
|
148
|
+
}
|
|
149
|
+
|
|
113
150
|
return {
|
|
114
151
|
"password_blocks": blocks[:20],
|
|
115
152
|
"strongest_policy": strongest,
|
|
116
153
|
"drift": drift, # MEDIUM in findings.py — inconsistent siblings (#6)
|
|
117
154
|
"weak_policy": weak_policy, # LOW — uniformly weak, no strong sibling to compare
|
|
155
|
+
"password_reuse": password_reuse, # MEDIUM — no reuse/history control on set-password (#6)
|
|
118
156
|
"consistent": not drift,
|
|
119
157
|
"note": ("Password-policy DRIFT: a sibling route enforces fewer character classes than the "
|
|
120
158
|
"strongest one found — align them (the WU #6 regression). " if drift else
|
{websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/surface.py
RENAMED
|
@@ -67,6 +67,14 @@ SINKS = {
|
|
|
67
67
|
}
|
|
68
68
|
|
|
69
69
|
|
|
70
|
+
# SSRF-via-redirect (PTREQ0013000 #1): axios/requests FOLLOW redirects by DEFAULT, so an outbound
|
|
71
|
+
# client on a variable URL re-validates only the FIRST hop unless it pins maxRedirects:0 or adds a
|
|
72
|
+
# per-hop guard. One of these present = the chain is guarded; absent next to an SSRF sink = the lead
|
|
73
|
+
# (allow-list on the input URL is necessary but never sufficient — a 302 to 169.254.169.254 wins).
|
|
74
|
+
REDIRECT_GUARD = re.compile(r"beforeRedirect|maxRedirects\s*:\s*0\b|allow_redirects\s*=\s*False"
|
|
75
|
+
r"|validateRedirect|isAllowed\w*Url|on[_-]?redirect|checkRedirect", re.I)
|
|
76
|
+
|
|
77
|
+
|
|
70
78
|
class SurfaceExtractor(Extractor):
|
|
71
79
|
name = "surface"
|
|
72
80
|
category = "sinks"
|
|
@@ -78,6 +86,7 @@ class SurfaceExtractor(Extractor):
|
|
|
78
86
|
|
|
79
87
|
found: dict = {k: [] for k in SINKS}
|
|
80
88
|
counts: dict = {k: 0 for k in SINKS}
|
|
89
|
+
ssrf_redirect: list = [] # SSRF sink in a file with NO per-hop redirect guard (#1)
|
|
81
90
|
for _p, rel, text in ctx.iter_code():
|
|
82
91
|
for cls, (_probe, gate, rx) in SINKS.items():
|
|
83
92
|
if gate == "sql" and not has_sql:
|
|
@@ -88,12 +97,16 @@ class SurfaceExtractor(Extractor):
|
|
|
88
97
|
counts[cls] += 1
|
|
89
98
|
if len(found[cls]) < 60:
|
|
90
99
|
found[cls].append(rel)
|
|
100
|
+
if (len(ssrf_redirect) < 40 and not REDIRECT_GUARD.search(text)
|
|
101
|
+
and (SINKS["ssrf-outbound-http"][2].search(text) or SINKS["ssrf"][2].search(text))):
|
|
102
|
+
ssrf_redirect.append(rel)
|
|
91
103
|
|
|
92
104
|
sinks = {k: {"probe": SINKS[k][0], "count": counts[k], "files": found[k]}
|
|
93
105
|
for k in SINKS if counts[k]}
|
|
94
106
|
return {
|
|
95
107
|
"sinks": sinks,
|
|
96
108
|
"sink_counts": {k: counts[k] for k in SINKS if counts[k]},
|
|
109
|
+
"ssrf_redirect_unguarded": ssrf_redirect, # validate EVERY hop, not just the input URL (#1)
|
|
97
110
|
"datastore_class": ("sql" if has_sql else ("nosql" if has_nosql else "unknown")),
|
|
98
111
|
"note": "Each sink hit is user-input-gated (req./request./concat/interp), so these are "
|
|
99
112
|
"higher-confidence leads. Cross-reference the files with routes.targeting to pick "
|