websec-validator 0.3.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. {websec_validator-0.3.0/src/websec_validator.egg-info → websec_validator-0.4.0}/PKG-INFO +18 -14
  2. {websec_validator-0.3.0 → websec_validator-0.4.0}/README.md +17 -13
  3. {websec_validator-0.3.0 → websec_validator-0.4.0}/pyproject.toml +1 -1
  4. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/briefing.py +22 -0
  5. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/__init__.py +4 -0
  6. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/client_integrity.py +32 -0
  7. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/graphql.py +26 -9
  8. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/iac_ci.py +21 -6
  9. websec_validator-0.4.0/src/websec_validator/extractors/pii_exposure.py +98 -0
  10. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/policy_consistency.py +38 -0
  11. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/surface.py +13 -0
  12. websec_validator-0.4.0/src/websec_validator/extractors/upload_security.py +89 -0
  13. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/findings.py +48 -2
  14. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/probes.py +14 -0
  15. websec_validator-0.4.0/src/websec_validator/templates/probes/password-reuse.sh +40 -0
  16. websec_validator-0.4.0/src/websec_validator/templates/probes/pii-output-diff.sh +48 -0
  17. websec_validator-0.4.0/src/websec_validator/templates/probes/upload-matrix.sh +44 -0
  18. {websec_validator-0.3.0 → websec_validator-0.4.0/src/websec_validator.egg-info}/PKG-INFO +18 -14
  19. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator.egg-info/SOURCES.txt +5 -0
  20. {websec_validator-0.3.0 → websec_validator-0.4.0}/tests/test_pentest_regressions.py +126 -2
  21. {websec_validator-0.3.0 → websec_validator-0.4.0}/LICENSE +0 -0
  22. {websec_validator-0.3.0 → websec_validator-0.4.0}/setup.cfg +0 -0
  23. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/__init__.py +0 -0
  24. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/calibration.json +0 -0
  25. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/calibration.py +0 -0
  26. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/cli.py +0 -0
  27. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/constitution.py +0 -0
  28. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/corpus.json +0 -0
  29. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/dynamic.py +0 -0
  30. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/auth.py +0 -0
  31. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/authz.py +0 -0
  32. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/base.py +0 -0
  33. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/client_exposure.py +0 -0
  34. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/integrations.py +0 -0
  35. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/routes.py +0 -0
  36. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/schemas.py +0 -0
  37. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/stack.py +0 -0
  38. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/extractors/tenant.py +0 -0
  39. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/proof.py +0 -0
  40. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/recon.py +0 -0
  41. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/report.py +0 -0
  42. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/rules/error-stack-disclosure.yml +0 -0
  43. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/rules/insecure-default-secret.yml +0 -0
  44. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/scanners.py +0 -0
  45. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/_lib.py +0 -0
  46. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/appsync-cswsh.sh +0 -0
  47. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/appsync-introspection.sh +0 -0
  48. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/appsync-subscription-bola.sh +0 -0
  49. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/bola-cross-tenant.sh +0 -0
  50. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/bola-write-verbs.py +0 -0
  51. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/client-integrity-checklist.sh +0 -0
  52. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/compare-roles.sh +0 -0
  53. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/dlp-bypass-offline.py +0 -0
  54. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/error-disclosure-probe.sh +0 -0
  55. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/forged-token.sh +0 -0
  56. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/hs256-brute-force.py +0 -0
  57. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/jwt-attacks.sh +0 -0
  58. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/mass-assignment.py +0 -0
  59. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/race-conditions.py +0 -0
  60. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/rate-limit-burst.sh +0 -0
  61. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/s3-assess.sh +0 -0
  62. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/ssrf-probes.sh +0 -0
  63. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/unauth-baseline.sh +0 -0
  64. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/probes/webhook-forgery.py +0 -0
  65. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +0 -0
  66. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/reports/access-control-matrix.md.template +0 -0
  67. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/reports/findings-triage.md.template +0 -0
  68. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/reports/pentest-handover-brief.md.template +0 -0
  69. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator/templates/reports/per-tool-FINDINGS.md.template +0 -0
  70. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator.egg-info/dependency_links.txt +0 -0
  71. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator.egg-info/entry_points.txt +0 -0
  72. {websec_validator-0.3.0 → websec_validator-0.4.0}/src/websec_validator.egg-info/top_level.txt +0 -0
  73. {websec_validator-0.3.0 → websec_validator-0.4.0}/tests/test_hardening.py +0 -0
  74. {websec_validator-0.3.0 → websec_validator-0.4.0}/tests/test_recon.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: websec-validator
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
5
5
  Author: Ricardo Accioly
6
6
  License: MIT
@@ -82,7 +82,7 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
82
82
 
83
83
  > That's the whole user surface: **`run`** (plus the optional, advanced **`dynamic`** live-probing step below). `recon`/`proof`/`calibrate` exist for developing the tool itself and are hidden from `--help` — you never need them.
84
84
 
85
- ## What it extracts (13 deterministic extractors, no LLM)
85
+ ## What it extracts (15 deterministic extractors, no LLM)
86
86
 
87
87
  | | Dimension | Notable output |
88
88
  |---|---|---|
@@ -91,13 +91,15 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
91
91
  | auth | scheme + login surface + **insecure-default signing secrets** | multi-scheme; flags a hard-coded `JWT_SECRET \|\| 'dev-secret'` fallback (forgeable JWT) |
92
92
  | **authz** | access-control map | guard coverage + **write endpoints with no visible guard** + roles |
93
93
  | tenant | multi-tenancy key candidates | the BOLA boundary, by frequency |
94
- | **password_policy** | cross-route policy consistency | flags a route enforcing fewer character classes than the strongest sibling (policy drift) |
95
- | surface | 14 sink classes | 12 user-input-gated (SSRF/SQLi/traversal/SSTI/…) **+ var-arg SSRF + response-side error-disclosure** |
94
+ | **password_policy** | cross-route consistency **+ reuse/history** | complexity drift across routes **+ a set-password path that hashes without a reuse check** |
95
+ | surface | 14 sink classes **+ redirect-SSRF** | user-input-gated sinks + var-arg SSRF + error-disclosure **+ follows-redirects-without-per-hop-guard** |
96
+ | **upload_security** | unrestricted upload + unsafe serve | deny-list-only, stored-name-from-filename, trust-client-MIME, accept-SVG, **serve without `nosniff`** |
96
97
  | schemas | data models + **privileged fields** | Pydantic/SQLAlchemy/Django/Prisma/Mongoose/TypeORM/Zod → `role`/`isAdmin`/`groupId` for mass-assignment targeting |
97
- | iac_ci | IaC + CI/CD | GHA injection, unpinned actions, Dockerfile-root, tfstate **+ CDK AppSync `API_KEY` default-auth (CSWSH)** |
98
+ | iac_ci | IaC + CI/CD | GHA injection, unpinned actions, tfstate, **CDK AppSync `API_KEY` anonymous-default-auth + WAF-as-control smell** |
98
99
  | client_exposure | browser leakage | public-var secrets by **name + value-shape (`da2-…`) + CDK build-injection**, server-secret-in-client, source maps |
99
- | **client_integrity** | tamperable display (man-in-the-browser) | a fund-redirecting value (wallet address/QR) shown without a strict CSP / out-of-band anchor |
100
- | graphql | GraphQL surface | introspection / playground / depth-limit **+ AppSync subscription-authz (cross-group BOLA) + WAF-bypass-aware introspection** |
100
+ | **client_integrity** | tamperable display + **WS auth model** | wallet value without strict CSP / out-of-band anchor **+ the CSWSH determinant (ambient-cookie WS auth)** |
101
+ | **pii_exposure** | unmasked PII at the output boundary | `res.json(rawEntity)` with PII + **a masking control defined but with zero live call sites** (value-shape, not field-name) |
102
+ | graphql | GraphQL surface | introspection (**AppSync `introspectionConfig: DISABLED`-aware**) / playground / depth-limit **+ AppSync subscription-authz (cross-group BOLA)** |
101
103
  | integrations | third-party + webhooks | webhooks missing signature verification |
102
104
 
103
105
  Plus **derived targeting** — IDOR / SSRF / open-redirect / upload / write / auth-endpoint
@@ -206,13 +208,15 @@ publisher** with project `websec-validator`, owner `raccioly`, repo `websec-vali
206
208
 
207
209
  ## Status / roadmap
208
210
 
209
- **Done:** 13-extractor recon (incl. schema/entity → mass-assignment targeting, the **AWS-CDK /
210
- managed-AppSync / VTL boundary** CSWSH, cross-group subscription BOLA, forgeable-JWT default
211
- secrets and a **man-in-the-browser / tamperable-display** class), cross-tool de-dup + **bundled
212
- Semgrep rules**, tailored probe staging, agent briefing, traceable findings ledger with **calibrated
213
- confidence (CJE — Wilson CIs)**, proof harness, test suite, **Docker bundle** (all scanners + Noir,
214
- arch-aware), **dynamic phase v1** (authenticated read-only cross-tenant BOLA — validated live,
215
- reproduced a hand-pentest's 14/14).
211
+ **Done:** 15-extractor recon (incl. schema/entity → mass-assignment targeting, the **AWS-CDK /
212
+ managed-AppSync / VTL boundary**, **upload-security** + **PII-output-boundary** + **redirect-SSRF**
213
+ + **password-reuse** classes, and a **man-in-the-browser / tamperable-display** class), cross-tool
214
+ de-dup + **bundled Semgrep rules**, tailored probe staging, agent briefing, traceable findings ledger
215
+ with **calibrated confidence (CJE — Wilson CIs)**, proof harness, test suite, **Docker bundle** (all
216
+ scanners + Noir, arch-aware), **dynamic phase v1** (authenticated read-only cross-tenant BOLA —
217
+ validated live, reproduced a hand-pentest's 14/14). Validated against the **PTREQ0013000 pen test +
218
+ retest** (incl. correcting two findings the retest disproved: AppSync introspection *is* disablable
219
+ engine-level, and API_KEY-default is anonymous-auth, not CSWSH).
216
220
  **Next:** dynamic write-verb BOLA + JWT/auth probes + ZAP/Nuclei two-role diff (gated, they mutate),
217
221
  calibration on hand-labeled real repos (more representative base rate), ASVS index lookup, optional
218
222
  model-SDK adapters for no-agent fallback.
@@ -70,7 +70,7 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
70
70
 
71
71
  > That's the whole user surface: **`run`** (plus the optional, advanced **`dynamic`** live-probing step below). `recon`/`proof`/`calibrate` exist for developing the tool itself and are hidden from `--help` — you never need them.
72
72
 
73
- ## What it extracts (13 deterministic extractors, no LLM)
73
+ ## What it extracts (15 deterministic extractors, no LLM)
74
74
 
75
75
  | | Dimension | Notable output |
76
76
  |---|---|---|
@@ -79,13 +79,15 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
79
79
  | auth | scheme + login surface + **insecure-default signing secrets** | multi-scheme; flags a hard-coded `JWT_SECRET \|\| 'dev-secret'` fallback (forgeable JWT) |
80
80
  | **authz** | access-control map | guard coverage + **write endpoints with no visible guard** + roles |
81
81
  | tenant | multi-tenancy key candidates | the BOLA boundary, by frequency |
82
- | **password_policy** | cross-route policy consistency | flags a route enforcing fewer character classes than the strongest sibling (policy drift) |
83
- | surface | 14 sink classes | 12 user-input-gated (SSRF/SQLi/traversal/SSTI/…) **+ var-arg SSRF + response-side error-disclosure** |
82
+ | **password_policy** | cross-route consistency **+ reuse/history** | complexity drift across routes **+ a set-password path that hashes without a reuse check** |
83
+ | surface | 14 sink classes **+ redirect-SSRF** | user-input-gated sinks + var-arg SSRF + error-disclosure **+ follows-redirects-without-per-hop-guard** |
84
+ | **upload_security** | unrestricted upload + unsafe serve | deny-list-only, stored-name-from-filename, trust-client-MIME, accept-SVG, **serve without `nosniff`** |
84
85
  | schemas | data models + **privileged fields** | Pydantic/SQLAlchemy/Django/Prisma/Mongoose/TypeORM/Zod → `role`/`isAdmin`/`groupId` for mass-assignment targeting |
85
- | iac_ci | IaC + CI/CD | GHA injection, unpinned actions, Dockerfile-root, tfstate **+ CDK AppSync `API_KEY` default-auth (CSWSH)** |
86
+ | iac_ci | IaC + CI/CD | GHA injection, unpinned actions, tfstate, **CDK AppSync `API_KEY` anonymous-default-auth + WAF-as-control smell** |
86
87
  | client_exposure | browser leakage | public-var secrets by **name + value-shape (`da2-…`) + CDK build-injection**, server-secret-in-client, source maps |
87
- | **client_integrity** | tamperable display (man-in-the-browser) | a fund-redirecting value (wallet address/QR) shown without a strict CSP / out-of-band anchor |
88
- | graphql | GraphQL surface | introspection / playground / depth-limit **+ AppSync subscription-authz (cross-group BOLA) + WAF-bypass-aware introspection** |
88
+ | **client_integrity** | tamperable display + **WS auth model** | wallet value without strict CSP / out-of-band anchor **+ the CSWSH determinant (ambient-cookie WS auth)** |
89
+ | **pii_exposure** | unmasked PII at the output boundary | `res.json(rawEntity)` with PII + **a masking control defined but with zero live call sites** (value-shape, not field-name) |
90
+ | graphql | GraphQL surface | introspection (**AppSync `introspectionConfig: DISABLED`-aware**) / playground / depth-limit **+ AppSync subscription-authz (cross-group BOLA)** |
89
91
  | integrations | third-party + webhooks | webhooks missing signature verification |
90
92
 
91
93
  Plus **derived targeting** — IDOR / SSRF / open-redirect / upload / write / auth-endpoint
@@ -194,13 +196,15 @@ publisher** with project `websec-validator`, owner `raccioly`, repo `websec-vali
194
196
 
195
197
  ## Status / roadmap
196
198
 
197
- **Done:** 13-extractor recon (incl. schema/entity → mass-assignment targeting, the **AWS-CDK /
198
- managed-AppSync / VTL boundary** CSWSH, cross-group subscription BOLA, forgeable-JWT default
199
- secrets and a **man-in-the-browser / tamperable-display** class), cross-tool de-dup + **bundled
200
- Semgrep rules**, tailored probe staging, agent briefing, traceable findings ledger with **calibrated
201
- confidence (CJE — Wilson CIs)**, proof harness, test suite, **Docker bundle** (all scanners + Noir,
202
- arch-aware), **dynamic phase v1** (authenticated read-only cross-tenant BOLA — validated live,
203
- reproduced a hand-pentest's 14/14).
199
+ **Done:** 15-extractor recon (incl. schema/entity → mass-assignment targeting, the **AWS-CDK /
200
+ managed-AppSync / VTL boundary**, **upload-security** + **PII-output-boundary** + **redirect-SSRF**
201
+ + **password-reuse** classes, and a **man-in-the-browser / tamperable-display** class), cross-tool
202
+ de-dup + **bundled Semgrep rules**, tailored probe staging, agent briefing, traceable findings ledger
203
+ with **calibrated confidence (CJE — Wilson CIs)**, proof harness, test suite, **Docker bundle** (all
204
+ scanners + Noir, arch-aware), **dynamic phase v1** (authenticated read-only cross-tenant BOLA —
205
+ validated live, reproduced a hand-pentest's 14/14). Validated against the **PTREQ0013000 pen test +
206
+ retest** (incl. correcting two findings the retest disproved: AppSync introspection *is* disablable
207
+ engine-level, and API_KEY-default is anonymous-auth, not CSWSH).
204
208
  **Next:** dynamic write-verb BOLA + JWT/auth probes + ZAP/Nuclei two-role diff (gated, they mutate),
205
209
  calibration on hand-labeled real repos (more representative base rate), ASVS index lookup, optional
206
210
  model-SDK adapters for no-agent fallback.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "websec-validator"
7
- version = "0.3.0"
7
+ version = "0.4.0"
8
8
  description = "Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -69,6 +69,20 @@ def render(facts: dict, scanners: dict, scan_results: list, probe_manifest: list
69
69
  pp_line = f"looks consistent across {len(pp['password_blocks'])} validator block(s)"
70
70
  else:
71
71
  pp_line = "_no password validators detected_"
72
+ if ((pp.get("password_reuse") or {}).get("gap")):
73
+ pp_line += " · ⚠ NO reuse/history control (#6)"
74
+
75
+ up = facts.get("upload_security", {})
76
+ up_findings = up.get("findings", [])
77
+ up_section = ("\n".join(f"- **{f.get('severity')}** {f.get('kind')} — `{f.get('file')}`" for f in up_findings[:20])
78
+ if up_findings else
79
+ ("_upload handler(s) present; allow-list + nosniff look ok — spot-check_" if up.get("upload_handlers")
80
+ else "_no upload handlers detected_"))
81
+ pii = facts.get("pii_exposure", {})
82
+ pii_findings = pii.get("findings", [])
83
+ pii_section = ("\n".join(f"- **{f.get('severity')}** {f.get('kind')} — `{f.get('file')}`" for f in pii_findings[:20])
84
+ if pii_findings else "_no obvious raw-PII responses / dead masking controls_")
85
+ ws_line = (facts.get("client_integrity", {}) or {}).get("websocket_auth", "no websocket detected")
72
86
 
73
87
  gql = facts.get("graphql", {})
74
88
  if gql.get("present"):
@@ -181,6 +195,14 @@ Production source maps exposed: {client.get("production_source_maps", False)}
181
195
  **Client integrity — man-in-the-browser / tamperable display:**
182
196
  {ci_section}
183
197
 
198
+ **WebSocket auth model (CSWSH determinant — is it an ambient cookie?):** {ws_line}
199
+
200
+ **File-upload security (#2b — sniff bytes, derive stored name, nosniff on serve):**
201
+ {up_section}
202
+
203
+ **PII output boundary (#8 — verify by VALUE SHAPE, not field name):**
204
+ {pii_section}
205
+
184
206
  **Third-party integrations:** {integ_line}
185
207
  {wh_line}
186
208
 
@@ -17,12 +17,14 @@ from .client_integrity import ClientIntegrityExtractor
17
17
  from .graphql import GraphQLExtractor
18
18
  from .iac_ci import IacCiExtractor
19
19
  from .integrations import IntegrationsExtractor
20
+ from .pii_exposure import PiiExposureExtractor
20
21
  from .policy_consistency import PolicyConsistencyExtractor
21
22
  from .routes import RoutesExtractor
22
23
  from .schemas import SchemasExtractor
23
24
  from .stack import StackExtractor
24
25
  from .surface import SurfaceExtractor
25
26
  from .tenant import TenantExtractor
27
+ from .upload_security import UploadSecurityExtractor
26
28
 
27
29
  # Order matters: stack first (others read facts['stack']); authz after routes
28
30
  # (reads facts['routes']).
@@ -34,10 +36,12 @@ REGISTRY: list[Extractor] = [
34
36
  TenantExtractor(),
35
37
  PolicyConsistencyExtractor(),
36
38
  SurfaceExtractor(),
39
+ UploadSecurityExtractor(),
37
40
  SchemasExtractor(),
38
41
  IacCiExtractor(),
39
42
  ClientExposureExtractor(),
40
43
  ClientIntegrityExtractor(),
44
+ PiiExposureExtractor(),
41
45
  GraphQLExtractor(),
42
46
  IntegrationsExtractor(),
43
47
  ]
@@ -48,6 +48,16 @@ OOB_ANCHOR = re.compile(
48
48
  r"|out[_-]of[_-]band|toChecksumAddress|getAddress\(|checksumAddress|\beip[_-]?55\b|verifyAddress"
49
49
  r"|address[_-]?verif|verif\w*[_-]?address|sendVerificationEmail|canonical[_-]?address", re.I)
50
50
 
51
+ # WebSocket / realtime auth model — the CSWSH determinant (PTREQ0013000 #4). CSWSH is only
52
+ # exploitable when the socket authenticates via an AMBIENT COOKIE the browser auto-attaches
53
+ # cross-origin. A token placed in the connection payload / subprotocol and stored origin-scoped is
54
+ # NOT exploitable (SOP blocks a cross-origin page from reading it). This lets us ANSWER a CSWSH
55
+ # scanner flag instead of guessing — the retest pushed back on exactly this and won.
56
+ WS_USAGE = re.compile(r"new\s+WebSocket\(|socket\.io|graphql-ws|subscriptions-transport-ws|appsync-realtime"
57
+ r"|\bwss?://", re.I)
58
+ WS_COOKIE_AUTH = re.compile(r"withCredentials\s*:\s*true|credentials\s*:\s*['\"]include['\"]"
59
+ r"|document\.cookie[\s\S]{0,80}?(?:socket|ws\b|websocket)", re.I)
60
+
51
61
 
52
62
  class ClientIntegrityExtractor(Extractor):
53
63
  name = "client_integrity"
@@ -57,6 +67,7 @@ class ClientIntegrityExtractor(Extractor):
57
67
  sensitive, qr_files, clip_files = [], [], []
58
68
  csp_present = csp_self = csp_nonce = csp_unsafe = False
59
69
  oob = []
70
+ ws_usage = ws_cookie = False
60
71
  for _p, rel, text in ctx.iter_code():
61
72
  if SENSITIVE_VALUE.search(text):
62
73
  if len(sensitive) < 30:
@@ -75,10 +86,15 @@ class ClientIntegrityExtractor(Extractor):
75
86
  csp_unsafe = True
76
87
  if OOB_ANCHOR.search(text) and len(oob) < 20:
77
88
  oob.append(rel)
89
+ if WS_USAGE.search(text):
90
+ ws_usage = True
91
+ if WS_COOKIE_AUTH.search(text):
92
+ ws_cookie = True
78
93
 
79
94
  # strict = a real `script-src 'self'` (+ a nonce / strict-dynamic) with NO unsafe-inline/eval
80
95
  strict_csp = bool(csp_present and csp_self and csp_nonce and not csp_unsafe)
81
96
  out_of_band = bool(oob)
97
+ ws_cookie_auth = bool(ws_usage and ws_cookie) # the CSWSH determinant (ambient-cookie WS auth)
82
98
 
83
99
  findings = []
84
100
  present = bool(sensitive)
@@ -109,8 +125,24 @@ class ClientIntegrityExtractor(Extractor):
109
125
  "cryptographically tamper-proof on the web — the goal is detectable, not "
110
126
  "impossible (the limit that hardware wallets exist to solve)."})
111
127
 
128
+ # CSWSH is ONLY real when the WS auth is an ambient cookie (PTREQ0013000 #4). This lets us
129
+ # answer a CSWSH scanner flag instead of guessing — a bearer token in the payload is not it.
130
+ if ws_cookie_auth:
131
+ findings.append({
132
+ "severity": "MEDIUM", "confidence": "LOW", "attack_class": "cswsh",
133
+ "issue": "WebSocket authenticated via an ambient cookie (Cross-Site WebSocket Hijacking)",
134
+ "detail": "A WebSocket/realtime connection appears to authenticate via a cookie "
135
+ "(withCredentials / credentials:'include'), which the browser auto-attaches "
136
+ "cross-origin — so a page on any origin can open an authenticated socket (CSWSH, #4). "
137
+ "Validate the Origin on the handshake, or move the credential into the connection "
138
+ "payload / subprotocol and store it origin-scoped (not a cookie). If WS auth is "
139
+ "already a token in the payload, CSWSH is NOT exploitable."})
140
+
112
141
  return {
113
142
  "sensitive_display": sorted(set(sensitive)),
143
+ "websocket_auth": ("cookie (CSWSH-exposed — validate Origin)" if ws_cookie_auth
144
+ else "token-or-none (CSWSH not exploitable)" if ws_usage
145
+ else "no websocket detected"),
114
146
  "qr_generation": sorted(set(qr_files)),
115
147
  "clipboard_copy": sorted(set(clip_files)),
116
148
  "strict_csp": strict_csp,
@@ -33,6 +33,11 @@ TENANT_ARG = re.compile(r"\b(\w+)\s*\(([^)]*\b(?:groupId|group_id|orgId|org_id|t
33
33
  # Identity-binding signals in a VTL resolver — the field is tied to the CALLER, not a free arg.
34
34
  VTL_AUTHZ = re.compile(r"\$ctx(?:tx)?\.identity|\$context\.identity|identity\.(?:sub|username|claims|resolverContext)"
35
35
  r"|util\.unauthorized|\bgroupIds?\b[\s\S]{0,80}?\bcontains\b|#if\s*\(\s*!?\s*\$ctx\.identity")
36
+ # Engine-level introspection disable on aws-cdk-lib appsync.GraphqlApi. The PTREQ0013000 RETEST
37
+ # proved this IS available and un-bypassable (unlike a WAF string-match) — so a correctly-configured
38
+ # AppSync API must NOT be flagged. This corrects the 0.3.0 false positive that always cried wolf.
39
+ APPSYNC_INTROSPECTION_OFF = re.compile(r"introspectionConfig\s*:\s*[\w.]*\bDISABLED\b")
40
+ APPSYNC_LIMITING = re.compile(r"\bqueryDepthLimit\b|\bresolverCountLimit\b")
36
41
 
37
42
 
38
43
  class GraphQLExtractor(Extractor):
@@ -47,10 +52,15 @@ class GraphQLExtractor(Extractor):
47
52
 
48
53
  introspection, playground, limiting, code_hit = "unknown", False, False, False
49
54
  appsync, aws_directives = False, False
55
+ appsync_introspection_off = appsync_limiting = False
50
56
  schema_texts = [] # (rel, text) for SDL files — parsed for Subscription authz
51
57
  for _p, rel, text in ctx.iter_code():
52
58
  if APPSYNC_MARK.search(text):
53
59
  appsync = True
60
+ if APPSYNC_INTROSPECTION_OFF.search(text):
61
+ appsync_introspection_off = True
62
+ if APPSYNC_LIMITING.search(text):
63
+ appsync_limiting = True
54
64
  if rel.endswith((".graphql", ".gql")):
55
65
  schema_texts.append((rel, text))
56
66
  if AWS_AUTH_DIRECTIVE.search(text):
@@ -74,14 +84,20 @@ class GraphQLExtractor(Extractor):
74
84
  findings = []
75
85
  sub_authz = []
76
86
  if managed:
77
- # AppSync exposes introspection and it is NOT disablable at the API layer (no Apollo-style
78
- # `introspection:false`). The report's #2 proved the WAF that "blocks" it is bypassable.
79
- findings.append({"severity": "MEDIUM", "issue": "AppSync GraphQL introspection reachable",
80
- "attack_class": "graphql",
81
- "detail": "AppSync exposes schema introspection; it can't be disabled at the API layer. "
82
- "If a WAF blocks the keyword, that string-match is bypassable via Unicode-escape "
83
- "/ junk-byte padding (PTREQ0013000 #2). Enforce field-level @aws_* auth + run the "
84
- "appsync-introspection probe (it attempts the bypass) don't rely on the WAF."})
87
+ # AppSync introspection CAN be disabled engine-level via
88
+ # `introspectionConfig: IntrospectionConfig.DISABLED` (aws-cdk-lib) — un-bypassable, unlike
89
+ # a WAF byte-match. Only flag when it is NOT disabled (retest correction to the 0.3.0 FP).
90
+ if not appsync_introspection_off:
91
+ findings.append({"severity": "MEDIUM", "issue": "AppSync GraphQL introspection not disabled engine-level",
92
+ "attack_class": "graphql",
93
+ "detail": "Set `introspectionConfig: appsync.IntrospectionConfig.DISABLED` so the engine "
94
+ "rejects __schema/__type regardless of encoding. A WAF byte-match on `__schema` "
95
+ "is NOT sufficient — bypassable via Unicode/JSON escapes and it only fronts one "
96
+ "endpoint (PTREQ0013000 #2). Run the appsync-introspection probe to confirm."})
97
+ if not (appsync_limiting or limiting):
98
+ findings.append({"severity": "LOW", "issue": "AppSync has no query depth / resolver-count limit",
99
+ "attack_class": "graphql",
100
+ "detail": "add `queryDepthLimit` + `resolverCountLimit` (alias / deep-query DoS guard)."})
85
101
  sub_authz = self._subscription_authz(ctx, schema_texts, findings)
86
102
  else:
87
103
  if introspection in ("enabled", "unknown"):
@@ -103,7 +119,8 @@ class GraphQLExtractor(Extractor):
103
119
  or (["AppSync GraphQL API (HTTP + realtime WebSocket)"] if managed
104
120
  else ["(server detected; endpoint not routed by Noir)"]),
105
121
  "schema_files": schema_files[:20],
106
- "introspection": "appsync-reachable" if managed else introspection,
122
+ "introspection": (("appsync-disabled" if appsync_introspection_off else "appsync-reachable")
123
+ if managed else introspection),
107
124
  "playground_enabled": playground, "query_limiting_detected": limiting,
108
125
  "subscription_authz": sub_authz,
109
126
  "findings": findings,
@@ -29,6 +29,12 @@ APPSYNC_DEFAULT_APIKEY = re.compile(
29
29
  APPSYNC_APIKEY_MODE = re.compile(r"AuthorizationType\.API_KEY|authorizationType\s*:\s*['\"]?API_KEY")
30
30
  WAFV2 = re.compile(r"wafv2\.CfnWebACL|\bCfnWebACL\b|aws_wafv2|wafv2\.CfnWebACLAssociation")
31
31
  WAF_ASSOC = re.compile(r"CfnWebACLAssociation|WebACLAssociation")
32
+ # WAF used as the PRIMARY control for an app-layer flaw — a bypassable band-aid, not a remediation
33
+ # (PTREQ0013000 #2/#11). A byteMatchStatement/regex matching `__schema`, SQL keywords or `<script`
34
+ # means the app-layer bug is still there; the string-match is evadable via encoding + only one door.
35
+ WAF_APPLAYER_MATCH = re.compile(
36
+ r"(?:byteMatchStatement|searchString|RegexPatternSet|regexString)[\s\S]{0,220}?"
37
+ r"(__schema|__type|UNION\s+SELECT|information_schema|<script|onerror=|\bor\s+1\s*=\s*1\b|sleep\s*\()", re.I)
32
38
 
33
39
 
34
40
  class IacCiExtractor(Extractor):
@@ -69,7 +75,7 @@ class IacCiExtractor(Extractor):
69
75
  findings.append({"severity": "HIGH", "kind": "terraform-state-committed", "file": ctx.rel(tf),
70
76
  "detail": "tfstate may contain plaintext secrets (DB passwords, keys) — must not be committed"})
71
77
 
72
- # --- CDK / managed-AppSync auth (#4 CSWSH; surfaces the #2/#5 boundary) ---
78
+ # --- CDK / managed-AppSync auth (#4 anonymous default-auth; WAF-as-control smell #2) ---
73
79
  appsync_files, waf_present, waf_assoc = [], False, False
74
80
  for _p, rel, text in ctx.iter_code():
75
81
  if not rel.endswith((".ts", ".js", ".mjs", ".cjs")):
@@ -78,15 +84,24 @@ class IacCiExtractor(Extractor):
78
84
  waf_present = True
79
85
  if WAF_ASSOC.search(text):
80
86
  waf_assoc = True
87
+ if WAF_APPLAYER_MATCH.search(text):
88
+ tok = (WAF_APPLAYER_MATCH.search(text).group(1) or "").strip()
89
+ findings.append({"severity": "MEDIUM", "kind": "waf-as-app-control", "file": rel,
90
+ "detail": f"A WAF string/regex match on an app-layer attack token ({tok!r}) is used as a "
91
+ "control. A WAF is a bypassable compensating control, never the remediation: "
92
+ "string-matches are evaded by encoding (the retest bypassed `__schema` with a "
93
+ "Unicode escape) and only cover one endpoint. Fix at the app/engine layer "
94
+ "(disable introspection, parametrize queries) and keep the WAF as defense-in-depth."})
81
95
  if APPSYNC_API.search(text):
82
96
  appsync_files.append(rel)
83
97
  if APPSYNC_DEFAULT_APIKEY.search(text):
84
98
  findings.append({"severity": "HIGH", "kind": "appsync-apikey-default", "file": rel,
85
- "detail": "AppSync defaultAuthorization is API_KEY — the realtime WebSocket "
86
- "accepts a static key with no Origin/cookie binding (Cross-Site "
87
- "WebSocket Hijacking + anonymous subscribe). Make the default "
88
- "USER_POOL/OIDC/IAM/LAMBDA; keep API_KEY (if needed) as a scoped "
89
- "additional mode only."})
99
+ "detail": "AppSync defaultAuthorization is API_KEY — the API (HTTP + realtime) accepts "
100
+ "a static key by default, and that key typically ships to the browser, so "
101
+ "this is effectively ANONYMOUS/unauthenticated access. Make the default "
102
+ "USER_POOL/OIDC/IAM/LAMBDA; keep API_KEY (if needed) to a scoped additional "
103
+ "mode. (NB: this is NOT in itself CSWSH — that needs cookie-based WS auth; "
104
+ "see the client_integrity websocket-auth check.)"})
90
105
  elif APPSYNC_APIKEY_MODE.search(text):
91
106
  findings.append({"severity": "MEDIUM", "kind": "appsync-apikey-mode", "file": rel,
92
107
  "detail": "AppSync accepts an API_KEY authorization mode — confirm it is NOT the "
@@ -0,0 +1,98 @@
1
+ """PII output-boundary extractor — unmasked customer data in API responses (PTREQ0013000 #8).
2
+
3
+ Two high-signal static tells the retest taught us:
4
+
5
+ 1. **Dead security control.** A masking helper / `view_full`-style permission EXISTS in the codebase
6
+ but has ZERO call sites in the live request handlers — it was wired only into offline export paths.
7
+ A control defined-but-never-called is worse than none (it reads as "handled"). This is very
8
+ distinctive and cheap to find: collect `mask*/redact*/canViewFull*` definitions, count live call
9
+ sites (outside tests and outside the defining file itself), flag the ones with none.
10
+
11
+ 2. **Raw entity to the client.** A controller does `res.json(entity)` on a raw ORM/repo object that
12
+ carries PII fields, with no DTO/serializer/masker — so phone/email ship in cleartext, *including*
13
+ indirect carriers (a phone embedded in a composed `messageBirdId`, a denormalized `lastMessage`).
14
+ The decisive verification is **value-shape, not field-name** — a field allow-list misses the
15
+ indirect carriers — so the probe asserts no phone/email *value* reaches a non-privileged caller.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import re
21
+
22
+ from .base import Extractor, RepoContext
23
+
24
+ # helper/permission DEFINITIONS (function/arrow/def) — not variable assignments to a call result
25
+ MASK_DEF = re.compile(
26
+ r"(?:function\s+|export\s+(?:async\s+)?function\s+|def\s+)"
27
+ r"(mask\w+|redact\w+|canViewFull\w+|scrub\w+|anonymi[sz]e\w+|toPublic\w+|sanitize\w*Pii)\b"
28
+ r"|(?:const|let|export\s+const)\s+(mask\w+|redact\w+|canViewFull\w+|toPublic\w+)\s*=\s*(?:async\s*)?\(", re.I)
29
+ PII_FIELD = re.compile(r"\b(?:phone|phoneNumber|msisdn|mobile|email|emailAddress|ssn|socialSecurity"
30
+ r"|dob|dateOfBirth|birthDate|creditCard|cardNumber|taxId|nationalId)\b", re.I)
31
+ # returning a raw variable / a fresh ORM read straight to the client
32
+ RES_RAW = re.compile(r"res\.(?:json|send)\s*\(\s*(?:await\s+)?[A-Za-z_$][\w$]*\s*\)"
33
+ r"|res\.(?:json|send)\s*\(\s*await\s+[\w.]+\.(?:find|findOne|findById|findAll|get|query)\s*\(")
34
+ MASK_CALL_NEAR = re.compile(r"mask\w+\(|redact\w+\(|toPublic\w+\(|canViewFull\w+\(|\.serialize\(|toDto\(|\bDTO\b|pick\(", re.I)
35
+ TESTFILE = re.compile(r"(?:^|/)(?:tests?|__tests__|spec)/|\.(?:test|spec)\.", re.I)
36
+
37
+
38
+ class PiiExposureExtractor(Extractor):
39
+ name = "pii_exposure"
40
+ category = "exposure"
41
+
42
+ def extract(self, ctx: RepoContext, facts: dict) -> dict:
43
+ texts = []
44
+ helpers: dict = {} # name -> def file
45
+ for _p, rel, text in ctx.iter_code():
46
+ texts.append((rel, text))
47
+ for m in MASK_DEF.finditer(text):
48
+ nm = m.group(1) or m.group(2)
49
+ if nm and len(nm) > 4 and nm not in helpers:
50
+ helpers[nm] = rel
51
+
52
+ findings = []
53
+
54
+ # 1. dead masking/permission control — defined but no LIVE (non-test) call site
55
+ dead = []
56
+ for nm, deffile in helpers.items():
57
+ callrx = re.compile(r"\b" + re.escape(nm) + r"\s*\(")
58
+ live = sum(1 for rel, text in texts
59
+ if rel != deffile and not TESTFILE.search(rel) and callrx.search(text))
60
+ if live == 0:
61
+ dead.append(nm)
62
+ findings.append({"severity": "HIGH", "kind": "dead-pii-control", "file": deffile,
63
+ "detail": f"`{nm}` (a masking/PII-permission control) is defined but has NO live "
64
+ "call site outside its own file/tests — a security control that exists but "
65
+ "isn't wired into the request handlers (it was likely only on export/report "
66
+ "paths). Apply it at the live API output boundary, or remove the false "
67
+ "sense of safety (PTREQ0013000 #8)."})
68
+
69
+ # 2. raw entity with PII to the client, no masker/DTO in the handler
70
+ raw_leaks = []
71
+ for rel, text in texts:
72
+ if TESTFILE.search(rel):
73
+ continue
74
+ if PII_FIELD.search(text) and RES_RAW.search(text) and not MASK_CALL_NEAR.search(text):
75
+ if len(raw_leaks) < 30:
76
+ raw_leaks.append(rel)
77
+ findings.append({"severity": "MEDIUM", "kind": "raw-entity-pii-response", "file": rel,
78
+ "detail": "A handler returns a raw entity (`res.json(entity)`) in a file that "
79
+ "handles PII fields, with no DTO/serializer/masker — phone/email likely "
80
+ "ship in cleartext. Mask at ONE output boundary (a DTO), gated by a "
81
+ "permission. VERIFY BY VALUE SHAPE (no phone/email value in the JSON), "
82
+ "not field name — indirect carriers (composed IDs, denormalized fields) "
83
+ "leak too (the `messageBirdId`-embeds-the-phone class, #8)."})
84
+
85
+ by_sev: dict = {}
86
+ for f in findings:
87
+ by_sev[f["severity"]] = by_sev.get(f["severity"], 0) + 1
88
+ return {
89
+ "findings": findings,
90
+ "dead_controls": dead,
91
+ "raw_pii_responses": raw_leaks,
92
+ "masking_helpers": sorted(helpers.keys())[:20],
93
+ "by_severity": by_sev,
94
+ "note": ("PII output-boundary review: " + (f"{len(dead)} masking control(s) defined but unused; " if dead else "")
95
+ + (f"{len(raw_leaks)} handler(s) return a raw PII entity. " if raw_leaks else "no obvious raw-PII responses. ")
96
+ + "Probe with a per-role response diff asserting NO phone/email VALUE (/\\+?\\d{7,}/ or an email "
97
+ "regex) reaches a non-privileged caller — across nested objects, IDs, and exports (#8)."),
98
+ }
@@ -42,6 +42,17 @@ _RE_STRONG = re.compile(r"isStrongPassword", re.I)
42
42
 
43
43
  _ALL = ("min", "upper", "lower", "digit", "special")
44
44
 
45
+ # Password REUSE / history — a DIFFERENT control from complexity (PTREQ0013000 #6, which we initially
46
+ # misread as complexity). A set-password path that hashes a new password with no comparison to the
47
+ # current / previous hashes lets a user re-set the same password. Signals:
48
+ HASH_NEW = re.compile(r"bcrypt(?:js)?\.hash|argon2\.hash|\bscrypt\b|pbkdf2|hashPassword\(|\.setPassword\(", re.I)
49
+ REUSE_CHECK = re.compile(r"isPasswordReused|passwordHistory|password_history|previousPasswords|prior[_-]?hashes"
50
+ r"|bcrypt(?:js)?\.compare[\s\S]{0,200}?(?:history|previous|current|old)", re.I)
51
+ PW_HASH_FIELD = re.compile(r"\b(?:passwordHash|password_hash|hashedPassword|pwdHash|passwordDigest)\b")
52
+ PW_HISTORY_FIELD = re.compile(r"\b(?:passwordHistory|password_history|previousPasswords|passwordHistoryHashes|priorPasswords)\b")
53
+ SET_PW_CTX = re.compile(r"changePassword|updatePassword|setPassword|resetPassword|updateProfile|adminUpdate"
54
+ r"|set[_-]?password|change[_-]?password", re.I)
55
+
45
56
 
46
57
  def _classes(window: str) -> set:
47
58
  """The character-class requirement set enforced in one validation window."""
@@ -68,7 +79,23 @@ class PolicyConsistencyExtractor(Extractor):
68
79
  def extract(self, ctx: RepoContext, facts: dict) -> dict:
69
80
  blocks = [] # (file, frozenset(classes))
70
81
  seen = set()
82
+ hashes = reuse_check = set_ctx = model_pwhash = model_history = False
71
83
  for _p, rel, text in ctx.iter_code():
84
+ # Reuse signals live in camelCase compounds (changePassword/passwordHash) that PW_FIELD's
85
+ # \bword\b boundaries miss — so track them on a cheap substring pre-check, NOT behind the
86
+ # complexity gate below (that bug initially made the reuse check silently never fire).
87
+ low = text.lower()
88
+ if "password" in low or "bcrypt" in low or "argon2" in low or "scrypt" in low or "pbkdf2" in low:
89
+ if HASH_NEW.search(text):
90
+ hashes = True
91
+ if REUSE_CHECK.search(text):
92
+ reuse_check = True
93
+ if SET_PW_CTX.search(text):
94
+ set_ctx = True
95
+ if PW_HASH_FIELD.search(text):
96
+ model_pwhash = True
97
+ if PW_HISTORY_FIELD.search(text):
98
+ model_history = True
72
99
  if not PW_FIELD.search(text):
73
100
  continue
74
101
  # FORWARD-only window, capped at the next password field — validation follows the field
@@ -110,11 +137,22 @@ class PolicyConsistencyExtractor(Extractor):
110
137
  if len(smax) < 3:
111
138
  weak_policy = strongest
112
139
 
140
+ # Password REUSE / history (#6) — the DIFFERENT control. The gap is flagged only when passwords are
141
+ # hashed, a set-password path or a passwordHash field exists, and NEITHER a reuse comparison NOR a
142
+ # history field was seen in any scanned file — so a user can re-set the same/old password. (Complexity is the drift check above; this is reuse.)
143
+ reuse_gap = bool(hashes and (set_ctx or model_pwhash) and not reuse_check and not model_history)
144
+ password_reuse = {
145
+ "hashes_passwords": hashes, "has_set_password_path": set_ctx,
146
+ "has_reuse_check": reuse_check, "model_has_passwordHash": model_pwhash,
147
+ "model_has_history": model_history, "gap": reuse_gap,
148
+ }
149
+
113
150
  return {
114
151
  "password_blocks": blocks[:20],
115
152
  "strongest_policy": strongest,
116
153
  "drift": drift, # MEDIUM in findings.py — inconsistent siblings (#6)
117
154
  "weak_policy": weak_policy, # LOW — uniformly weak, no strong sibling to compare
155
+ "password_reuse": password_reuse, # MEDIUM — no reuse/history control on set-password (#6)
118
156
  "consistent": not drift,
119
157
  "note": ("Password-policy DRIFT: a sibling route enforces fewer character classes than the "
120
158
  "strongest one found — align them (the WU #6 regression). " if drift else
@@ -67,6 +67,14 @@ SINKS = {
67
67
  }
68
68
 
69
69
 
70
+ # SSRF-via-redirect (PTREQ0013000 #1): axios/requests FOLLOW redirects by DEFAULT, so an outbound
71
+ # client on a variable URL is validated only on the FIRST hop unless it pins maxRedirects:0 or adds a
72
+ # per-hop guard. One of these anywhere in the file = treated as guarded; absent from a file with an SSRF sink = the lead
73
+ # (allow-list on the input URL is necessary but never sufficient — a 302 to 169.254.169.254 wins).
74
+ REDIRECT_GUARD = re.compile(r"beforeRedirect|maxRedirects\s*:\s*0\b|allow_redirects\s*=\s*False"
75
+ r"|validateRedirect|isAllowed\w*Url|on[_-]?redirect|checkRedirect", re.I)
76
+
77
+
70
78
  class SurfaceExtractor(Extractor):
71
79
  name = "surface"
72
80
  category = "sinks"
@@ -78,6 +86,7 @@ class SurfaceExtractor(Extractor):
78
86
 
79
87
  found: dict = {k: [] for k in SINKS}
80
88
  counts: dict = {k: 0 for k in SINKS}
89
+ ssrf_redirect: list = [] # SSRF sink in a file with NO per-hop redirect guard (#1)
81
90
  for _p, rel, text in ctx.iter_code():
82
91
  for cls, (_probe, gate, rx) in SINKS.items():
83
92
  if gate == "sql" and not has_sql:
@@ -88,12 +97,16 @@ class SurfaceExtractor(Extractor):
88
97
  counts[cls] += 1
89
98
  if len(found[cls]) < 60:
90
99
  found[cls].append(rel)
100
+ if (len(ssrf_redirect) < 40 and not REDIRECT_GUARD.search(text)
101
+ and (SINKS["ssrf-outbound-http"][2].search(text) or SINKS["ssrf"][2].search(text))):
102
+ ssrf_redirect.append(rel)
91
103
 
92
104
  sinks = {k: {"probe": SINKS[k][0], "count": counts[k], "files": found[k]}
93
105
  for k in SINKS if counts[k]}
94
106
  return {
95
107
  "sinks": sinks,
96
108
  "sink_counts": {k: counts[k] for k in SINKS if counts[k]},
109
+ "ssrf_redirect_unguarded": ssrf_redirect, # validate EVERY hop, not just the input URL (#1)
97
110
  "datastore_class": ("sql" if has_sql else ("nosql" if has_nosql else "unknown")),
98
111
  "note": "Each sink hit is user-input-gated (req./request./concat/interp), so these are "
99
112
  "higher-confidence leads. Cross-reference the files with routes.targeting to pick "