websec-validator 0.3.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. {websec_validator-0.3.0/src/websec_validator.egg-info → websec_validator-0.4.1}/PKG-INFO +18 -14
  2. {websec_validator-0.3.0 → websec_validator-0.4.1}/README.md +17 -13
  3. {websec_validator-0.3.0 → websec_validator-0.4.1}/pyproject.toml +1 -1
  4. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/__init__.py +8 -4
  5. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/briefing.py +29 -1
  6. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/cli.py +6 -1
  7. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/dynamic.py +25 -6
  8. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/extractors/__init__.py +9 -1
  9. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/extractors/authz.py +8 -1
  10. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/extractors/base.py +28 -1
  11. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/extractors/client_integrity.py +32 -0
  12. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/extractors/graphql.py +26 -9
  13. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/extractors/iac_ci.py +21 -6
  14. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/extractors/integrations.py +9 -3
  15. websec_validator-0.4.1/src/websec_validator/extractors/pii_exposure.py +98 -0
  16. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/extractors/policy_consistency.py +38 -0
  17. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/extractors/routes.py +16 -7
  18. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/extractors/surface.py +13 -0
  19. websec_validator-0.4.1/src/websec_validator/extractors/upload_security.py +89 -0
  20. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/findings.py +94 -6
  21. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/probes.py +14 -0
  22. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/scanners.py +19 -7
  23. websec_validator-0.4.1/src/websec_validator/templates/probes/password-reuse.sh +40 -0
  24. websec_validator-0.4.1/src/websec_validator/templates/probes/pii-output-diff.sh +48 -0
  25. websec_validator-0.4.1/src/websec_validator/templates/probes/upload-matrix.sh +44 -0
  26. {websec_validator-0.3.0 → websec_validator-0.4.1/src/websec_validator.egg-info}/PKG-INFO +18 -14
  27. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator.egg-info/SOURCES.txt +5 -0
  28. {websec_validator-0.3.0 → websec_validator-0.4.1}/tests/test_hardening.py +85 -1
  29. {websec_validator-0.3.0 → websec_validator-0.4.1}/tests/test_pentest_regressions.py +126 -2
  30. {websec_validator-0.3.0 → websec_validator-0.4.1}/tests/test_recon.py +72 -0
  31. {websec_validator-0.3.0 → websec_validator-0.4.1}/LICENSE +0 -0
  32. {websec_validator-0.3.0 → websec_validator-0.4.1}/setup.cfg +0 -0
  33. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/calibration.json +0 -0
  34. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/calibration.py +0 -0
  35. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/constitution.py +0 -0
  36. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/corpus.json +0 -0
  37. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/extractors/auth.py +0 -0
  38. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/extractors/client_exposure.py +0 -0
  39. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/extractors/schemas.py +0 -0
  40. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/extractors/stack.py +0 -0
  41. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/extractors/tenant.py +0 -0
  42. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/proof.py +0 -0
  43. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/recon.py +0 -0
  44. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/report.py +0 -0
  45. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/rules/error-stack-disclosure.yml +0 -0
  46. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/rules/insecure-default-secret.yml +0 -0
  47. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/_lib.py +0 -0
  48. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/appsync-cswsh.sh +0 -0
  49. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/appsync-introspection.sh +0 -0
  50. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/appsync-subscription-bola.sh +0 -0
  51. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/bola-cross-tenant.sh +0 -0
  52. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/bola-write-verbs.py +0 -0
  53. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/client-integrity-checklist.sh +0 -0
  54. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/compare-roles.sh +0 -0
  55. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/dlp-bypass-offline.py +0 -0
  56. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/error-disclosure-probe.sh +0 -0
  57. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/forged-token.sh +0 -0
  58. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/hs256-brute-force.py +0 -0
  59. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/jwt-attacks.sh +0 -0
  60. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/mass-assignment.py +0 -0
  61. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/race-conditions.py +0 -0
  62. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/rate-limit-burst.sh +0 -0
  63. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/s3-assess.sh +0 -0
  64. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/ssrf-probes.sh +0 -0
  65. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/unauth-baseline.sh +0 -0
  66. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/probes/webhook-forgery.py +0 -0
  67. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +0 -0
  68. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/reports/access-control-matrix.md.template +0 -0
  69. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/reports/findings-triage.md.template +0 -0
  70. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/reports/pentest-handover-brief.md.template +0 -0
  71. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator/templates/reports/per-tool-FINDINGS.md.template +0 -0
  72. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator.egg-info/dependency_links.txt +0 -0
  73. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator.egg-info/entry_points.txt +0 -0
  74. {websec_validator-0.3.0 → websec_validator-0.4.1}/src/websec_validator.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: websec-validator
3
- Version: 0.3.0
3
+ Version: 0.4.1
4
4
  Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
5
5
  Author: Ricardo Accioly
6
6
  License: MIT
@@ -82,7 +82,7 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
82
82
 
83
83
  > That's the whole user surface: **`run`** (plus the optional, advanced **`dynamic`** live-probing step below). `recon`/`proof`/`calibrate` exist for developing the tool itself and are hidden from `--help` — you never need them.
84
84
 
85
- ## What it extracts (13 deterministic extractors, no LLM)
85
+ ## What it extracts (15 deterministic extractors, no LLM)
86
86
 
87
87
  | | Dimension | Notable output |
88
88
  |---|---|---|
@@ -91,13 +91,15 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
91
91
  | auth | scheme + login surface + **insecure-default signing secrets** | multi-scheme; flags a hard-coded `JWT_SECRET \|\| 'dev-secret'` fallback (forgeable JWT) |
92
92
  | **authz** | access-control map | guard coverage + **write endpoints with no visible guard** + roles |
93
93
  | tenant | multi-tenancy key candidates | the BOLA boundary, by frequency |
94
- | **password_policy** | cross-route policy consistency | flags a route enforcing fewer character classes than the strongest sibling (policy drift) |
95
- | surface | 14 sink classes | 12 user-input-gated (SSRF/SQLi/traversal/SSTI/…) **+ var-arg SSRF + response-side error-disclosure** |
94
+ | **password_policy** | cross-route consistency **+ reuse/history** | complexity drift across routes **+ a set-password path that hashes without a reuse check** |
95
+ | surface | 14 sink classes **+ redirect-SSRF** | user-input-gated sinks + var-arg SSRF + error-disclosure **+ follows-redirects-without-per-hop-guard** |
96
+ | **upload_security** | unrestricted upload + unsafe serve | deny-list-only, stored-name-from-filename, trust-client-MIME, accept-SVG, **serve without `nosniff`** |
96
97
  | schemas | data models + **privileged fields** | Pydantic/SQLAlchemy/Django/Prisma/Mongoose/TypeORM/Zod → `role`/`isAdmin`/`groupId` for mass-assignment targeting |
97
- | iac_ci | IaC + CI/CD | GHA injection, unpinned actions, Dockerfile-root, tfstate **+ CDK AppSync `API_KEY` default-auth (CSWSH)** |
98
+ | iac_ci | IaC + CI/CD | GHA injection, unpinned actions, tfstate, **CDK AppSync `API_KEY` anonymous-default-auth + WAF-as-control smell** |
98
99
  | client_exposure | browser leakage | public-var secrets by **name + value-shape (`da2-…`) + CDK build-injection**, server-secret-in-client, source maps |
99
- | **client_integrity** | tamperable display (man-in-the-browser) | a fund-redirecting value (wallet address/QR) shown without a strict CSP / out-of-band anchor |
100
- | graphql | GraphQL surface | introspection / playground / depth-limit **+ AppSync subscription-authz (cross-group BOLA) + WAF-bypass-aware introspection** |
100
+ | **client_integrity** | tamperable display + **WS auth model** | wallet value without strict CSP / out-of-band anchor **+ the CSWSH determinant (ambient-cookie WS auth)** |
101
+ | **pii_exposure** | unmasked PII at the output boundary | `res.json(rawEntity)` with PII + **a masking control defined but with zero live call sites** (value-shape, not field-name) |
102
+ | graphql | GraphQL surface | introspection (**AppSync `introspectionConfig: DISABLED`-aware**) / playground / depth-limit **+ AppSync subscription-authz (cross-group BOLA)** |
101
103
  | integrations | third-party + webhooks | webhooks missing signature verification |
102
104
 
103
105
  Plus **derived targeting** — IDOR / SSRF / open-redirect / upload / write / auth-endpoint
@@ -206,13 +208,15 @@ publisher** with project `websec-validator`, owner `raccioly`, repo `websec-vali
206
208
 
207
209
  ## Status / roadmap
208
210
 
209
- **Done:** 13-extractor recon (incl. schema/entity → mass-assignment targeting, the **AWS-CDK /
210
- managed-AppSync / VTL boundary** CSWSH, cross-group subscription BOLA, forgeable-JWT default
211
- secrets and a **man-in-the-browser / tamperable-display** class), cross-tool de-dup + **bundled
212
- Semgrep rules**, tailored probe staging, agent briefing, traceable findings ledger with **calibrated
213
- confidence (CJE — Wilson CIs)**, proof harness, test suite, **Docker bundle** (all scanners + Noir,
214
- arch-aware), **dynamic phase v1** (authenticated read-only cross-tenant BOLA — validated live,
215
- reproduced a hand-pentest's 14/14).
211
+ **Done:** 15-extractor recon (incl. schema/entity → mass-assignment targeting, the **AWS-CDK /
212
+ managed-AppSync / VTL boundary**, **upload-security** + **PII-output-boundary** + **redirect-SSRF**
213
+ + **password-reuse** classes, and a **man-in-the-browser / tamperable-display** class), cross-tool
214
+ de-dup + **bundled Semgrep rules**, tailored probe staging, agent briefing, traceable findings ledger
215
+ with **calibrated confidence (CJE — Wilson CIs)**, proof harness, test suite, **Docker bundle** (all
216
+ scanners + Noir, arch-aware), **dynamic phase v1** (authenticated read-only cross-tenant BOLA —
217
+ validated live, reproduced a hand-pentest's 14/14). Validated against the **PTREQ0013000 pen test +
218
+ retest** (incl. correcting two findings the retest disproved: AppSync introspection *is* disablable
219
+ engine-level, and API_KEY-default is anonymous-auth, not CSWSH).
216
220
  **Next:** dynamic write-verb BOLA + JWT/auth probes + ZAP/Nuclei two-role diff (gated, they mutate),
217
221
  calibration on hand-labeled real repos (more representative base rate), ASVS index lookup, optional
218
222
  model-SDK adapters for no-agent fallback.
@@ -70,7 +70,7 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
70
70
 
71
71
  > That's the whole user surface: **`run`** (plus the optional, advanced **`dynamic`** live-probing step below). `recon`/`proof`/`calibrate` exist for developing the tool itself and are hidden from `--help` — you never need them.
72
72
 
73
- ## What it extracts (13 deterministic extractors, no LLM)
73
+ ## What it extracts (15 deterministic extractors, no LLM)
74
74
 
75
75
  | | Dimension | Notable output |
76
76
  |---|---|---|
@@ -79,13 +79,15 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
79
79
  | auth | scheme + login surface + **insecure-default signing secrets** | multi-scheme; flags a hard-coded `JWT_SECRET \|\| 'dev-secret'` fallback (forgeable JWT) |
80
80
  | **authz** | access-control map | guard coverage + **write endpoints with no visible guard** + roles |
81
81
  | tenant | multi-tenancy key candidates | the BOLA boundary, by frequency |
82
- | **password_policy** | cross-route policy consistency | flags a route enforcing fewer character classes than the strongest sibling (policy drift) |
83
- | surface | 14 sink classes | 12 user-input-gated (SSRF/SQLi/traversal/SSTI/…) **+ var-arg SSRF + response-side error-disclosure** |
82
+ | **password_policy** | cross-route consistency **+ reuse/history** | complexity drift across routes **+ a set-password path that hashes without a reuse check** |
83
+ | surface | 14 sink classes **+ redirect-SSRF** | user-input-gated sinks + var-arg SSRF + error-disclosure **+ follows-redirects-without-per-hop-guard** |
84
+ | **upload_security** | unrestricted upload + unsafe serve | deny-list-only, stored-name-from-filename, trust-client-MIME, accept-SVG, **serve without `nosniff`** |
84
85
  | schemas | data models + **privileged fields** | Pydantic/SQLAlchemy/Django/Prisma/Mongoose/TypeORM/Zod → `role`/`isAdmin`/`groupId` for mass-assignment targeting |
85
- | iac_ci | IaC + CI/CD | GHA injection, unpinned actions, Dockerfile-root, tfstate **+ CDK AppSync `API_KEY` default-auth (CSWSH)** |
86
+ | iac_ci | IaC + CI/CD | GHA injection, unpinned actions, tfstate, **CDK AppSync `API_KEY` anonymous-default-auth + WAF-as-control smell** |
86
87
  | client_exposure | browser leakage | public-var secrets by **name + value-shape (`da2-…`) + CDK build-injection**, server-secret-in-client, source maps |
87
- | **client_integrity** | tamperable display (man-in-the-browser) | a fund-redirecting value (wallet address/QR) shown without a strict CSP / out-of-band anchor |
88
- | graphql | GraphQL surface | introspection / playground / depth-limit **+ AppSync subscription-authz (cross-group BOLA) + WAF-bypass-aware introspection** |
88
+ | **client_integrity** | tamperable display + **WS auth model** | wallet value without strict CSP / out-of-band anchor **+ the CSWSH determinant (ambient-cookie WS auth)** |
89
+ | **pii_exposure** | unmasked PII at the output boundary | `res.json(rawEntity)` with PII + **a masking control defined but with zero live call sites** (value-shape, not field-name) |
90
+ | graphql | GraphQL surface | introspection (**AppSync `introspectionConfig: DISABLED`-aware**) / playground / depth-limit **+ AppSync subscription-authz (cross-group BOLA)** |
89
91
  | integrations | third-party + webhooks | webhooks missing signature verification |
90
92
 
91
93
  Plus **derived targeting** — IDOR / SSRF / open-redirect / upload / write / auth-endpoint
@@ -194,13 +196,15 @@ publisher** with project `websec-validator`, owner `raccioly`, repo `websec-vali
194
196
 
195
197
  ## Status / roadmap
196
198
 
197
- **Done:** 13-extractor recon (incl. schema/entity → mass-assignment targeting, the **AWS-CDK /
198
- managed-AppSync / VTL boundary** CSWSH, cross-group subscription BOLA, forgeable-JWT default
199
- secrets and a **man-in-the-browser / tamperable-display** class), cross-tool de-dup + **bundled
200
- Semgrep rules**, tailored probe staging, agent briefing, traceable findings ledger with **calibrated
201
- confidence (CJE — Wilson CIs)**, proof harness, test suite, **Docker bundle** (all scanners + Noir,
202
- arch-aware), **dynamic phase v1** (authenticated read-only cross-tenant BOLA — validated live,
203
- reproduced a hand-pentest's 14/14).
199
+ **Done:** 15-extractor recon (incl. schema/entity → mass-assignment targeting, the **AWS-CDK /
200
+ managed-AppSync / VTL boundary**, **upload-security** + **PII-output-boundary** + **redirect-SSRF**
201
+ + **password-reuse** classes, and a **man-in-the-browser / tamperable-display** class), cross-tool
202
+ de-dup + **bundled Semgrep rules**, tailored probe staging, agent briefing, traceable findings ledger
203
+ with **calibrated confidence (CJE — Wilson CIs)**, proof harness, test suite, **Docker bundle** (all
204
+ scanners + Noir, arch-aware), **dynamic phase v1** (authenticated read-only cross-tenant BOLA —
205
+ validated live, reproduced a hand-pentest's 14/14). Validated against the **PTREQ0013000 pen test +
206
+ retest** (incl. correcting two findings the retest disproved: AppSync introspection *is* disablable
207
+ engine-level, and API_KEY-default is anonymous-auth, not CSWSH).
204
208
  **Next:** dynamic write-verb BOLA + JWT/auth probes + ZAP/Nuclei two-role diff (gated, they mutate),
205
209
  calibration on hand-labeled real repos (more representative base rate), ASVS index lookup, optional
206
210
  model-SDK adapters for no-agent fallback.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "websec-validator"
7
- version = "0.3.0"
7
+ version = "0.4.1"
8
8
  description = "Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -1,11 +1,15 @@
1
1
  """websec-validator — local-first security recon that briefs an AI coding agent.
2
2
 
3
3
  The tool does the deterministic half (read the repo, run the scanners it finds,
4
- stage the probe library tailored to what it discovered) and emits three artifacts:
4
+ stage the probe library tailored to what it discovered) and emits, per immutable run:
5
5
 
6
- 1. findings.json — de-duplicated static scanner results
7
- 2. FACTS.json — stack, routes, auth-model candidates, attack surface
8
- 3. AGENT-BRIEFING.md — marching orders + staged probe scripts for your AI agent
6
+ 1. FACTS.json — stack, routes, auth-model candidates, attack surface
7
+ 2. findings.json — de-duplicated static scanner results (when --scan)
8
+ 3. findings-ledger.json — ranked, standards-cited, calibrated findings (recon + static + dynamic)
9
+ 4. AGENT-BRIEFING.md — marching orders + the per-attack-class targeting
10
+ 5. REPORT.md — the human-readable historical record
11
+ 6. CONSTITUTION.md — the app's security invariants as checkable Given/When/Then
12
+ 7. probes/ — the probe library staged against THIS app's real surface
9
13
 
10
14
  It never calls an LLM, never runs a server, and never needs a running instance of
11
15
  the target app. Running the probes and applying fixes is the agent + human's job.
@@ -69,6 +69,20 @@ def render(facts: dict, scanners: dict, scan_results: list, probe_manifest: list
69
69
  pp_line = f"looks consistent across {len(pp['password_blocks'])} validator block(s)"
70
70
  else:
71
71
  pp_line = "_no password validators detected_"
72
+ if ((pp.get("password_reuse") or {}).get("gap")):
73
+ pp_line += " · ⚠ NO reuse/history control (#6)"
74
+
75
+ up = facts.get("upload_security", {})
76
+ up_findings = up.get("findings", [])
77
+ up_section = ("\n".join(f"- **{f.get('severity')}** {f.get('kind')} — `{f.get('file')}`" for f in up_findings[:20])
78
+ if up_findings else
79
+ ("_upload handler(s) present; allow-list + nosniff look ok — spot-check_" if up.get("upload_handlers")
80
+ else "_no upload handlers detected_"))
81
+ pii = facts.get("pii_exposure", {})
82
+ pii_findings = pii.get("findings", [])
83
+ pii_section = ("\n".join(f"- **{f.get('severity')}** {f.get('kind')} — `{f.get('file')}`" for f in pii_findings[:20])
84
+ if pii_findings else "_no obvious raw-PII responses / dead masking controls_")
85
+ ws_line = (facts.get("client_integrity", {}) or {}).get("websocket_auth", "no websocket detected")
72
86
 
73
87
  gql = facts.get("graphql", {})
74
88
  if gql.get("present"):
@@ -113,6 +127,12 @@ def render(facts: dict, scanners: dict, scan_results: list, probe_manifest: list
113
127
  endpoints = routes.get("endpoints", [])
114
128
  inventory = _bullets([f"`{e['method']:6}` {e['path']}" for e in endpoints], cap=80)
115
129
 
130
+ partial_banner = (
131
+ f"\n> ⚠️ **PARTIAL SCAN** — the walker stopped at the {facts.get('file_cap','?')}-file cap "
132
+ f"({facts.get('files_scanned','?')} files read, filesystem order), so recon may be INCOMPLETE on "
133
+ "this repo. Re-run scoped to a subdirectory or with `--exclude` to cover the rest before trusting "
134
+ "an absence of findings.\n" if facts.get("files_truncated") else "")
135
+
116
136
  return f"""# AGENT BRIEFING — security pass for `{facts.get('target','')}`
117
137
 
118
138
  > Generated by **websec-validator v{facts.get('version','')}** — deterministic recon, no LLM.
@@ -127,7 +147,7 @@ def render(facts: dict, scanners: dict, scan_results: list, probe_manifest: list
127
147
 
128
148
  ⚠️ Static findings + recon need **no running app**. The probes need a **live test instance + test
129
149
  credentials** — ask the human, never fabricate, never hit production.
130
-
150
+ {partial_banner}
131
151
  ---
132
152
 
133
153
  ## 1. What this app is (detected)
@@ -181,6 +201,14 @@ Production source maps exposed: {client.get("production_source_maps", False)}
181
201
  **Client integrity — man-in-the-browser / tamperable display:**
182
202
  {ci_section}
183
203
 
204
+ **WebSocket auth model (CSWSH determinant — is it an ambient cookie?):** {ws_line}
205
+
206
+ **File-upload security (#2b — sniff bytes, derive stored name, nosniff on serve):**
207
+ {up_section}
208
+
209
+ **PII output boundary (#8 — verify by VALUE SHAPE, not field name):**
210
+ {pii_section}
211
+
184
212
  **Third-party integrations:** {integ_line}
185
213
  {wh_line}
186
214
 
@@ -134,9 +134,11 @@ def cmd_run(args) -> int:
134
134
  # 5. briefing + comprehensive REPORT.md (immutable run record)
135
135
  (out / "AGENT-BRIEFING.md").write_text(briefing.render(facts, det, scan_results, manifest, unified))
136
136
  (out / "REPORT.md").write_text(report.render(facts, det, scan_results, unified, manifest, ts, ledger))
137
+ # drop the full `all` finding list from the manifest — it's a duplicate of findings.json
138
+ manifest_summary = {k: v for k, v in unified.items() if k != "all"} if unified else None
137
139
  (out / "manifest.json").write_text(json.dumps(
138
140
  {"facts": "FACTS.json", "scanners": det, "scan_results": scan_results,
139
- "findings_summary": unified, "ledger": {"total": ledger["total"], "by_severity": ledger["by_severity"]},
141
+ "findings_summary": manifest_summary, "ledger": {"total": ledger["total"], "by_severity": ledger["by_severity"]},
140
142
  "probes": manifest, "timestamp": ts}, indent=2))
141
143
 
142
144
  print(f"\n✓ run {ts} saved (immutable — nothing overwritten):\n {out}")
@@ -327,6 +329,9 @@ def _which(b):
327
329
 
328
330
 
329
331
  def _print_facts_summary(facts: dict) -> None:
332
+ if facts.get("files_truncated"):
333
+ print(f" ⚠ PARTIAL SCAN — hit the {facts.get('file_cap', '?')}-file cap; recon may be incomplete. "
334
+ "Narrow with --exclude or scan a subdirectory.")
330
335
  st = facts.get("stack", {})
331
336
  rt = facts.get("routes", {})
332
337
  tg = rt.get("targeting", {})
@@ -106,7 +106,9 @@ def cross_tenant_bola(cfg: dict, facts: dict) -> dict:
106
106
  for path in endpoints:
107
107
  # attacker A tries to read B's tenant data, and vice-versa
108
108
  for atk, vic, direction in ((a, b, "A→B"), (b, a, "B→A")):
109
- url = cfg["target"] + path.replace("{" + param + "}", vic["tenant"])
109
+ # str(): a tenant id is often numeric (auto-increment) — str.replace's 2nd arg must be a
110
+ # str, so a JSON int would crash this (uncaught) authenticated path.
111
+ url = cfg["target"] + path.replace("{" + param + "}", str(vic["tenant"]))
110
112
  code, body = _request("GET", url, atk["token"])
111
113
  if code in (401, 403, 404):
112
114
  verdict = "blocked"
@@ -164,7 +166,9 @@ def unauth_reachability(target: str, facts: dict, max_endpoints: int = 50) -> di
164
166
  if e.get("method") != "GET" or "{" in p or SIDE_EFFECTING.search(p):
165
167
  continue
166
168
  eps.append(p)
167
- eps = sorted(set(eps))[:max_endpoints]
169
+ _all_eps = sorted(set(eps))
170
+ eps = _all_eps[:max_endpoints]
171
+ over_cap = max(0, len(_all_eps) - max_endpoints) # disclose, don't silently drop (a missed endpoint = a missed lead)
168
172
 
169
173
  results, skipped = [], [e.get("path") for e in (facts.get("routes") or {}).get("endpoints", [])
170
174
  if e.get("method") == "GET" and SIDE_EFFECTING.search(e.get("path", ""))]
@@ -195,11 +199,13 @@ def unauth_reachability(target: str, facts: dict, max_endpoints: int = 50) -> di
195
199
  "skipped_side_effecting": sorted(set(skipped)),
196
200
  "open_no_auth": openish,
197
201
  "results": results,
202
+ "endpoints_over_cap": over_cap,
198
203
  "fail_open_suspected": fail_open,
199
204
  "authn_trustworthy": not fail_open,
200
205
  "warning": FAIL_OPEN_WARNING if fail_open else "",
201
206
  "summary": f"{len(openish)}/{len(results)} data-read GET endpoints reachable WITHOUT auth"
202
207
  + (" — review whether these should be public" if openish else " — all gated")
208
+ + (f" · ⚠ {over_cap} more over the {max_endpoints}-endpoint cap NOT tested" if over_cap else "")
203
209
  + (" · ⚠ FAIL-OPEN SUSPECTED (nothing enforced auth — results untrustworthy)" if fail_open else ""),
204
210
  }
205
211
 
@@ -219,7 +225,9 @@ def write_auth_enforcement(target: str, facts: dict, max_endpoints: int = 80) ->
219
225
  p = e.get("path", "")
220
226
  if e.get("method") in WRITE_VERBS and not SIDE_EFFECTING.search(p):
221
227
  eps.append((e["method"], p))
222
- eps = sorted(set(eps))[:max_endpoints]
228
+ _all_eps = sorted(set(eps))
229
+ eps = _all_eps[:max_endpoints]
230
+ over_cap = max(0, len(_all_eps) - max_endpoints)
223
231
 
224
232
  results = []
225
233
  for method, path in eps:
@@ -229,9 +237,14 @@ def write_auth_enforcement(target: str, facts: dict, max_endpoints: int = 80) ->
229
237
  verdict = "auth-enforced"
230
238
  elif code in (200, 201, 204):
231
239
  verdict = "EXECUTED-UNAUTH"
232
- elif code in (400, 422, 404, 405, 409, 415, 500):
240
+ elif code in (400, 422, 404, 405, 409, 415):
233
241
  verdict = "no-auth-gate (reached handler/validation)"
234
242
  else:
243
+ # 500 (and any other code) is INCONCLUSIVE: a 500 may be the auth layer itself throwing,
244
+ # not the handler running unauthenticated — so it must NOT become a no-auth-gate verdict
245
+ # (which would escalate to a HIGH missing-auth finding AND poison the calibration oracle
246
+ # with a confirmed-real sample). Matches the forged-token engine, which also excludes 500
247
+ # from "reached handler".
235
248
  verdict = f"http-{code}"
236
249
  results.append({"method": method, "path": path, "status": code, "verdict": verdict})
237
250
 
@@ -248,11 +261,13 @@ def write_auth_enforcement(target: str, facts: dict, max_endpoints: int = 80) ->
248
261
  "no_auth_gate": missing,
249
262
  "executed_unauth": executed,
250
263
  "results": results,
264
+ "endpoints_over_cap": over_cap,
251
265
  "fail_open_suspected": fail_open,
252
266
  "authn_trustworthy": not fail_open,
253
267
  "warning": FAIL_OPEN_WARNING if fail_open else "",
254
268
  "summary": f"{enforced}/{len(results)} write endpoints enforce auth · "
255
269
  f"{len(missing)} reached with no auth gate · {len(executed)} executed unauthenticated"
270
+ + (f" · ⚠ {over_cap} more over the {max_endpoints}-endpoint cap NOT tested" if over_cap else "")
256
271
  + (" · ⚠ FAIL-OPEN SUSPECTED — results untrustworthy" if fail_open else ""),
257
272
  }
258
273
 
@@ -299,7 +314,9 @@ def forged_token_bypass(target: str, facts: dict, cookie_names=None,
299
314
  targets += [(e.get("method"), e.get("path", "")) for e in (facts.get("routes") or {}).get("endpoints", [])
300
315
  if e.get("method") in WRITE_VERBS and "{" not in e.get("path", "")
301
316
  and not SIDE_EFFECTING.search(e.get("path", ""))]
302
- targets = sorted(set(targets))[:max_endpoints]
317
+ _all_targets = sorted(set(targets))
318
+ targets = _all_targets[:max_endpoints]
319
+ over_cap = max(0, len(_all_targets) - max_endpoints)
303
320
 
304
321
  results, bypassed = [], []
305
322
  for method, path in targets:
@@ -335,9 +352,11 @@ def forged_token_bypass(target: str, facts: dict, cookie_names=None,
335
352
  "tested": len(results),
336
353
  "bypassed": bypassed,
337
354
  "results": results,
355
+ "endpoints_over_cap": over_cap,
338
356
  "summary": f"{len(bypassed)}/{len(results)} gated route(s) accepted a forged unsigned token"
339
357
  + (" — ⚠ SIGNATURE NOT VERIFIED (CWE-347 auth bypass)" if bypassed
340
- else " — all rejected the forged token"),
358
+ else " — all rejected the forged token")
359
+ + (f" · ⚠ {over_cap} more over the {max_endpoints}-endpoint cap NOT tested" if over_cap else ""),
341
360
  }
342
361
 
343
362
 
@@ -11,18 +11,20 @@ from pathlib import Path
11
11
 
12
12
  from .auth import AuthExtractor
13
13
  from .authz import AuthzExtractor
14
- from .base import Extractor, RepoContext
14
+ from .base import MAX_FILES, Extractor, RepoContext
15
15
  from .client_exposure import ClientExposureExtractor
16
16
  from .client_integrity import ClientIntegrityExtractor
17
17
  from .graphql import GraphQLExtractor
18
18
  from .iac_ci import IacCiExtractor
19
19
  from .integrations import IntegrationsExtractor
20
+ from .pii_exposure import PiiExposureExtractor
20
21
  from .policy_consistency import PolicyConsistencyExtractor
21
22
  from .routes import RoutesExtractor
22
23
  from .schemas import SchemasExtractor
23
24
  from .stack import StackExtractor
24
25
  from .surface import SurfaceExtractor
25
26
  from .tenant import TenantExtractor
27
+ from .upload_security import UploadSecurityExtractor
26
28
 
27
29
  # Order matters: stack first (others read facts['stack']); authz after routes
28
30
  # (reads facts['routes']).
@@ -34,10 +36,12 @@ REGISTRY: list[Extractor] = [
34
36
  TenantExtractor(),
35
37
  PolicyConsistencyExtractor(),
36
38
  SurfaceExtractor(),
39
+ UploadSecurityExtractor(),
37
40
  SchemasExtractor(),
38
41
  IacCiExtractor(),
39
42
  ClientExposureExtractor(),
40
43
  ClientIntegrityExtractor(),
44
+ PiiExposureExtractor(),
41
45
  GraphQLExtractor(),
42
46
  IntegrationsExtractor(),
43
47
  ]
@@ -51,6 +55,10 @@ def run_all(root: Path, version: str, excludes: list | None = None) -> dict:
51
55
  "version": version,
52
56
  "target": str(root.resolve()),
53
57
  "files_scanned": len(ctx.code_files),
58
+ # PARTIAL-scan guard: the walker stops at MAX_FILES (filesystem order), so on a very large
59
+ # monorepo recon may miss files. Surface it loudly rather than implying full coverage.
60
+ "files_truncated": bool(getattr(ctx, "truncated", False)),
61
+ "file_cap": MAX_FILES,
54
62
  }
55
63
  for ext in REGISTRY:
56
64
  try:
@@ -21,6 +21,12 @@ from .base import Extractor, RepoContext
21
21
 
22
22
  WRITE_VERBS = {"POST", "PUT", "PATCH", "DELETE"}
23
23
 
24
+ # endpoint_guards feeds the missing-auth ledger (findings.build_ledger), so capping it low was a
25
+ # silent coverage cliff: a big monorepo's unguarded write #401 never became a finding. Raised to
26
+ # cover realistic monorepos; truncation beyond this is DISCLOSED (endpoint_guards_truncated), never
27
+ # silent — mirrors constitution.py's "…and N more" pattern.
28
+ _MAX_ENDPOINT_GUARDS = 5000
29
+
24
30
  GUARD = re.compile(
25
31
  r"requireAuth|requirePermission|requireRole|requireGroupAccess|isAuthenticated|"
26
32
  r"@login_required|@jwt_required|@permission_required|@roles_required|ensureAuth|"
@@ -181,7 +187,8 @@ class AuthzExtractor(Extractor):
181
187
  "roles_detected": sorted(r for r in roles if r),
182
188
  "guard_summary": {"with_visible_guard": protected,
183
189
  "no_visible_guard": no_guard, "unknown": unknown},
184
- "endpoint_guards": egs[:400],
190
+ "endpoint_guards": egs[:_MAX_ENDPOINT_GUARDS],
191
+ "endpoint_guards_truncated": max(0, len(egs) - _MAX_ENDPOINT_GUARDS),
185
192
  "write_endpoints_without_visible_guard": sorted(set(no_guard_writes))[:60],
186
193
  "unsafe_auth_decoders": unsafe_decoders[:30],
187
194
  "unverified_signature_routes": unverified_routes,
@@ -31,6 +31,31 @@ MAX_FILES = 12000
31
31
  MAX_BYTES = 2_000_000
32
32
 
33
33
 
34
+ def path_in_skip_dir(path: str, root: "Path | str | None" = None) -> bool:
35
+ """True if `path` lies under a SKIP_DIR segment, measured RELATIVE to the scan root.
36
+
37
+ Checking the ABSOLUTE path's segments is the bug-005/bug-066 trap: when the scanned repo
38
+ itself lives under a skip-named ancestor (e.g. `.claude/worktrees/<id>`, `vendor/`,
39
+ `target/`, `~/.cache`), a segment ABOVE the root matches and the WHOLE tree — every route,
40
+ every finding — is silently dropped. Noir + the static scanners emit ABSOLUTE paths, so any
41
+ traversal that post-filters their output MUST strip the root prefix first (the walker already
42
+ does, via relative_to). Fail OPEN (keep the item) when the path can't be made relative — a
43
+ silent drop is the dangerous direction for a security tool. `root=None` preserves the legacy
44
+ raw-segment behavior for already-relative inputs.
45
+ """
46
+ p = (path or "").replace("\\", "/")
47
+ if not p:
48
+ return False
49
+ if root is not None:
50
+ try:
51
+ p = Path(path).resolve().relative_to(Path(root).resolve()).as_posix()
52
+ except (ValueError, OSError):
53
+ if Path(p).is_absolute():
54
+ return False # absolute but outside the root → don't risk a false drop
55
+ # else: already a root-relative path → check its segments as-is below
56
+ return any(part in SKIP_DIRS for part in p.split("/"))
57
+
58
+
34
59
  class RepoContext:
35
60
  """Walk the tree once; cache file text; serve cheap queries to every extractor."""
36
61
 
@@ -47,9 +72,11 @@ class RepoContext:
47
72
 
48
73
  def _walk(self) -> None:
49
74
  n = 0
75
+ self.truncated = False # set when MAX_FILES is hit → recon is PARTIAL, surface it
50
76
  for p in self.root.rglob("*"):
51
77
  if n >= MAX_FILES:
52
- break
78
+ self.truncated = True # rglob order is filesystem-dependent → which files drop is
79
+ break # nondeterministic; the consumer MUST know coverage is partial
53
80
  # match SKIP_DIRS against parts RELATIVE to the scan root — otherwise a
54
81
  # repo located under e.g. ~/.cache or any dir named like a skip-dir would
55
82
  # have its whole tree skipped.
@@ -48,6 +48,16 @@ OOB_ANCHOR = re.compile(
48
48
  r"|out[_-]of[_-]band|toChecksumAddress|getAddress\(|checksumAddress|\beip[_-]?55\b|verifyAddress"
49
49
  r"|address[_-]?verif|verif\w*[_-]?address|sendVerificationEmail|canonical[_-]?address", re.I)
50
50
 
51
+ # WebSocket / realtime auth model — the CSWSH determinant (PTREQ0013000 #4). CSWSH is only
52
+ # exploitable when the socket authenticates via an AMBIENT COOKIE the browser auto-attaches
53
+ # cross-origin. A token placed in the connection payload / subprotocol and stored origin-scoped is
54
+ # NOT exploitable (SOP blocks a cross-origin page from reading it). This lets us ANSWER a CSWSH
55
+ # scanner flag instead of guessing — the retest pushed back on exactly this and won.
56
+ WS_USAGE = re.compile(r"new\s+WebSocket\(|socket\.io|graphql-ws|subscriptions-transport-ws|appsync-realtime"
57
+ r"|\bwss?://", re.I)
58
+ WS_COOKIE_AUTH = re.compile(r"withCredentials\s*:\s*true|credentials\s*:\s*['\"]include['\"]"
59
+ r"|document\.cookie[\s\S]{0,80}?(?:socket|ws\b|websocket)", re.I)
60
+
51
61
 
52
62
  class ClientIntegrityExtractor(Extractor):
53
63
  name = "client_integrity"
@@ -57,6 +67,7 @@ class ClientIntegrityExtractor(Extractor):
57
67
  sensitive, qr_files, clip_files = [], [], []
58
68
  csp_present = csp_self = csp_nonce = csp_unsafe = False
59
69
  oob = []
70
+ ws_usage = ws_cookie = False
60
71
  for _p, rel, text in ctx.iter_code():
61
72
  if SENSITIVE_VALUE.search(text):
62
73
  if len(sensitive) < 30:
@@ -75,10 +86,15 @@ class ClientIntegrityExtractor(Extractor):
75
86
  csp_unsafe = True
76
87
  if OOB_ANCHOR.search(text) and len(oob) < 20:
77
88
  oob.append(rel)
89
+ if WS_USAGE.search(text):
90
+ ws_usage = True
91
+ if WS_COOKIE_AUTH.search(text):
92
+ ws_cookie = True
78
93
 
79
94
  # strict = a real `script-src 'self'` (+ a nonce / strict-dynamic) with NO unsafe-inline/eval
80
95
  strict_csp = bool(csp_present and csp_self and csp_nonce and not csp_unsafe)
81
96
  out_of_band = bool(oob)
97
+ ws_cookie_auth = bool(ws_usage and ws_cookie) # the CSWSH determinant (ambient-cookie WS auth)
82
98
 
83
99
  findings = []
84
100
  present = bool(sensitive)
@@ -109,8 +125,24 @@ class ClientIntegrityExtractor(Extractor):
109
125
  "cryptographically tamper-proof on the web — the goal is detectable, not "
110
126
  "impossible (the limit that hardware wallets exist to solve)."})
111
127
 
128
+ # CSWSH is ONLY real when the WS auth is an ambient cookie (PTREQ0013000 #4). This lets us
129
+ # answer a CSWSH scanner flag instead of guessing — a bearer token in the payload is not it.
130
+ if ws_cookie_auth:
131
+ findings.append({
132
+ "severity": "MEDIUM", "confidence": "LOW", "attack_class": "cswsh",
133
+ "issue": "WebSocket authenticated via an ambient cookie (Cross-Site WebSocket Hijacking)",
134
+ "detail": "A WebSocket/realtime connection appears to authenticate via a cookie "
135
+ "(withCredentials / credentials:'include'), which the browser auto-attaches "
136
+ "cross-origin — so a page on any origin can open an authenticated socket (CSWSH, #4). "
137
+ "Validate the Origin on the handshake, or move the credential into the connection "
138
+ "payload / subprotocol and store it origin-scoped (not a cookie). If WS auth is "
139
+ "already a token in the payload, CSWSH is NOT exploitable."})
140
+
112
141
  return {
113
142
  "sensitive_display": sorted(set(sensitive)),
143
+ "websocket_auth": ("cookie (CSWSH-exposed — validate Origin)" if ws_cookie_auth
144
+ else "token-or-none (CSWSH not exploitable)" if ws_usage
145
+ else "no websocket detected"),
114
146
  "qr_generation": sorted(set(qr_files)),
115
147
  "clipboard_copy": sorted(set(clip_files)),
116
148
  "strict_csp": strict_csp,
@@ -33,6 +33,11 @@ TENANT_ARG = re.compile(r"\b(\w+)\s*\(([^)]*\b(?:groupId|group_id|orgId|org_id|t
33
33
  # Identity-binding signals in a VTL resolver — the field is tied to the CALLER, not a free arg.
34
34
  VTL_AUTHZ = re.compile(r"\$ctx(?:tx)?\.identity|\$context\.identity|identity\.(?:sub|username|claims|resolverContext)"
35
35
  r"|util\.unauthorized|\bgroupIds?\b[\s\S]{0,80}?\bcontains\b|#if\s*\(\s*!?\s*\$ctx\.identity")
36
+ # Engine-level introspection disable on aws-cdk-lib appsync.GraphqlApi. The PTREQ0013000 RETEST
37
+ # proved this IS available and un-bypassable (unlike a WAF string-match) — so a correctly-configured
38
+ # AppSync API must NOT be flagged. This corrects the 0.3.0 false positive that always cried wolf.
39
+ APPSYNC_INTROSPECTION_OFF = re.compile(r"introspectionConfig\s*:\s*[\w.]*\bDISABLED\b")
40
+ APPSYNC_LIMITING = re.compile(r"\bqueryDepthLimit\b|\bresolverCountLimit\b")
36
41
 
37
42
 
38
43
  class GraphQLExtractor(Extractor):
@@ -47,10 +52,15 @@ class GraphQLExtractor(Extractor):
47
52
 
48
53
  introspection, playground, limiting, code_hit = "unknown", False, False, False
49
54
  appsync, aws_directives = False, False
55
+ appsync_introspection_off = appsync_limiting = False
50
56
  schema_texts = [] # (rel, text) for SDL files — parsed for Subscription authz
51
57
  for _p, rel, text in ctx.iter_code():
52
58
  if APPSYNC_MARK.search(text):
53
59
  appsync = True
60
+ if APPSYNC_INTROSPECTION_OFF.search(text):
61
+ appsync_introspection_off = True
62
+ if APPSYNC_LIMITING.search(text):
63
+ appsync_limiting = True
54
64
  if rel.endswith((".graphql", ".gql")):
55
65
  schema_texts.append((rel, text))
56
66
  if AWS_AUTH_DIRECTIVE.search(text):
@@ -74,14 +84,20 @@ class GraphQLExtractor(Extractor):
74
84
  findings = []
75
85
  sub_authz = []
76
86
  if managed:
77
- # AppSync exposes introspection and it is NOT disablable at the API layer (no Apollo-style
78
- # `introspection:false`). The report's #2 proved the WAF that "blocks" it is bypassable.
79
- findings.append({"severity": "MEDIUM", "issue": "AppSync GraphQL introspection reachable",
80
- "attack_class": "graphql",
81
- "detail": "AppSync exposes schema introspection; it can't be disabled at the API layer. "
82
- "If a WAF blocks the keyword, that string-match is bypassable via Unicode-escape "
83
- "/ junk-byte padding (PTREQ0013000 #2). Enforce field-level @aws_* auth + run the "
84
- "appsync-introspection probe (it attempts the bypass) don't rely on the WAF."})
87
+ # AppSync introspection CAN be disabled engine-level via
88
+ # `introspectionConfig: IntrospectionConfig.DISABLED` (aws-cdk-lib) — un-bypassable, unlike
89
+ # a WAF byte-match. Only flag when it is NOT disabled (retest correction to the 0.3.0 FP).
90
+ if not appsync_introspection_off:
91
+ findings.append({"severity": "MEDIUM", "issue": "AppSync GraphQL introspection not disabled engine-level",
92
+ "attack_class": "graphql",
93
+ "detail": "Set `introspectionConfig: appsync.IntrospectionConfig.DISABLED` so the engine "
94
+ "rejects __schema/__type regardless of encoding. A WAF byte-match on `__schema` "
95
+ "is NOT sufficient — bypassable via Unicode/JSON escapes and it only fronts one "
96
+ "endpoint (PTREQ0013000 #2). Run the appsync-introspection probe to confirm."})
97
+ if not (appsync_limiting or limiting):
98
+ findings.append({"severity": "LOW", "issue": "AppSync has no query depth / resolver-count limit",
99
+ "attack_class": "graphql",
100
+ "detail": "add `queryDepthLimit` + `resolverCountLimit` (alias / deep-query DoS guard)."})
85
101
  sub_authz = self._subscription_authz(ctx, schema_texts, findings)
86
102
  else:
87
103
  if introspection in ("enabled", "unknown"):
@@ -103,7 +119,8 @@ class GraphQLExtractor(Extractor):
103
119
  or (["AppSync GraphQL API (HTTP + realtime WebSocket)"] if managed
104
120
  else ["(server detected; endpoint not routed by Noir)"]),
105
121
  "schema_files": schema_files[:20],
106
- "introspection": "appsync-reachable" if managed else introspection,
122
+ "introspection": (("appsync-disabled" if appsync_introspection_off else "appsync-reachable")
123
+ if managed else introspection),
107
124
  "playground_enabled": playground, "query_limiting_detected": limiting,
108
125
  "subscription_authz": sub_authz,
109
126
  "findings": findings,