websec-validator 0.4.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. {websec_validator-0.4.2/src/websec_validator.egg-info → websec_validator-0.5.0}/PKG-INFO +7 -6
  2. {websec_validator-0.4.2 → websec_validator-0.5.0}/README.md +6 -5
  3. {websec_validator-0.4.2 → websec_validator-0.5.0}/pyproject.toml +1 -1
  4. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/constitution.py +1 -1
  5. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/extractors/__init__.py +2 -0
  6. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/extractors/auth.py +2 -2
  7. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/extractors/base.py +2 -2
  8. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/extractors/client_exposure.py +16 -5
  9. websec_validator-0.5.0/src/websec_validator/extractors/client_integrity.py +272 -0
  10. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/extractors/graphql.py +10 -5
  11. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/extractors/iac_ci.py +2 -2
  12. websec_validator-0.5.0/src/websec_validator/extractors/integrations.py +129 -0
  13. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/extractors/pii_exposure.py +4 -4
  14. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/extractors/policy_consistency.py +2 -2
  15. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/extractors/surface.py +3 -3
  16. websec_validator-0.5.0/src/websec_validator/extractors/transport_security.py +165 -0
  17. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/extractors/upload_security.py +2 -2
  18. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/findings.py +59 -7
  19. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/probes.py +5 -5
  20. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/rules/error-stack-disclosure.yml +1 -1
  21. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/rules/insecure-default-secret.yml +1 -1
  22. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/scanners.py +1 -1
  23. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/appsync-cswsh.sh +1 -1
  24. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/appsync-introspection.sh +1 -1
  25. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/appsync-subscription-bola.sh +1 -1
  26. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/error-disclosure-probe.sh +1 -1
  27. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/password-reuse.sh +1 -1
  28. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/pii-output-diff.sh +2 -2
  29. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/upload-matrix.sh +1 -1
  30. {websec_validator-0.4.2 → websec_validator-0.5.0/src/websec_validator.egg-info}/PKG-INFO +7 -6
  31. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator.egg-info/SOURCES.txt +1 -0
  32. {websec_validator-0.4.2 → websec_validator-0.5.0}/tests/test_pentest_regressions.py +190 -3
  33. websec_validator-0.4.2/src/websec_validator/extractors/client_integrity.py +0 -158
  34. websec_validator-0.4.2/src/websec_validator/extractors/integrations.py +0 -61
  35. {websec_validator-0.4.2 → websec_validator-0.5.0}/LICENSE +0 -0
  36. {websec_validator-0.4.2 → websec_validator-0.5.0}/setup.cfg +0 -0
  37. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/__init__.py +0 -0
  38. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/briefing.py +0 -0
  39. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/calibration.json +0 -0
  40. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/calibration.py +0 -0
  41. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/cli.py +0 -0
  42. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/corpus.json +0 -0
  43. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/dynamic.py +0 -0
  44. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/extractors/authz.py +0 -0
  45. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/extractors/routes.py +0 -0
  46. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/extractors/schemas.py +0 -0
  47. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/extractors/stack.py +0 -0
  48. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/extractors/tenant.py +0 -0
  49. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/proof.py +0 -0
  50. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/recon.py +0 -0
  51. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/report.py +0 -0
  52. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/_lib.py +0 -0
  53. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/bola-cross-tenant.sh +0 -0
  54. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/bola-write-verbs.py +0 -0
  55. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/client-integrity-checklist.sh +0 -0
  56. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/compare-roles.sh +0 -0
  57. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/dlp-bypass-offline.py +0 -0
  58. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/forged-token.sh +0 -0
  59. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/hs256-brute-force.py +0 -0
  60. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/jwt-attacks.sh +0 -0
  61. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/mass-assignment.py +0 -0
  62. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/race-conditions.py +0 -0
  63. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/rate-limit-burst.sh +0 -0
  64. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/s3-assess.sh +0 -0
  65. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/ssrf-probes.sh +0 -0
  66. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/unauth-baseline.sh +0 -0
  67. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/probes/webhook-forgery.py +0 -0
  68. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +0 -0
  69. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/reports/access-control-matrix.md.template +0 -0
  70. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/reports/findings-triage.md.template +0 -0
  71. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/reports/pentest-handover-brief.md.template +0 -0
  72. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator/templates/reports/per-tool-FINDINGS.md.template +0 -0
  73. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator.egg-info/dependency_links.txt +0 -0
  74. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator.egg-info/entry_points.txt +0 -0
  75. {websec_validator-0.4.2 → websec_validator-0.5.0}/src/websec_validator.egg-info/top_level.txt +0 -0
  76. {websec_validator-0.4.2 → websec_validator-0.5.0}/tests/test_hardening.py +0 -0
  77. {websec_validator-0.4.2 → websec_validator-0.5.0}/tests/test_recon.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: websec-validator
3
- Version: 0.4.2
3
+ Version: 0.5.0
4
4
  Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
5
5
  Author: Ricardo Accioly
6
6
  License: MIT
@@ -84,7 +84,7 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
84
84
 
85
85
  > That's the whole user surface: **`run`** (plus the optional, advanced **`dynamic`** live-probing step below). `recon`/`proof`/`calibrate` exist for developing the tool itself and are hidden from `--help` — you never need them.
86
86
 
87
- ## What it extracts (15 deterministic extractors, no LLM)
87
+ ## What it extracts (16 deterministic extractors, no LLM)
88
88
 
89
89
  | | Dimension | Notable output |
90
90
  |---|---|---|
@@ -99,10 +99,11 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
99
99
  | schemas | data models + **privileged fields** | Pydantic/SQLAlchemy/Django/Prisma/Mongoose/TypeORM/Zod → `role`/`isAdmin`/`groupId` for mass-assignment targeting |
100
100
  | iac_ci | IaC + CI/CD | GHA injection, unpinned actions, tfstate, **CDK AppSync `API_KEY` anonymous-default-auth + WAF-as-control smell** |
101
101
  | client_exposure | browser leakage | public-var secrets by **name + value-shape (`da2-…`) + CDK build-injection**, server-secret-in-client, source maps |
102
- | **client_integrity** | tamperable display + **WS auth model** | wallet value without strict CSP / out-of-band anchor **+ the CSWSH determinant (ambient-cookie WS auth)** |
102
+ | **client_integrity** | tamperable display (client trust boundary) + **WS auth model** | any security-critical sink value (address/IBAN/2FA-seed/API-key/webhook) the user reads or copies, without strict CSP / out-of-band anchor **+ client-tamper-vector, grindable-fingerprint, over-claimed-control, the CSWSH determinant** |
103
+ | **transport_security** | CSP + HSTS header baseline | missing/weak CSP, inline event handlers, **partial HSTS (set on /api but not the HTML page)** |
103
104
  | **pii_exposure** | unmasked PII at the output boundary | `res.json(rawEntity)` with PII + **a masking control defined but with zero live call sites** (value-shape, not field-name) |
104
105
  | graphql | GraphQL surface | introspection (**AppSync `introspectionConfig: DISABLED`-aware**) / playground / depth-limit **+ AppSync subscription-authz (cross-group BOLA)** |
105
- | integrations | third-party + webhooks | webhooks missing signature verification |
106
+ | integrations | third-party + webhooks **+ outbound-action endpoints** | unsigned webhooks **+ email/SMS/push handlers with no auth or IP-only rate-limit + redundant secret-fetch** |
106
107
 
107
108
  Plus **derived targeting** — IDOR / SSRF / open-redirect / upload / write / auth-endpoint
108
109
  candidates — so probes get pointed at the *exact* endpoints, not fired blindly.
@@ -184,7 +185,7 @@ upload, cross-tenant BOLA, role/authz gaps).
184
185
  ## Tests
185
186
 
186
187
  ```bash
187
- python3 -m unittest discover -s tests # stdlib only, no Noir/network — 103 tests
188
+ python3 -m unittest discover -s tests # stdlib only, no Noir/network — 126 tests
188
189
  ```
189
190
 
190
191
  ## Releasing (maintainer)
@@ -216,7 +217,7 @@ managed-AppSync / VTL boundary**, **upload-security** + **PII-output-boundary**
216
217
  de-dup + **bundled Semgrep rules**, tailored probe staging, agent briefing, traceable findings ledger
217
218
  with **calibrated confidence (CJE — Wilson CIs)**, proof harness, test suite, **Docker bundle** (all
218
219
  scanners + Noir, arch-aware), **dynamic phase v1** (authenticated read-only cross-tenant BOLA —
219
- validated live, reproduced a hand-pentest's 14/14). Validated against the **PTREQ0013000 pen test +
220
+ validated live, reproduced a hand-pentest's 14/14). Validated against the **REF-PENTEST pen test +
220
221
  retest** (incl. correcting two findings the retest disproved: AppSync introspection *is* disablable
221
222
  engine-level, and API_KEY-default is anonymous-auth, not CSWSH).
222
223
  **Next:** dynamic write-verb BOLA + JWT/auth probes + ZAP/Nuclei two-role diff (gated, they mutate),
@@ -72,7 +72,7 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
72
72
 
73
73
  > That's the whole user surface: **`run`** (plus the optional, advanced **`dynamic`** live-probing step below). `recon`/`proof`/`calibrate` exist for developing the tool itself and are hidden from `--help` — you never need them.
74
74
 
75
- ## What it extracts (15 deterministic extractors, no LLM)
75
+ ## What it extracts (16 deterministic extractors, no LLM)
76
76
 
77
77
  | | Dimension | Notable output |
78
78
  |---|---|---|
@@ -87,10 +87,11 @@ Then point your agent at the output: **"Read `websec-out/AGENT-BRIEFING.md` and
87
87
  | schemas | data models + **privileged fields** | Pydantic/SQLAlchemy/Django/Prisma/Mongoose/TypeORM/Zod → `role`/`isAdmin`/`groupId` for mass-assignment targeting |
88
88
  | iac_ci | IaC + CI/CD | GHA injection, unpinned actions, tfstate, **CDK AppSync `API_KEY` anonymous-default-auth + WAF-as-control smell** |
89
89
  | client_exposure | browser leakage | public-var secrets by **name + value-shape (`da2-…`) + CDK build-injection**, server-secret-in-client, source maps |
90
- | **client_integrity** | tamperable display + **WS auth model** | wallet value without strict CSP / out-of-band anchor **+ the CSWSH determinant (ambient-cookie WS auth)** |
90
+ | **client_integrity** | tamperable display (client trust boundary) + **WS auth model** | any security-critical sink value (address/IBAN/2FA-seed/API-key/webhook) the user reads or copies, without strict CSP / out-of-band anchor **+ client-tamper-vector, grindable-fingerprint, over-claimed-control, the CSWSH determinant** |
91
+ | **transport_security** | CSP + HSTS header baseline | missing/weak CSP, inline event handlers, **partial HSTS (set on /api but not the HTML page)** |
91
92
  | **pii_exposure** | unmasked PII at the output boundary | `res.json(rawEntity)` with PII + **a masking control defined but with zero live call sites** (value-shape, not field-name) |
92
93
  | graphql | GraphQL surface | introspection (**AppSync `introspectionConfig: DISABLED`-aware**) / playground / depth-limit **+ AppSync subscription-authz (cross-group BOLA)** |
93
- | integrations | third-party + webhooks | webhooks missing signature verification |
94
+ | integrations | third-party + webhooks **+ outbound-action endpoints** | unsigned webhooks **+ email/SMS/push handlers with no auth or IP-only rate-limit + redundant secret-fetch** |
94
95
 
95
96
  Plus **derived targeting** — IDOR / SSRF / open-redirect / upload / write / auth-endpoint
96
97
  candidates — so probes get pointed at the *exact* endpoints, not fired blindly.
@@ -172,7 +173,7 @@ upload, cross-tenant BOLA, role/authz gaps).
172
173
  ## Tests
173
174
 
174
175
  ```bash
175
- python3 -m unittest discover -s tests # stdlib only, no Noir/network — 103 tests
176
+ python3 -m unittest discover -s tests # stdlib only, no Noir/network — 126 tests
176
177
  ```
177
178
 
178
179
  ## Releasing (maintainer)
@@ -204,7 +205,7 @@ managed-AppSync / VTL boundary**, **upload-security** + **PII-output-boundary**
204
205
  de-dup + **bundled Semgrep rules**, tailored probe staging, agent briefing, traceable findings ledger
205
206
  with **calibrated confidence (CJE — Wilson CIs)**, proof harness, test suite, **Docker bundle** (all
206
207
  scanners + Noir, arch-aware), **dynamic phase v1** (authenticated read-only cross-tenant BOLA —
207
- validated live, reproduced a hand-pentest's 14/14). Validated against the **PTREQ0013000 pen test +
208
+ validated live, reproduced a hand-pentest's 14/14). Validated against the **REF-PENTEST pen test +
208
209
  retest** (incl. correcting two findings the retest disproved: AppSync introspection *is* disablable
209
210
  engine-level, and API_KEY-default is anonymous-auth, not CSWSH).
210
211
  **Next:** dynamic write-verb BOLA + JWT/auth probes + ZAP/Nuclei two-role diff (gated, they mutate),
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "websec-validator"
7
- version = "0.4.2"
7
+ version = "0.5.0"
8
8
  description = "Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -60,7 +60,7 @@ def build(facts: dict, ledger: dict | None = None) -> list:
60
60
  add("Secret hygiene", "Given the repo + git history, Then no live credential is present and no secret "
61
61
  "reaches the client bundle", "recon")
62
62
 
63
- # P6 — Signing-secret integrity (forgeable JWT, PTREQ0013000 #8)
63
+ # P6 — Signing-secret integrity (forgeable JWT, REF-PENTEST #8)
64
64
  for sd in ((facts.get("auth", {}) or {}).get("insecure_secret_defaults", []) or [])[:5]:
65
65
  add("Signing-secret integrity", f"Given the signing-secret env var is unset, When the app boots, Then it "
66
66
  f"FAILS CLOSED — no hard-coded fallback ({sd.get('literal')!r} in {sd.get('file')})",
@@ -24,6 +24,7 @@ from .schemas import SchemasExtractor
24
24
  from .stack import StackExtractor
25
25
  from .surface import SurfaceExtractor
26
26
  from .tenant import TenantExtractor
27
+ from .transport_security import TransportSecurityExtractor
27
28
  from .upload_security import UploadSecurityExtractor
28
29
 
29
30
  # Order matters: stack first (others read facts['stack']); authz after routes
@@ -41,6 +42,7 @@ REGISTRY: list[Extractor] = [
41
42
  IacCiExtractor(),
42
43
  ClientExposureExtractor(),
43
44
  ClientIntegrityExtractor(),
45
+ TransportSecurityExtractor(),
44
46
  PiiExposureExtractor(),
45
47
  GraphQLExtractor(),
46
48
  IntegrationsExtractor(),
@@ -28,7 +28,7 @@ _COOKIE_RESERVED = {"get", "set", "getall", "has", "delete", "clear", "tostring"
28
28
  "foreach", "entries", "keys", "values", "size", "name", "value", "length"}
29
29
 
30
30
  # Insecure DEFAULT signing secret — a hard-coded fallback on a secret/key var (the forgeable-JWT
31
- # class, PTREQ0013000 #8). JS/TS: `process.env.JWT_SECRET || 'dev-secret-do-not-use-in-prod'`;
31
+ # class, REF-PENTEST #8). JS/TS: `process.env.JWT_SECRET || 'dev-secret-do-not-use-in-prod'`;
32
32
  # Python: os.environ.get('JWT_SECRET', 'dev-secret'). A quoted fallback on a *SECRET/*KEY var is
33
33
  # almost never benign — and if it's a dev-ish placeholder AND the repo actually signs JWTs, anyone
34
34
  # who reads the source can forge tokens for any user/role.
@@ -92,7 +92,7 @@ class AuthExtractor(Extractor):
92
92
  for mm in SECRET_DEFAULT_PY.finditer(text):
93
93
  secret_defaults.append((rel, mm.group(1)))
94
94
 
95
- # Hard-coded fallback signing secret → forgeable-JWT lead (PTREQ0013000 #8). De-dup by
95
+ # Hard-coded fallback signing secret → forgeable-JWT lead (REF-PENTEST #8). De-dup by
96
96
  # (file, literal); mark dev-ish placeholders. findings.py escalates dev-ish + jwt-in-use to
97
97
  # CRITICAL; probes.stage seeds the literal into the hs256 brute-force candidate list.
98
98
  seen_sd: set = set()
@@ -16,14 +16,14 @@ SKIP_DIRS = {".git", "node_modules", "dist", "build", ".next", ".nuxt", "venv",
16
16
  ".venv", "__pycache__", ".mypy_cache", ".pytest_cache", "coverage",
17
17
  ".turbo", "out", "target", ".gradle", "vendor", "site-packages",
18
18
  ".terraform", "security", ".websec-out", "websec-out", ".cache",
19
- ".svelte-kit", "storybook-static", ".serverless",
19
+ ".svelte-kit", "storybook-static", ".serverless", ".aws-sam", "cdk.out", ".sst", ".amplify",
20
20
  # agent tooling + editor dirs + worktree copies — not the target app
21
21
  ".wolf", ".claude", ".worktrees", ".idea", ".vscode", ".agent", ".agents"}
22
22
  CODE_EXT = {".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs", ".py", ".go", ".rb",
23
23
  ".java", ".php", ".prisma",
24
24
  # Managed-cloud surfaces: AppSync GraphQL SDL (@aws_* auth directives) + VTL
25
25
  # resolvers (where realtime/subscription authz actually lives, or is missing).
26
- # PTREQ0013000 #2/#5 lived in these file types — previously invisible to every
26
+ # REF-PENTEST #2/#5 lived in these file types — previously invisible to every
27
27
  # iter_code()-based extractor. routes.py SPEC_PATH still splits .graphql/.gql out
28
28
  # of the route list so SDL doesn't generate phantom endpoints.
29
29
  ".graphql", ".gql", ".vtl"}
@@ -16,21 +16,32 @@ SECRETISH = re.compile(r"SECRET|PRIVATE|TOKEN|PASSWORD|PASSWD|API_?KEY|ACCESS_?K
16
16
  SERVER_SECRET = re.compile(r"process\.env\.([A-Z0-9_]*(?:SECRET|PRIVATE|TOKEN|PASSWORD|API_?KEY|ACCESS_?KEY)[A-Z0-9_]*)")
17
17
 
18
18
  # VALUE-aware leak detection — hardens the name-based scan above so it survives a benign rename
19
- # (the PTREQ0013000 #3 gap: a real key carried in a non-secret-named public var slips the name scan).
20
- # We match distinctive secret SHAPES, not var names. AppSync's `da2-` key has NO scanner rule at all,
21
- # so we always flag it; the generic shapes (which trivy/gitleaks already catch) are only flagged when
22
- # the file is client-reachable, to add the ships-to-browser angle without duplicating those scanners.
19
+ # (the REF-PENTEST #3 gap: a real key carried in a non-secret-named public var slips the name scan).
20
+ # We match distinctive secret SHAPES, not var names — CLOUD-AGNOSTIC by design (AWS + Azure + GCP +
21
+ # generic), so the same value-leak detector works on a Next.js-on-Vercel, an Azure SWA, or a GCP
22
+ # Firebase app alike. AppSync's `da2-` key has NO scanner rule at all, so we always flag it; the
23
+ # generic shapes (which trivy/gitleaks already catch) are only flagged when the file is
24
+ # client-reachable, to add the ships-to-browser angle without duplicating those scanners.
23
25
  SECRET_SHAPES = [
26
+ # AWS
24
27
  (re.compile(r"\bda2-[a-z0-9]{26}\b"), "AWS AppSync API key (da2-…)", True),
25
28
  (re.compile(r"\bAKIA[0-9A-Z]{16}\b"), "AWS access key id (AKIA)", False),
29
+ # GCP / Google
26
30
  (re.compile(r"\bAIza[0-9A-Za-z_\-]{35}\b"), "Google API key (AIza…)", False),
31
+ (re.compile(r"""["']type["']\s*:\s*["']service_account["']"""), "GCP service-account credential JSON", False),
32
+ # Azure
33
+ (re.compile(r"AccountKey=[A-Za-z0-9+/]{86}=="), "Azure Storage account key (AccountKey=…)", False),
34
+ (re.compile(r"DefaultEndpointsProtocol=https;AccountName="), "Azure Storage connection string", False),
35
+ (re.compile(r"[?&]sig=[A-Za-z0-9%/+]{43,}&se="), "Azure SAS token (sig=…&se=…)", False),
36
+ # cloud-neutral
37
+ (re.compile(r"-----BEGIN (?:RSA |EC )?PRIVATE KEY-----"), "Private-key PEM block (TLS / SSH / SA key)", False),
27
38
  (re.compile(r"\bsk_live_[0-9A-Za-z]{16,}\b"), "Stripe live secret key (sk_live_…)", False),
28
39
  (re.compile(r"\beyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{4,}\b"), "JWT (eyJ…)", False),
29
40
  ]
30
41
  # CDK build-time injection: a CloudFormation output / SSM param / Secret wired INTO a public build
31
42
  # var — e.g. CodeBuild `envFromCfnOutputs: { VITE_APPSYNC_API_KEY: appsyncApiKeyOutput }`. Invisible
32
43
  # to every secret scanner because the value isn't in source; it's injected at build time (the exact
33
- # mechanism that shipped the AppSync key to the browser in PTREQ0013000 #3).
44
+ # mechanism that shipped the AppSync key to the browser in REF-PENTEST #3).
34
45
  CFN_TO_PUBLIC = re.compile(
35
46
  r"(?:envFromCfnOutputs|buildEnvironment|environmentVariables|partialBuildSpec)"
36
47
  r"[\s\S]{0,400}?((?:NEXT_PUBLIC_|VITE_|REACT_APP_|GATSBY_|EXPO_PUBLIC_)\w*)\s*[:=]\s*"
@@ -0,0 +1,272 @@
1
+ """Client-trust-boundary / tamperable-display extractor — the man-in-the-browser (MITB) class.
2
+
3
+ Generalized from the agent-wallet lesson: when an app renders a **security-critical sink value** —
4
+ ANY value the user ACTS ON by reading or copying, where a silent swap causes irreversible loss or
5
+ misdirection — that on-screen value is rewritable by code running in the victim's own browser
6
+ (malware, a rogue extension, a poisoned JS dependency in the app's own bundle). TLS protects the
7
+ wire, not the DOM.
8
+
9
+ The sink set is deliberately GENERIC and classified by BLAST RADIUS, not by app type — the pen-test
10
+ team's principle: detect by **data-flow role**, never by keyword/category. The same probe that finds
11
+ a swapped crypto address finds a swapped IBAN, a swapped 2FA seed, or a swapped webhook URL. The
12
+ keyword lists below are a STARTING SET, not the whole detector:
13
+ - money-movement : crypto/wallet address, IBAN/routing/account/SWIFT, payee/pay-to → HIGH
14
+ - credential : 2FA/TOTP seed, recovery/mnemonic phrase, private/API/license key → HIGH
15
+ - config/integrity : webhook/callback URL, DNS record, invoice payment instructions → MEDIUM
16
+ Severity tracks IRREVERSIBILITY; confidence stays LOW — this is an architectural "verify the
17
+ compensating controls" lead, never a "your app is broken" claim. No web app can make on-screen
18
+ display cryptographically tamper-proof; that's an inherent platform limit (it's why hardware wallets
19
+ exist), accepted by Coinbase/MetaMask/banks alike.
20
+
21
+ The two controls that actually move the needle:
22
+ Layer A (kill the SCALABLE vector): a strict Content-Security-Policy (`script-src 'self'` + a
23
+ nonce, no `unsafe-inline`/`unsafe-eval`) so an injected/supply-chain script can't run.
24
+ (The framework-agnostic CSP/HSTS *baseline* audit lives in `transport_security.py`.)
25
+ Layer B (anchor trust OFF the browser surface): an out-of-band verification path — emailed
26
+ canonical value, a short safety code / fingerprint, a server-rendered identicon, an
27
+ EIP-55 / IBAN checksum — so a single-surface tamper is at least *detectable*.
28
+
29
+ Also emitted here (same trust boundary):
30
+ - weak-fingerprint : a safety-code/fingerprint truncated to too few bits is grindable offline (#7).
31
+ - overclaimed-control: code or UI copy asserting a CLIENT-SIDE check is "tamper-proof" / "MitB-proof"
32
+ is a genuine finding — it makes teams overtrust a tripwire and under-invest in the real,
33
+ out-of-band/server-side control (#8).
34
+ - cswsh : a WebSocket authenticated via an AMBIENT COOKIE (the CSWSH determinant).
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ import re
40
+
41
+ from .base import Extractor, RepoContext
42
+
43
+ # --- Security-critical sink values, classified by blast radius (severity ∝ irreversibility) ---
44
+ SINK_MONEY = re.compile(
45
+ r"\b(?:wallet|receive|receiving|deposit|recipient|payout|beneficiary|payment|destination|payee)[_-]?address\b"
46
+ r"|\bwalletAddress\b|\btoAddress\b|\bpayTo\b|\bpayee\b|\brouting[_-]?number\b|\baccount[_-]?number\b"
47
+ r"|\biban\b|\bswift[_-]?code\b|\bsort[_-]?code\b|\b0x[0-9a-fA-F]{40}\b"
48
+ r"|crypto.{0,12}address|blockchain.{0,12}address", re.I)
49
+ SINK_CREDENTIAL = re.compile(
50
+ r"\b(?:totp|2fa|mfa|authenticator)[_-]?(?:seed|secret|key)\b|\botpauth://"
51
+ r"|\b(?:recovery|seed|mnemonic)[_-]?phrase\b|\bmnemonic\b|\bprivate[_-]?key\b|\brecovery[_-]?code\b"
52
+ r"|\b(?:api|license|licence|access)[_-]?key\b|\bclient[_-]?secret\b", re.I)
53
+ SINK_CONFIG = re.compile(
54
+ r"\bwebhook[_-]?url\b|\bcallback[_-]?url\b|\bdns[_-]?record\b|\bnameserver\b|\bcname[_-]?record\b"
55
+ r"|\binvoice[\s\S]{0,18}(?:account|iban|instructions|number)\b", re.I)
56
+
57
+ # --- Sink-role signals: the value is demonstrably SEEN/COPIED/LINKED (data-flow gate) ---
58
+ QR_SIGNAL = re.compile(r"\bqr[\s_-]?code\b|QRCode|react-qr|qrcode\.react|toDataURL\(", re.I)
59
+ CLIPBOARD = re.compile(r"navigator\.clipboard|clipboard\.writeText|copyToClipboard|useCopyToClipboard"
60
+ r"|writeText\(|execCommand\(\s*['\"]copy")
61
+ HREF_SINK = re.compile(r"href=\{|href=['\"](?:tel:|mailto:|bitcoin:|ethereum:|lightning:)"
62
+ r"|\b(?:to|toAddress|recipient|amount|payee)\s*=\s*\{")
63
+ # #2 — the sink value arrives over a client-side round-trip the browser (and a MitB) can intercept,
64
+ # rather than being server-rendered. A newly-added client fetch for a once-server-rendered value is a
65
+ # regression in itself (manufactures a tamper vector).
66
+ CLIENT_FETCH = re.compile(r"\bfetch\(|\baxios\b|useSWR\b|useQuery\b|useLazyQuery\b|\$\.(?:ajax|get|post)\b"
67
+ r"|XMLHttpRequest|\.get\(['\"]/(?:api|v\d)|graphql\b", re.I)
68
+
69
+ # Layer A — strict CSP detection (kept self-contained; transport_security.py owns the baseline audit)
70
+ CSP_PRESENT = re.compile(r"Content-Security-Policy|contentSecurityPolicy", re.I)
71
+ CSP_SCRIPT_SELF = re.compile(r"script-src[^;'\"]*'self'", re.I)
72
+ CSP_NONCE = re.compile(r"'nonce-|nonce-\$\{|\bstrict-dynamic\b", re.I)
73
+ CSP_UNSAFE = re.compile(r"'unsafe-(?:inline|eval)'", re.I)
74
+
75
+ # Layer B — out-of-band trust anchor detection
76
+ OOB_ANCHOR = re.compile(
77
+ r"safety[_-]?code|safetyCode|fingerprint|identicon|blockie|jazzicon|emoji[_-]?code"
78
+ r"|out[_-]of[_-]band|toChecksumAddress|getAddress\(|checksumAddress|\beip[_-]?55\b|verifyAddress"
79
+ r"|address[_-]?verif|verif\w*[_-]?address|sendVerificationEmail|canonical[_-]?address|mod[_-]?97", re.I)
80
+
81
+ # #7 — a fingerprint / safety-code derived from a TRUNCATED hash is grindable offline. Flag a hash/HMAC
82
+ # sliced to a small char count (hex → 4 bits/char, so .slice(0,12) ≈ 48 bits < the 60-bit floor), or a
83
+ # *code variable sliced short. Heuristic robustness note, not a deterministic vuln.
84
+ WEAK_FINGERPRINT = re.compile(
85
+ r"(?:sha256|sha1|sha512|md5|createHash|createHmac|\bhmac\b|digest)\b[\s\S]{0,90}?"
86
+ r"\.(?:slice|substring|substr)\(\s*0\s*,\s*([1-9]|1[0-4])\b"
87
+ r"|(?:safety|finger|verif|short|otp)[_-]?code\b[\s\S]{0,50}?\.(?:slice|substring|substr)\(\s*0\s*,\s*([1-9]|1[0-4])\b",
88
+ re.I)
89
+ # #8 — dishonest control framing: a CLIENT-side check asserted to be unbeatable. Genuine finding.
90
+ OVERCLAIM = re.compile(
91
+ r"tamper[\s_-]?proof|tamper[\s_-]?resistant|mitb[\s_-]?proof|man-in-the-browser[\s_-]?proof"
92
+ r"|impossible to (?:tamper|forge|fake|modify|intercept)|cryptographically (?:guaranteed|proven|secure)"
93
+ r"|can(?:'|no)?t be (?:tampered|forged|faked|modified|intercepted)|unhackable|100% (?:secure|safe)", re.I)
94
+
95
+ # WebSocket / realtime auth model — the CSWSH determinant (REF-PENTEST #4). CSWSH is only
96
+ # exploitable when the socket authenticates via an AMBIENT COOKIE the browser auto-attaches
97
+ # cross-origin. A token in the connection payload / subprotocol, stored origin-scoped, is NOT
98
+ # exploitable (SOP blocks a cross-origin page from reading it).
99
+ WS_USAGE = re.compile(r"new\s+WebSocket\(|socket\.io|graphql-ws|subscriptions-transport-ws|appsync-realtime"
100
+ r"|\bwss?://", re.I)
101
+ WS_COOKIE_AUTH = re.compile(r"withCredentials\s*:\s*true|credentials\s*:\s*['\"]include['\"]"
102
+ r"|document\.cookie[\s\S]{0,80}?(?:socket|ws\b|websocket)", re.I)
103
+
104
+
105
class ClientIntegrityExtractor(Extractor):
    """Extractor for the tamperable-display / man-in-the-browser class of exposure.

    Walks every code file yielded by the repo context, records which client-surface files render
    a money / credential / config sink value, and collects repo-wide CSP, out-of-band-anchor,
    weak-fingerprint, over-claim and WebSocket-auth signals. The result is a facts dict that
    carries the derived findings.
    """

    name = "client_integrity"
    category = "exposure"

    def extract(self, ctx: RepoContext, facts: dict) -> dict:
        """Scan the repository and return the client-integrity facts.

        Args:
            ctx: repo context; only ``ctx.iter_code()`` is used, which yields
                ``(path, rel, text)`` triples.
            facts: facts gathered so far; not read by this method.

        Returns:
            A dict with the detected sink files, the raw signals (CSP, anchors, QR / clipboard
            files, WebSocket auth model), a ``findings`` list and a summary ``note``.
        """
        frontend_suffixes = (".tsx", ".jsx", ".vue", ".svelte", ".astro", ".html", ".hbs")
        framework_markers = ("@Component(", "customElements.define", "LitElement")

        sinks: dict[str, str] = {}  # rel -> blast radius (money|credential|config)
        qr_files: list = []
        clip_files: list = []
        oob: list = []
        weak_fp: list = []
        overclaim: list = []
        tamper_vectors: list = []
        csp_present = False
        csp_self = False
        csp_nonce = False
        csp_unsafe = False
        ws_usage = False
        ws_cookie = False

        for _path, rel, text in ctx.iter_code():
            has_copy = any(rx.search(text) for rx in (CLIPBOARD, QR_SIGNAL, HREF_SINK))
            # A genuine browser-display surface: frontend extension, explicit client component, or
            # a client-framework marker — not a backend file that merely mentions a sink field.
            client_file = (
                rel.lower().endswith(frontend_suffixes)
                or "use client" in text[:400]
                or any(marker in text for marker in framework_markers)
            )

            # Money sinks stand on their own on a client surface; credential / config sinks also
            # need a copy / QR / href signal so a stray reference is not reported.
            radius = None
            if client_file:
                if SINK_MONEY.search(text):
                    radius = "money"
                elif has_copy:
                    if SINK_CREDENTIAL.search(text):
                        radius = "credential"
                    elif SINK_CONFIG.search(text):
                        radius = "config"
            if radius is not None:
                if rel not in sinks:
                    sinks[rel] = radius
                if CLIENT_FETCH.search(text):  # #2 — sink fed by an interceptable client round-trip
                    tamper_vectors.append(rel)

            # Capped evidence lists.
            if len(qr_files) < 30 and QR_SIGNAL.search(text):
                qr_files.append(rel)
            if len(clip_files) < 30 and CLIPBOARD.search(text):
                clip_files.append(rel)

            # CSP flags are sticky: one matching file anywhere sets them for the whole repo.
            csp_present = csp_present or bool(CSP_PRESENT.search(text))
            csp_self = csp_self or bool(CSP_SCRIPT_SELF.search(text))
            csp_nonce = csp_nonce or bool(CSP_NONCE.search(text))
            csp_unsafe = csp_unsafe or bool(CSP_UNSAFE.search(text))

            if len(oob) < 20 and OOB_ANCHOR.search(text):
                oob.append(rel)
            if client_file:
                # Client-side safety code only — a backend HMAC truncation is out of scope.
                if len(weak_fp) < 20 and WEAK_FINGERPRINT.search(text):
                    weak_fp.append(rel)
                if len(overclaim) < 20 and OVERCLAIM.search(text):
                    overclaim.append(rel)

            ws_usage = ws_usage or bool(WS_USAGE.search(text))
            ws_cookie = ws_cookie or bool(WS_COOKIE_AUTH.search(text))

        # strict = a real `script-src 'self'` (+ a nonce / strict-dynamic) with NO unsafe-inline/eval
        strict_csp = bool(csp_present and csp_self and csp_nonce and not csp_unsafe)
        out_of_band = bool(oob)
        ws_cookie_auth = bool(ws_usage and ws_cookie)  # the CSWSH determinant (ambient-cookie WS auth)

        radii = set(sinks.values())
        present = bool(sinks)
        kinds = "/".join(sorted(radii))
        sink_files = sorted(sinks)
        fetch_files = sorted(set(tamper_vectors))
        weak_files = sorted(set(weak_fp))
        overclaim_files = sorted(set(overclaim))

        # Severity tracks blast radius: a money/credential sink swap is irreversible → HIGH.
        if radii & {"money", "credential"}:
            sev_csp, sev_oob = "HIGH", "MEDIUM"
        else:
            sev_csp, sev_oob = "MEDIUM", "LOW"

        findings = []
        if present:
            shown = ", ".join(sink_files[:5])
            if not strict_csp:
                if not csp_present:
                    why = "no Content-Security-Policy found"
                elif csp_unsafe:
                    why = "CSP allows 'unsafe-inline'/'unsafe-eval' in script-src"
                else:
                    why = "CSP present but not a strict script-src 'self' + nonce policy"
                csp_detail = (
                    f"This app renders a {kinds}-class sink value the user reads/copies ({shown}) but "
                    f"{why}. A poisoned dependency or injected script (man-in-the-browser) can then "
                    "rewrite the displayed/copied value or swap the QR for EVERY user at once (the scalable "
                    "vector). Add Layer A: `script-src 'self'` + per-request nonce + `strict-dynamic`, no "
                    "unsafe-inline/eval, object-src 'none'. (Ship report-only first to avoid breaking SDKs, "
                    "then enforce.) Severity tracks irreversibility — a swapped money/credential value is "
                    "unrecoverable."
                )
                findings.append({
                    "severity": sev_csp,
                    "confidence": "LOW",
                    "attack_class": "tamperable-display",
                    "file": sink_files[0],
                    "issue": "security-critical value rendered client-side without a strict CSP",
                    "detail": csp_detail,
                })
            if not out_of_band:
                oob_detail = (
                    f"No second, browser-independent source of truth was found for {shown} "
                    "(emailed canonical value, a short safety code / fingerprint, a server-rendered "
                    "identicon, an EIP-55 / IBAN-mod-97 checksum). Without one, a single-surface tamper is "
                    "undetectable by the user. Add Layer B: anchor trust OFF the browser surface so the user "
                    "can cross-check. NOTE: on-screen display can never be made cryptographically "
                    "tamper-proof on the web — the goal is detectable, not impossible."
                )
                findings.append({
                    "severity": sev_oob,
                    "confidence": "LOW",
                    "attack_class": "tamperable-display",
                    "file": sink_files[0],
                    "issue": "no out-of-band trust anchor for the displayed security-critical value",
                    "detail": oob_detail,
                })

        # #2 — sink value arrives via an interceptable client round-trip (server-render or sign it)
        if present and fetch_files:
            fetch_shown = ", ".join(fetch_files[:4])
            fetch_detail = (
                f"The sink value in {fetch_shown} is populated by a client-side "
                "fetch/XHR whose response the browser — and a man-in-the-browser — can intercept and rewrite, "
                "rather than being server-rendered. Prefer server-render; if a round-trip is unavoidable, SIGN "
                "the payload and verify integrity, don't trust raw response fields. A NEWLY-added client "
                "round-trip for a once-server-rendered value is itself a regression."
            )
            findings.append({
                "severity": sev_oob,
                "confidence": "LOW",
                "attack_class": "client-tamper-vector",
                "file": fetch_files[0],
                "issue": "security-critical value populated by a client-side fetch (interceptable in the browser)",
                "detail": fetch_detail,
            })

        # #7 — grindable fingerprint/safety-code (robustness note, only meaningful when a sink exists)
        if present and weak_files:
            weak_shown = ", ".join(weak_files[:4])
            weak_detail = (
                f"A fingerprint/safety-code in {weak_shown} is a hash/HMAC sliced "
                "to a small character count. ~40-48 bits is brute-forceable on a commodity GPU in hours, so "
                "an attacker can grind a tampered value that yields a MATCHING code. Target >=60 bits, kept "
                "human-comparable (grouped base32, e.g. XXXX-XXXX-XXXX). Verify the slice length / encoding."
            )
            findings.append({
                "severity": "LOW",
                "confidence": "LOW",
                "attack_class": "weak-fingerprint",
                "file": weak_files[0],
                "issue": "safety-code / fingerprint derived from a truncated hash (grindable)",
                "detail": weak_detail,
            })

        # #8 — over-claimed control framing (genuine finding: it manufactures misplaced trust)
        if overclaim_files:
            overclaim_shown = ", ".join(overclaim_files[:4])
            overclaim_detail = (
                f"Code or UI copy in {overclaim_shown} asserts a CLIENT-SIDE control "
                "is tamper-proof / MitB-proof / cryptographically guaranteed. On the web that claim is false "
                "(the DOM is rewritable post-TLS) and it's a real finding: it makes teams and auditors "
                "OVERTRUST a tripwire and under-invest in the actual out-of-band / server-side control. "
                "Scope the claim honestly ('opportunistic tamper tripwire, not a guarantee') and ensure the "
                "trust root is out-of-band or server-side."
            )
            findings.append({
                "severity": "LOW",
                "confidence": "MEDIUM",
                "attack_class": "overclaimed-control",
                "file": overclaim_files[0],
                "issue": "client-side check framed as tamper-proof / cryptographically guaranteed",
                "detail": overclaim_detail,
            })

        # CSWSH is ONLY real when the WS auth is an ambient cookie (REF-PENTEST #4).
        if ws_cookie_auth:
            cswsh_detail = (
                "A WebSocket/realtime connection appears to authenticate via a cookie "
                "(withCredentials / credentials:'include'), which the browser auto-attaches "
                "cross-origin — so a page on any origin can open an authenticated socket (CSWSH, #4). "
                "Validate the Origin on the handshake, or move the credential into the connection "
                "payload / subprotocol and store it origin-scoped (not a cookie). If WS auth is "
                "already a token in the payload, CSWSH is NOT exploitable."
            )
            findings.append({
                "severity": "MEDIUM",
                "confidence": "LOW",
                "attack_class": "cswsh",
                "issue": "WebSocket authenticated via an ambient cookie (Cross-Site WebSocket Hijacking)",
                "detail": cswsh_detail,
            })

        if ws_cookie_auth:
            websocket_auth = "cookie (CSWSH-exposed — validate Origin)"
        elif ws_usage:
            websocket_auth = "token-or-none (CSWSH not exploitable)"
        else:
            websocket_auth = "no websocket detected"

        if present:
            note = (
                f"Renders {kinds}-class security-critical value(s) — review man-in-the-browser "
                "exposure: strict CSP (kill the scalable vector) + an out-of-band anchor (make tamper "
                "detectable). Inherent web-platform limit; treat as architectural, LOW-confidence."
            )
        else:
            note = "No security-critical display values detected — MITB/tamperable-display class N/A."

        return {
            "sensitive_display": sink_files,
            "sink_blast_radius": dict(sorted(sinks.items())),
            "websocket_auth": websocket_auth,
            "qr_generation": sorted(set(qr_files)),
            "clipboard_copy": sorted(set(clip_files)),
            "strict_csp": strict_csp,
            "csp_present": csp_present,
            "csp_has_unsafe": csp_unsafe,
            "out_of_band_anchor": out_of_band,
            "anchors_found": sorted(set(oob)),
            "weak_fingerprints": weak_files,
            "overclaimed_controls": overclaim_files,
            "client_fetch_sinks": fetch_files,
            "findings": findings,
            "note": note,
        }
@@ -22,7 +22,7 @@ PLAYGROUND = re.compile(r"playground\s*:\s*true|graphiql\s*:\s*true|LandingPageG
22
22
  LIMITING = re.compile(r"graphql-depth-limit|depthLimit|costAnalysis|graphql-cost-analysis|"
23
23
  r"createComplexityLimitRule|query-complexity|graphql-armor")
24
24
 
25
- # --- AppSync / managed GraphQL (PTREQ0013000 #2 introspection-via-WAF-bypass, #5 sub-authz) ---
25
+ # --- AppSync / managed GraphQL (REF-PENTEST #2 introspection-via-WAF-bypass, #5 sub-authz) ---
26
26
  APPSYNC_MARK = re.compile(r"appsync\.GraphqlApi|CfnGraphQLApi|Definition\.fromSchema|aws-appsync|aws_appsync", re.I)
27
27
  AWS_AUTH_DIRECTIVE = re.compile(r"@aws_(?:api_key|iam|oidc|cognito_user_pools|auth|subscribe)")
28
28
  # A Subscription field that carries a tenant-scoping arg MUST be authz-bound in its resolver, or any
@@ -33,7 +33,7 @@ TENANT_ARG = re.compile(r"\b(\w+)\s*\(([^)]*\b(?:groupId|group_id|orgId|org_id|t
33
33
  # Identity-binding signals in a VTL resolver — the field is tied to the CALLER, not a free arg.
34
34
  VTL_AUTHZ = re.compile(r"\$ctx(?:tx)?\.identity|\$context\.identity|identity\.(?:sub|username|claims|resolverContext)"
35
35
  r"|util\.unauthorized|\bgroupIds?\b[\s\S]{0,80}?\bcontains\b|#if\s*\(\s*!?\s*\$ctx\.identity")
36
- # Engine-level introspection disable on aws-cdk-lib appsync.GraphqlApi. The PTREQ0013000 RETEST
36
+ # Engine-level introspection disable on aws-cdk-lib appsync.GraphqlApi. The REF-PENTEST RETEST
37
37
  # proved this IS available and un-bypassable (unlike a WAF string-match) — so a correctly-configured
38
38
  # AppSync API must NOT be flagged. This corrects the 0.3.0 false positive that always cried wolf.
39
39
  APPSYNC_INTROSPECTION_OFF = re.compile(r"introspectionConfig\s*:\s*[\w.]*\bDISABLED\b")
@@ -93,7 +93,12 @@ class GraphQLExtractor(Extractor):
93
93
  "detail": "Set `introspectionConfig: appsync.IntrospectionConfig.DISABLED` so the engine "
94
94
  "rejects __schema/__type regardless of encoding. A WAF byte-match on `__schema` "
95
95
  "is NOT sufficient — bypassable via Unicode/JSON escapes and it only fronts one "
96
- "endpoint (PTREQ0013000 #2). Run the appsync-introspection probe to confirm."})
96
+ "endpoint (REF-PENTEST #2). Fronting AppSync with API Gateway is ALSO not the "
97
+ "fix: it proxies POST /graphql opaquely (it can't parse the query to block "
98
+ "introspection without the same bypassable string-match) and does not cover the "
99
+ "SEPARATE realtime WebSocket endpoint, so subscription-BOLA / CSWSH remain — fix "
100
+ "at the engine/auth layer, treat any gateway/WAF as defense-in-depth only. Run "
101
+ "the appsync-introspection probe to confirm."})
97
102
  if not (appsync_limiting or limiting):
98
103
  findings.append({"severity": "LOW", "issue": "AppSync has no query depth / resolver-count limit",
99
104
  "attack_class": "graphql",
@@ -131,7 +136,7 @@ class GraphQLExtractor(Extractor):
131
136
  def _subscription_authz(self, ctx: RepoContext, schema_texts: list, findings: list) -> list:
132
137
  """For each Subscription field carrying a tenant-scoping arg, check a co-located VTL resolver
133
138
  binds that arg to the caller's identity. Missing/passthrough VTL → cross-group BOLA: any
134
- authenticated user subscribes to any tenant's stream (PTREQ0013000 #5). Verified shape:
139
+ authenticated user subscribes to any tenant's stream (REF-PENTEST #5). Verified shape:
135
140
  the fixed (identity-bound) VTL PASSES; the pre-fix passthrough FIRES."""
136
141
  vtl_corpus = {ctx.rel(p): ctx.text(p) for p in ctx.glob("**/*.vtl", 300)}
137
142
  results = []
@@ -155,7 +160,7 @@ class GraphQLExtractor(Extractor):
155
160
  detail = (f"Subscription `{field}({args})` accepts a tenant arg but its VTL resolver does NOT bind "
156
161
  f"it to the caller's identity ($ctx.identity / groupIds.contains / util.unauthorized) — "
157
162
  f"any authenticated user can subscribe to ANY tenant's stream (cross-group BOLA, "
158
- f"PTREQ0013000 #5).")
163
+ f"REF-PENTEST #5).")
159
164
  results.append({"field": field, "verdict": verdict, "severity": sev})
160
165
  if sev != "OK":
161
166
  findings.append({"severity": sev, "attack_class": "bola",
@@ -19,7 +19,7 @@ UNTRUSTED = re.compile(
19
19
  USES = re.compile(r"uses:\s*([^\s@#]+)@([^\s#'\"]+)")
20
20
  SHA40 = re.compile(r"^[0-9a-f]{40}$")
21
21
 
22
- # CDK / managed-AppSync auth (PTREQ0013000 #4 CSWSH, + the #2/#5 attack surface). Regex over CDK
22
+ # CDK / managed-AppSync auth (REF-PENTEST #4 CSWSH, + the #2/#5 attack surface). Regex over CDK
23
23
  # TypeScript, not an AST — aliased/helper-extracted constructs can evade it (honest FN risk).
24
24
  APPSYNC_API = re.compile(r"appsync\.GraphqlApi|new\s+GraphqlApi|CfnGraphQLApi|aws-cdk-lib/aws-appsync|@aws-cdk/aws-appsync")
25
25
  # defaultAuthorization block resolving to API_KEY → the realtime/WebSocket endpoint takes a static
@@ -30,7 +30,7 @@ APPSYNC_APIKEY_MODE = re.compile(r"AuthorizationType\.API_KEY|authorizationType\
30
30
  WAFV2 = re.compile(r"wafv2\.CfnWebACL|\bCfnWebACL\b|aws_wafv2|wafv2\.CfnWebACLAssociation")
31
31
  WAF_ASSOC = re.compile(r"CfnWebACLAssociation|WebACLAssociation")
32
32
  # WAF used as the PRIMARY control for an app-layer flaw — a bypassable band-aid, not a remediation
33
- # (PTREQ0013000 #2/#11). A byteMatchStatement/regex matching `__schema`, SQL keywords or `<script`
33
+ # (REF-PENTEST #2/#11). A byteMatchStatement/regex matching `__schema`, SQL keywords or `<script`
34
34
  # means the app-layer bug is still there; the string-match is evadable via encoding + only one door.
35
35
  WAF_APPLAYER_MATCH = re.compile(
36
36
  r"(?:byteMatchStatement|searchString|RegexPatternSet|regexString)[\s\S]{0,220}?"