PyPI - websec-validator - Versions diffs - 0.2.0__py3-none-any.whl - Mend

websec-validator 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

websec_validator/__init__.py +14 -0
websec_validator/briefing.py +218 -0
websec_validator/calibration.json +75 -0
websec_validator/calibration.py +226 -0
websec_validator/cli.py +395 -0
websec_validator/constitution.py +81 -0
websec_validator/corpus.json +49 -0
websec_validator/dynamic.py +249 -0
websec_validator/extractors/__init__.py +56 -0
websec_validator/extractors/auth.py +77 -0
websec_validator/extractors/authz.py +130 -0
websec_validator/extractors/base.py +101 -0
websec_validator/extractors/client_exposure.py +48 -0
websec_validator/extractors/graphql.py +71 -0
websec_validator/extractors/iac_ci.py +65 -0
websec_validator/extractors/integrations.py +55 -0
websec_validator/extractors/routes.py +215 -0
websec_validator/extractors/schemas.py +75 -0
websec_validator/extractors/stack.py +80 -0
websec_validator/extractors/surface.py +86 -0
websec_validator/extractors/tenant.py +33 -0
websec_validator/findings.py +199 -0
websec_validator/probes.py +79 -0
websec_validator/proof.py +96 -0
websec_validator/recon.py +28 -0
websec_validator/report.py +114 -0
websec_validator/scanners.py +248 -0
websec_validator/templates/probes/bola-cross-tenant.sh +192 -0
websec_validator/templates/probes/bola-write-verbs.py +147 -0
websec_validator/templates/probes/compare-roles.sh +69 -0
websec_validator/templates/probes/dlp-bypass-offline.py +149 -0
websec_validator/templates/probes/hs256-brute-force.py +90 -0
websec_validator/templates/probes/jwt-attacks.sh +161 -0
websec_validator/templates/probes/mass-assignment.py +201 -0
websec_validator/templates/probes/race-conditions.py +144 -0
websec_validator/templates/probes/rate-limit-burst.sh +136 -0
websec_validator/templates/probes/s3-assess.sh +120 -0
websec_validator/templates/probes/ssrf-probes.sh +189 -0
websec_validator/templates/probes/webhook-forgery.py +113 -0
websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +75 -0
websec_validator/templates/reports/access-control-matrix.md.template +65 -0
websec_validator/templates/reports/findings-triage.md.template +28 -0
websec_validator/templates/reports/pentest-handover-brief.md.template +121 -0
websec_validator/templates/reports/per-tool-FINDINGS.md.template +37 -0
websec_validator-0.2.0.dist-info/METADATA +232 -0
websec_validator-0.2.0.dist-info/RECORD +50 -0
websec_validator-0.2.0.dist-info/WHEEL +5 -0
websec_validator-0.2.0.dist-info/entry_points.txt +2 -0
websec_validator-0.2.0.dist-info/licenses/LICENSE +21 -0
websec_validator-0.2.0.dist-info/top_level.txt +1 -0

websec_validator/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""websec-validator — local-first security recon that briefs an AI coding agent.
+The tool does the deterministic half (read the repo, run the scanners it finds,
+stage the probe library tailored to what it discovered) and emits three artifacts:
+  1. findings.json    — de-duplicated static scanner results
+  2. FACTS.json       — stack, routes, auth-model candidates, attack surface
+  3. AGENT-BRIEFING.md — marching orders + staged probe scripts for your AI agent
+It never calls an LLM, never runs a server, and never needs a running instance of
+the target app. Running the probes and applying fixes is the agent + human's job.
+"""
+__version__ = "0.1.0"

websec_validator/briefing.py ADDED Viewed

@@ -0,0 +1,218 @@
+"""Render AGENT-BRIEFING.md — the marching orders for the AI coding agent.
+Now driven by the full recon facts: it leads with the *targeting* (which exact
+endpoints are SSRF/IDOR/upload candidates), because that's what turns a generic
+probe into a precise one.
+"""
+from __future__ import annotations
+def _bullets(items, empty="_(none)_", cap=40):
+    items = list(items or [])
+    if not items:
+        return empty
+    shown = items[:cap]
+    out = "\n".join(f"- {x}" for x in shown)
+    if len(items) > cap:
+        out += f"\n- _…and {len(items) - cap} more (see FACTS.json)_"
+    return out
+def _section(title, items):
+    return f"**{title}** ({len(items or [])}):\n{_bullets(items)}\n"
+def render(facts: dict, scanners: dict, scan_results: list, probe_manifest: list,
+           unified: dict | None = None) -> str:
+    """Build the AGENT-BRIEFING.md text (Markdown) from the recon output.
+    Args:
+        facts: recon facts; the sections read here are "stack", "auth", "routes",
+            "tenant", "surface", "authz", "iac_ci", "client_exposure", "graphql",
+            "integrations", "schemas", plus the scalar "target" and "version".
+            Missing sections fall back to empty defaults via `.get`.
+        scanners: dict with "available" and "missing" lists of scanner descriptors
+            ("name"; missing ones also use "category" and optionally "install").
+        scan_results: per-scanner results; an entry with a "findings" key is shown as
+            a finding count + output path, any other entry by its "status".
+        probe_manifest: staged probes; only entries carrying "attack_class" are listed.
+        unified: optional de-duplicated findings summary ("total",
+            "cross_tool_or_dup_merged", "by_severity", "by_category", "top"). When
+            truthy it replaces the per-scanner lines in the static-findings section.
+    Returns:
+        The complete briefing as a single Markdown string.
+    """
+    # --- pull the recon sections out of `facts` (each defaults to empty) ---
+    stack = facts.get("stack", {})
+    auth = facts.get("auth", {})
+    routes = facts.get("routes", {})
+    tgt = routes.get("targeting", {})
+    tenant = facts.get("tenant", {})
+    surface = facts.get("surface", {})
+    sink_summary = ", ".join(f"{k} ({n})" for k, n in surface.get("sink_counts", {}).items()) or "_none_"
+    # --- access control: guard coverage, roles, unguarded writes, Next.js middleware ---
+    authz = facts.get("authz", {})
+    gs = authz.get("guard_summary", {})
+    global_auth = authz.get("global_auth_middleware", False)
+    roles_str = ", ".join(f"`{r}`" for r in authz.get("roles_detected", [])) or "_none detected_"
+    unprot = authz.get("write_endpoints_without_visible_guard", [])
+    unprot_section = (_section("Write endpoints with NO guard visible in their handler file (verify)", unprot)
+                      if unprot else "_Every write endpoint has a visible guard or looks public — still spot-check._")
+    mw = authz.get("next_middleware", {})
+    mw_line = (f"Next.js middleware `{mw.get('file')}` gates matchers: {mw.get('matchers')}"
+               if mw.get("present") else "_No Next.js middleware.ts found — auth is per-handler._")
+    # --- pipeline / IaC findings (only the first 20 are listed) ---
+    iac = facts.get("iac_ci", {})
+    iac_findings = iac.get("findings", [])
+    iac_lines = "\n".join(f"- **{f['severity']}** `{f['kind']}` — `{f['file']}` — {f['detail']}"
+                          for f in iac_findings[:20]) or "_none_"
+    # --- client-side secret exposure: both leak lists are concatenated ---
+    client = facts.get("client_exposure", {})
+    client_leaks = client.get("public_secret_leaks", []) + client.get("server_secret_in_client_component", [])
+    client_section = _bullets(client_leaks) if client_leaks else "_none detected_"
+    # --- GraphQL surface ---
+    gql = facts.get("graphql", {})
+    if gql.get("present"):
+        gfind = "; ".join(f"{x['severity']} {x['issue']}" for x in gql.get("findings", [])) or "no obvious issues"
+        gql_line = f"{', '.join(gql.get('endpoints', []))} · introspection={gql.get('introspection')} · {gfind}"
+    else:
+        gql_line = "_no GraphQL detected_"
+    # --- third-party integrations and webhook signature verification ---
+    integ = facts.get("integrations", {})
+    integ_line = ", ".join(integ.get("third_party_integrations", [])) or "none detected"
+    wh_unverified = integ.get("webhooks_without_sig_verification", [])
+    wh_line = (_section("⚠ Webhooks with NO signature-verification in their handler (verify)", wh_unverified)
+               if wh_unverified else f"_{len(integ.get('webhook_endpoints', []))} webhook endpoint(s); signature code present or none found_")
+    # --- scanners: what is available, what is missing, and what was run ---
+    avail = ", ".join(s["name"] for s in scanners.get("available", [])) or "none on PATH"
+    missing = "\n".join(f"- **{s['name']}** ({s['category']}) — `{s.get('install','')}`"
+                        for s in scanners.get("missing", [])) or "_all relevant scanners present_"
+    if scan_results:
+        scan_lines = "\n".join(
+            (f"- **{r.get('name')}** → {r.get('findings','?')} finding(s) (`{r.get('output','')}`)"
+             if "findings" in r else f"- **{r.get('name')}** → {r.get('status','?')}")
+            for r in scan_results)
+    else:
+        scan_lines = "_Detected but not executed — run `websec run <repo> --scan`._"
+    # The unified (de-duplicated) summary, when provided, takes precedence over the
+    # raw per-scanner lines in the static-findings section.
+    if unified:
+        top_lines = "\n".join(
+            f"- **{t['severity']}** [{t['category']}] {t['title']} — `{t['file']}` ({'+'.join(t['tools'])})"
+            for t in unified.get("top", [])) or "_no findings_"
+        findings_block = (
+            f"**{unified['total']} de-duplicated findings** "
+            f"({unified['cross_tool_or_dup_merged']} cross-tool/duplicate merged) · "
+            f"by severity {unified['by_severity']} · by category {unified['by_category']}\n\n"
+            f"Top findings (full list in `findings.json`):\n{top_lines}")
+    else:
+        findings_block = scan_lines
+    # Manifest entries without an "attack_class" key are skipped.
+    probe_lines = "\n".join(
+        f"- **{p['key']}** — {p.get('attack_class','')}  \n"
+        f"  `{p['file']}` · _supply:_ {p.get('agent_must_supply','')}"
+        for p in probe_manifest if "attack_class" in p)
+    # Appendix inventory: the HTTP method is padded to 6 columns; capped at 80 entries.
+    endpoints = routes.get("endpoints", [])
+    inventory = _bullets([f"`{e['method']:6}` {e['path']}" for e in endpoints], cap=80)
+    # Everything below is the literal Markdown template emitted to the agent.
+    return f"""# AGENT BRIEFING — security pass for `{facts.get('target','')}`
+> Generated by **websec-validator v{facts.get('version','')}** — deterministic recon, no LLM.
+> **You are an AI coding agent.** The tool did the reliable half (mapping the surface + staging
+> the probes). You + the human do the reasoning, the running, and the fixing.
+| Lane | Owns |
+|---|---|
+| 🔧 tool (done) | recon → {routes.get('count',0)} endpoints, scanner findings, staged probes |
+| 🤖 you | confirm auth/tenant model, finalize + run the probes at the targets below, triage, fix |
+| 🧑 human | running TEST instance + test accounts; review every diff |
+⚠️ Static findings + recon need **no running app**. The probes need a **live test instance + test
+credentials** — ask the human, never fabricate, never hit production.
+---
+## 1. What this app is (detected)
+- **Languages:** {", ".join(stack.get("languages", [])) or "?"}  ·  **Frameworks:** {", ".join(stack.get("frameworks", [])) or "?"}
+- **Datastores:** {", ".join(stack.get("datastores", [])) or "?"}  ·  **Monorepo:** {stack.get("monorepo", False)}
+- **Auth scheme:** `{auth.get("scheme","?")}` (token in {auth.get("token_location","?")})  ·  guard files: {len(auth.get("guard_files", []))}
+- **Route engine:** {routes.get("engine","?")}  ·  **{routes.get('count',0)} endpoints**  ·  by method: {routes.get("by_method", {})}
+## 2. ★ Tenant boundary (confirm first — highest value, easiest to get wrong)
+{_bullets([f"`{t['key']}` — {t['occurrences']}×" for t in tenant.get("candidates", [])],
+          "_no common tenant key found — confirm whether this app is multi-tenant; if not, skip cross-tenant probes_")}
+{tenant.get("note","")}
+## 3. ★ Attack surface & targeting (point the probes HERE)
+{_section("IDOR / BOLA candidates — endpoints with a path/object id", tgt.get("idor_candidates"))}
+{_section("SSRF candidates — endpoints taking a url/domain-ish param", tgt.get("ssrf_candidates"))}
+{_section("Open-redirect candidates", tgt.get("open_redirect_candidates"))}
+{_section("File-upload candidates — path-traversal / content-type", tgt.get("upload_candidates"))}
+{_section("Write endpoints — mass-assignment / BOLA-write", tgt.get("write_endpoints"))}
+{_section("Auth endpoints", tgt.get("auth_endpoints"))}
+**Code-level sinks** (cross-reference with the above): {sink_summary}
+**Mass-assignment targets** — this app's privileged model fields (try injecting these into create/update payloads): {", ".join(facts.get("schemas", {}).get("sensitive_fields", [])) or "_none detected_"}  ·  ORMs: {", ".join(facts.get("schemas", {}).get("orms", [])) or "?"}
+## 3b. ★ Access control (who can reach what — your #1 test)
+Guard coverage (file-level heuristic): {gs.get("with_visible_guard",0)} with visible guard · {gs.get("no_visible_guard",0)} none visible · {gs.get("unknown",0)} unknown.  Global auth middleware: **{global_auth}**.  Roles in code: {roles_str}
+{authz.get("note","")}
+{unprot_section}
+{mw_line}
+## 3c. Config, CI/CD & client-side risks
+**Pipeline / IaC** ({len(iac_findings)} finding(s)):
+{iac_lines}
+**Client-side secret exposure** (ships to the browser if real): {client_section}
+Production source maps exposed: {client.get("production_source_maps", False)}
+**GraphQL surface:** {gql_line}
+**Third-party integrations:** {integ_line}
+{wh_line}
+## 4. Static findings (no running app needed)
+Scanners available: {avail}
+{findings_block}
+Install for fuller coverage:
+{missing}
+## 5. Tailored probes (staged — drafts you finalize against §2–§3)
+{probe_lines}
+Keep these in the repo after you run them — re-running after a fix proves "still blocked, now safer."
+## 6. How to work this — verify with a debate, then fix
+The findings ledger (`findings-ledger.json` / REPORT.md) comes pre-ranked with a **confidence**
+(HIGH = dynamically confirmed; MEDIUM/LOW = hypothesis). Each finding also carries a **calibrated**
+estimate — `calibrated.p` (measured real-vuln rate for that attack-class/confidence bucket on a
+labeled vuln corpus), `calibrated.ci` (95% interval), `calibrated.n` (sample size), `calibrated.basis`.
+**A wide CI or `basis: prior (uncalibrated)` means thin data — lean on the debate, not the number.**
+The rates skew optimistic (the corpus is deliberately vulnerable); to be conservative, threshold on the
+CI lower bound. **The calibration self-improves:** every `websec dynamic` run folds its *confirmed*
+results (a write that executed unauthenticated = real; one that's auth-enforced = a recon false positive)
+into a local overlay, so these numbers personalize to your apps the more you run it. **Verify before you
+report** — especially MEDIUM/LOW — by running a 4-role debate per finding (this is the FP killer):
+- **Advocate** — argue it's real; cite the evidence chain + the CWE / OWASP-API.
+- **Challenger** — try hard to *refute* it: false positive? intended-public? unreachable? guarded by a
+  pattern the static scan missed? (default to skepticism)
+- **Mediator** — decide: confirmed / false-positive / needs-data. You may override the tool.
+- **Explainer** — write the survivor up: exact `curl` repro, real impact, and the fix.
+**Generate probes the same way** — a Positive perspective (intended behavior holds) + Negative
+(bypass / injection / error) + Edge (boundary / concurrency / unusual input), then a Critic dedupes
+them into one runnable suite. More perspectives = broader coverage.
+**Verify the constitution** (`CONSTITUTION.md`): every ⬜ line is a Given/When/Then to confirm with a
+probe — flip it to ✅ holds or 🔴 VIOLATED.
+Order: static triage (on a {surface.get("datastore_class","?")} datastore, injection alerts are usually FPs) →
+confirm the auth/tenant model → run §3-targeted probes (low-priv, then cross-tenant; record PASS counts
+like "14/14 blocked") → fix what fails → re-run to confirm. **Human reviews every diff; never run
+destructive or production probes without explicit authorization.**
+## 7. Hand back
+What was tested, what held (PASS counts), what's open (repro + fix), which probes are now regression tests. Cite `FACTS.json` + `scanners/`.
+---
+## Appendix A — full endpoint inventory
+{inventory}
+"""

websec_validator/calibration.json ADDED Viewed

@@ -0,0 +1,75 @@
+{
+  "meta": {
+    "corpus": [
+      "VAmPI",
+      "NodeGoat",
+      "DVGA"
+    ],
+    "n_total": 59,
+    "method": "binomial proportion + Wilson 95% CI",
+    "min_n": 5,
+    "unmatched_rule": "unmatched finding = false positive",
+    "researched_classes": [
+      "command-injection",
+      "graphql",
+      "missing-auth",
+      "sqli",
+      "ssti"
+    ],
+    "caveat": "indicative \u2014 calibrated on a deliberately-vulnerable app corpus; skews optimistic on clean production code"
+  },
+  "by_class_label": {
+    "command-injection|LOW": {
+      "n": 1,
+      "k": 1,
+      "p": 1.0,
+      "ci": [
+        0.207,
+        1.0
+      ]
+    },
+    "graphql|MEDIUM": {
+      "n": 2,
+      "k": 2,
+      "p": 1.0,
+      "ci": [
+        0.342,
+        1.0
+      ]
+    },
+    "missing-auth|MEDIUM": {
+      "n": 41,
+      "k": 27,
+      "p": 0.659,
+      "ci": [
+        0.505,
+        0.784
+      ]
+    }
+  },
+  "by_label": {
+    "LOW": {
+      "n": 8,
+      "k": 1,
+      "p": 0.125,
+      "ci": [
+        0.022,
+        0.471
+      ]
+    },
+    "MEDIUM": {
+      "n": 51,
+      "k": 29,
+      "p": 0.569,
+      "ci": [
+        0.433,
+        0.695
+      ]
+    }
+  },
+  "prior": {
+    "HIGH": 0.85,
+    "MEDIUM": 0.5,
+    "LOW": 0.25
+  }
+}

websec_validator/calibration.py ADDED Viewed

@@ -0,0 +1,226 @@
+"""Calibrated confidence (CJE) — turn the rule-based HIGH/MEDIUM/LOW labels into
+*measured* real-rates with honest confidence intervals.
+WHAT THIS IS (honest scope): run the recon ledger against a labeled vuln-app corpus,
+count how often each (attack_class, label) bucket actually corresponds to a real,
+documented vulnerability, and express it as an observed rate + a **Wilson score
+interval**. With a small corpus the INTERVAL is the headline — a wide CI means
+"grounded, but not enough data to be sure yet." The numbers tighten as the corpus grows.
+WHAT THIS IS NOT: calibrated on *deliberately-vulnerable* apps, so the rates skew
+OPTIMISTIC for normal/clean code (real repos have a far lower base rate of true vulns).
+Every per-finding estimate carries the sample size `n` and a `basis` so the consumer
+can see how much to trust it; a finding that doesn't match a documented vuln is counted
+as a false positive (the corpus is well-documented, so unlisted findings are noise).
+No ML, no deps — binomial proportion + Wilson interval (stdlib `math`). The cell
+structure upgrades cleanly to isotonic regression if a large labeled set ever exists.
+"""
+from __future__ import annotations
+import json
+import math
+import os
+from importlib import resources
+from pathlib import Path
+# Default `z` for wilson(): the standard-normal critical value of a two-sided 95% interval.
+Z95 = 1.959963984540054   # z for a 95% two-sided interval
+MIN_N = 5                 # a cell needs ≥ this many samples to be used (else fall back a tier)
+# uncalibrated fallback prior — used ONLY when we have no data; always labeled as such
+PRIOR = {"HIGH": 0.85, "MEDIUM": 0.5, "LOW": 0.25}
+# Caveat text stored in the table's "meta" by fit() and returned as the "note" by apply().
+CAVEAT = ("indicative — calibrated on a deliberately-vulnerable app corpus; "
+          "skews optimistic on clean production code")
+# Self-improving LOCAL overlay: user-global, gitignored (lives outside any repo), never
+# shipped. It accrues *confirmed* labels from your own dynamic runs (and optional hand-labels)
+# and is merged on top of the shipped public table so the numbers personalize to YOUR apps.
+# Directory: $WEBSEC_CALIBRATION_HOME if set, else ~/.cache/websec-validator; the file
+# inside it is always named calibration-local.json.
+LOCAL_PATH = Path(os.environ.get("WEBSEC_CALIBRATION_HOME",
+                                 str(Path.home() / ".cache" / "websec-validator"))) / "calibration-local.json"
+def wilson(k: int, n: int, z: float = Z95) -> tuple:
+    """95% Wilson score interval for k successes in n trials → (lo, hi), clamped to [0,1].
+    Wilson (not the normal approximation) because it stays sane at small n and extreme
+    p — exactly our regime. n=0 → (0,1): maximal ignorance.
+    """
+    if n <= 0:
+        return (0.0, 1.0)
+    phat = k / n
+    denom = 1 + z * z / n
+    center = (phat + z * z / (2 * n)) / denom
+    half = (z / denom) * math.sqrt(phat * (1 - phat) / n + z * z / (4 * n * n))
+    return (max(0.0, center - half), min(1.0, center + half))
+def _cell(k: int, n: int) -> dict:
+    lo, hi = wilson(k, n)
+    return {"n": n, "k": k, "p": round(k / n, 3) if n else None,
+            "ci": [round(lo, 3), round(hi, 3)]}
+def is_real(attack_class: str, location: str, truth: list) -> bool:
+    """A finding is REAL iff it matches a documented truth entry, else a false positive.
+    (Conservative rule, per design decision: on a well-documented vuln app, a finding
+    that isn't on the known-vuln list is treated as noise.)
+    """
+    loc = (location or "").lower()
+    for t in (truth or []):
+        if t.get("class") != attack_class:
+            continue
+        sub = (t.get("location_contains") or "").lower()
+        if not sub or sub == "*" or sub in loc:
+            return True
+    return False
+def fit(labeled: list, corpus_names: list, researched_classes: set | None = None) -> dict:
+    """labeled: list of {attack_class, confidence, is_real}. Returns the calibration table.
+    `researched_classes`: classes for which the corpus has actual ground truth. Per-class
+    cells are published ONLY for these — a class we never researched would otherwise emit a
+    misleading p=0 (every finding auto-counted FP). Such findings still count as FP in the
+    per-label aggregate (conservative), but at runtime fall back to that aggregate.
+    """
+    by_cl: dict = {}
+    by_l: dict = {}
+    for r in labeled:
+        cl = f"{r['attack_class']}|{r['confidence']}"
+        by_cl.setdefault(cl, [0, 0])
+        by_l.setdefault(r["confidence"], [0, 0])
+        by_cl[cl][1] += 1
+        by_l[r["confidence"]][1] += 1
+        if r["is_real"]:
+            by_cl[cl][0] += 1
+            by_l[r["confidence"]][0] += 1
+    cells = {k: _cell(v[0], v[1]) for k, v in sorted(by_cl.items())}
+    if researched_classes is not None:
+        rc = set(researched_classes)
+        cells = {k: c for k, c in cells.items() if k.split("|", 1)[0] in rc}
+    return {
+        "meta": {"corpus": corpus_names, "n_total": len(labeled),
+                 "method": "binomial proportion + Wilson 95% CI", "min_n": MIN_N,
+                 "unmatched_rule": "unmatched finding = false positive",
+                 "researched_classes": sorted(researched_classes) if researched_classes is not None else None,
+                 "caveat": CAVEAT},
+        "by_class_label": cells,
+        "by_label": {k: _cell(v[0], v[1]) for k, v in sorted(by_l.items())},
+        "prior": PRIOR,
+    }
+def load_shipped() -> dict | None:
+    """Load the shipped, public, corpus-based calibration.json (best-effort)."""
+    try:
+        p = resources.files("websec_validator").joinpath("calibration.json")
+        return json.loads(p.read_text())
+    except Exception:
+        return None
+def load_local() -> dict | None:
+    """Load the user-global self-improving overlay (raw cell counts; best-effort)."""
+    try:
+        if LOCAL_PATH.is_file():
+            return json.loads(LOCAL_PATH.read_text())
+    except Exception:
+        pass
+    return None
+def _merge(shipped: dict | None, local: dict | None) -> dict | None:
+    """Combine the shipped table with the local overlay by SUMMING cell counts, then
+    recomputing Wilson. Local samples are confirmed (oracle), so they're not filtered."""
+    if not shipped and not local:
+        return None
+    base = json.loads(json.dumps(shipped)) if shipped else {"meta": {"caveat": CAVEAT},
+                                                            "by_class_label": {}, "by_label": {}}
+    base.setdefault("prior", PRIOR)
+    base.setdefault("meta", {})
+    if local:
+        for grp in ("by_class_label", "by_label"):
+            merged = dict(base.get(grp, {}))
+            for key, lc in (local.get(grp, {}) or {}).items():
+                sc = merged.get(key, {})
+                merged[key] = _cell(sc.get("k", 0) + lc.get("k", 0), sc.get("n", 0) + lc.get("n", 0))
+            base[grp] = merged
+        ls = (local.get("meta", {}) or {}).get("samples", 0)
+        base["meta"]["personalized"] = True
+        base["meta"]["local_samples"] = ls
+        base["meta"]["caveat"] = (base["meta"].get("caveat", CAVEAT)
+                                  + f" · +{ls} confirmed local sample(s) folded in (personalized to your apps)")
+    return base
+def load() -> dict | None:
+    """Merged calibration the runtime uses: shipped public table + your LOCAL self-improving overlay."""
+    return _merge(load_shipped(), load_local())
+def record_samples(labeled: list, runs: int = 1) -> dict | None:
+    """Fold confirmed labeled samples into the LOCAL overlay (best-effort; user-global, gitignored).
+    `labeled`: list of {attack_class, confidence, is_real}. Returns the updated overlay, or None
+    if there was nothing to record / the write failed (never raises — calibration is non-critical).
+    """
+    if not labeled:
+        return None
+    try:
+        local = load_local() or {"meta": {"source": "local self-improving overlay", "samples": 0, "runs": 0},
+                                 "by_class_label": {}, "by_label": {}}
+        for r in labeled:
+            for grp, key in (("by_class_label", f"{r['attack_class']}|{r['confidence']}"),
+                             ("by_label", r["confidence"])):
+                cell = local.setdefault(grp, {}).setdefault(key, {"n": 0, "k": 0})
+                cell["n"] += 1
+                cell["k"] += 1 if r.get("is_real") else 0
+        local["meta"]["samples"] = local["meta"].get("samples", 0) + len(labeled)
+        local["meta"]["runs"] = local["meta"].get("runs", 0) + runs
+        LOCAL_PATH.parent.mkdir(parents=True, exist_ok=True)
+        LOCAL_PATH.write_text(json.dumps(local, indent=2) + "\n")
+        return local
+    except Exception:
+        return None
+def samples_from_dynamic(dynamic: dict) -> list:
+    """Turn a dynamic run into confirmed calibration samples — dynamic is an ORACLE.
+    Write-verb auth enforcement is unambiguous: a write that EXECUTED unauthenticated (or reached
+    the handler past the auth gate) is a real missing-auth; one that's auth-enforced is a recon
+    FALSE POSITIVE (recon flagged it, the live app actually blocks it). Cross-tenant LEAKs are
+    confirmed BOLA. (Unauth GET reachability is excluded — a public endpoint reached without auth
+    may be intended, so it's not a clean label.)
+    """
+    out = []
+    for r in (((dynamic or {}).get("write_auth_enforcement", {}) or {}).get("results", []) or []):
+        v = r.get("verdict", "")
+        if v == "auth-enforced":
+            out.append({"attack_class": "missing-auth", "confidence": "MEDIUM", "is_real": False})
+        elif v == "EXECUTED-UNAUTH" or v.startswith("no-auth-gate"):
+            out.append({"attack_class": "missing-auth", "confidence": "MEDIUM", "is_real": True})
+    for _lk in (((dynamic or {}).get("cross_tenant_bola", {}) or {}).get("leaks", []) or []):
+        out.append({"attack_class": "bola", "confidence": "MEDIUM", "is_real": True})
+    return out
+def apply(attack_class: str, confidence: str, table: dict | None) -> dict:
+    """Attach a calibrated estimate for a finding's (attack_class, confidence) bucket.
+    Three-tier graceful fallback: per-(class,label) if it has ≥ min_n samples, else
+    per-label, else an explicitly-flagged uncalibrated prior. Always reports `n` + `basis`.
+    """
+    if table:
+        min_n = table.get("meta", {}).get("min_n", MIN_N)
+        caveat = table.get("meta", {}).get("caveat", CAVEAT)
+        cl = table.get("by_class_label", {}).get(f"{attack_class}|{confidence}")
+        if cl and cl["n"] >= min_n:
+            return {"p": cl["p"], "ci": cl["ci"], "n": cl["n"], "basis": "class+label", "note": caveat}
+        lab = table.get("by_label", {}).get(confidence)
+        if lab and lab["n"] >= min_n:
+            return {"p": lab["p"], "ci": lab["ci"], "n": lab["n"], "basis": "label", "note": caveat}
+    prior = (table or {}).get("prior", PRIOR)
+    return {"p": prior.get(confidence, 0.5), "ci": [0.0, 1.0], "n": 0,
+            "basis": "prior (uncalibrated)", "note": "no calibration data for this bucket — uncalibrated prior"}