PyPI - websec-validator - Versions diffs - 0.2.0__py3-none-any.whl - Mend

websec-validator 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

websec_validator/__init__.py +14 -0
websec_validator/briefing.py +218 -0
websec_validator/calibration.json +75 -0
websec_validator/calibration.py +226 -0
websec_validator/cli.py +395 -0
websec_validator/constitution.py +81 -0
websec_validator/corpus.json +49 -0
websec_validator/dynamic.py +249 -0
websec_validator/extractors/__init__.py +56 -0
websec_validator/extractors/auth.py +77 -0
websec_validator/extractors/authz.py +130 -0
websec_validator/extractors/base.py +101 -0
websec_validator/extractors/client_exposure.py +48 -0
websec_validator/extractors/graphql.py +71 -0
websec_validator/extractors/iac_ci.py +65 -0
websec_validator/extractors/integrations.py +55 -0
websec_validator/extractors/routes.py +215 -0
websec_validator/extractors/schemas.py +75 -0
websec_validator/extractors/stack.py +80 -0
websec_validator/extractors/surface.py +86 -0
websec_validator/extractors/tenant.py +33 -0
websec_validator/findings.py +199 -0
websec_validator/probes.py +79 -0
websec_validator/proof.py +96 -0
websec_validator/recon.py +28 -0
websec_validator/report.py +114 -0
websec_validator/scanners.py +248 -0
websec_validator/templates/probes/bola-cross-tenant.sh +192 -0
websec_validator/templates/probes/bola-write-verbs.py +147 -0
websec_validator/templates/probes/compare-roles.sh +69 -0
websec_validator/templates/probes/dlp-bypass-offline.py +149 -0
websec_validator/templates/probes/hs256-brute-force.py +90 -0
websec_validator/templates/probes/jwt-attacks.sh +161 -0
websec_validator/templates/probes/mass-assignment.py +201 -0
websec_validator/templates/probes/race-conditions.py +144 -0
websec_validator/templates/probes/rate-limit-burst.sh +136 -0
websec_validator/templates/probes/s3-assess.sh +120 -0
websec_validator/templates/probes/ssrf-probes.sh +189 -0
websec_validator/templates/probes/webhook-forgery.py +113 -0
websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +75 -0
websec_validator/templates/reports/access-control-matrix.md.template +65 -0
websec_validator/templates/reports/findings-triage.md.template +28 -0
websec_validator/templates/reports/pentest-handover-brief.md.template +121 -0
websec_validator/templates/reports/per-tool-FINDINGS.md.template +37 -0
websec_validator-0.2.0.dist-info/METADATA +232 -0
websec_validator-0.2.0.dist-info/RECORD +50 -0
websec_validator-0.2.0.dist-info/WHEEL +5 -0
websec_validator-0.2.0.dist-info/entry_points.txt +2 -0
websec_validator-0.2.0.dist-info/licenses/LICENSE +21 -0
websec_validator-0.2.0.dist-info/top_level.txt +1 -0

websec_validator/extractors/tenant.py ADDED Viewed

@@ -0,0 +1,33 @@
+"""Tenant-boundary extractor — the multi-tenancy key candidates.
+The single most important and easiest-to-get-wrong fact for BOLA testing. The
+tool reports candidates by frequency; the agent confirms THE one with the human.
+"""
+from __future__ import annotations
+from .base import Extractor, RepoContext
+# Identifier spellings treated as tenant-boundary candidates (camelCase and
+# snake_case variants). TenantExtractor.extract looks for each one as a plain
+# substring of every code file's text and counts the occurrences.
+TENANT_KEYS = ["groupId", "group_id", "orgId", "org_id", "organizationId",
+               "tenantId", "tenant_id", "workspaceId", "workspace_id",
+               "accountId", "account_id", "companyId", "company_id",
+               "teamId", "team_id", "projectId", "project_id"]
+class TenantExtractor(Extractor):
+    name = "tenant"
+    category = "authz"
+    def extract(self, ctx: RepoContext, facts: dict) -> dict:
+        hits: dict = {}
+        for _p, _rel, text in ctx.iter_code():
+            for key in TENANT_KEYS:
+                if key in text:
+                    hits[key] = hits.get(key, 0) + text.count(key)
+        ranked = sorted(hits.items(), key=lambda kv: -kv[1])
+        return {
+            "candidates": [{"key": k, "occurrences": n} for k, n in ranked[:6]],
+            "multi_tenant_likely": bool(ranked and ranked[0][1] >= 3),
+            "note": "AGENT: confirm with the human which key (if any) is THE tenant boundary. "
+                    "If single-tenant, skip the cross-tenant BOLA probes.",
+        }

websec_validator/findings.py ADDED Viewed

@@ -0,0 +1,199 @@
+"""Traceable findings ledger — correlate recon + static scanners + dynamic into ONE
+ranked, standards-cited, confidence-scored record set.
+Each finding carries an **evidence chain** across layers (recon → static → dynamic),
+an **OWASP/CWE/ASVS citation**, a **rule-based confidence** (HIGH/MEDIUM/LOW — no ML;
+dynamic-confirmed beats static hypothesis), and a **remediation**. This is the
+deterministic half of the AITPG/TRACE design — the consuming agent then runs the
+adversarial debate (Advocate→Challenger→Mediator→Explainer) to verify, per the briefing.
+Confidence rule (deterministic):
+  HIGH    — dynamically confirmed (executed unauth / cross-tenant leak), OR a verified
+            secret, OR a fixed-version CVE at HIGH/CRITICAL.
+  MEDIUM  — static evidence with a concrete pattern (recon no-guard write, SAST hit,
+            real-but-lower CVE).
+  LOW     — single-source hypothesis with no corroboration (recon-only signal, e.g. a
+            user-input-gated sink).
+"""
+from __future__ import annotations
+import fnmatch
+from pathlib import Path
+from . import calibration
+# attack class → authoritative citations.
+# Each value is a 3-tuple: (list of CWE labels, ASVS reference string, list of
+# OWASP API Top-10 labels). _cite() unpacks it in exactly that order.
+STANDARDS = {
+    "missing-auth": (["CWE-862 Missing Authorization", "CWE-306 Missing Authentication"],
+                     "ASVS V4.1.1", ["API1:2023 BOLA", "API5:2023 BFLA"]),
+    "bola": (["CWE-639 Authorization Bypass (IDOR)"], "ASVS V4.2.1", ["API1:2023 BOLA"]),
+    "ssrf": (["CWE-918 SSRF"], "ASVS V12.6", ["API7:2023 SSRF"]),
+    "secret": (["CWE-798 Hard-coded Credentials"], "ASVS V2.10", ["API8:2023 Misconfiguration"]),
+    "sqli": (["CWE-89 SQL Injection"], "ASVS V5.3.4", ["API8:2023"]),
+    "command-injection": (["CWE-78 OS Command Injection"], "ASVS V5.3.8", []),
+    "path-traversal": (["CWE-22 Path Traversal"], "ASVS V12.3", []),
+    "ssti": (["CWE-1336 SSTI"], "ASVS V5.2.5", []),
+    "open-redirect": (["CWE-601 Open Redirect"], "ASVS V5.1.5", []),
+    "insecure-deserialization": (["CWE-502 Deserialization"], "ASVS V5.5", []),
+    "xxe": (["CWE-611 XXE"], "ASVS V5.5.2", []),
+    "prototype-pollution": (["CWE-1321 Prototype Pollution"], "ASVS V5.1", []),
+    "mass-assignment": (["CWE-915 Mass Assignment"], "ASVS V5.1.2", ["API3:2023 BOPLA"]),
+    "cve": (["CWE-1395 Vulnerable Dependency"], "ASVS V14.2.1", ["API8:2023"]),
+    "iac": (["CWE-1188 Insecure Default"], "ASVS V14.1", []),
+    "client-exposure": (["CWE-200 Information Exposure"], "ASVS V14.3", []),
+    "graphql": (["CWE-200 Information Exposure"], "ASVS V13.1", ["API8:2023"]),
+    "sast": (["CWE-710 Coding Standards"], "ASVS V1.1", []),
+}
+# attack class → remediation text attached to each finding. Classes without an
+# entry here get _DEFAULT_REM (see _f()).
+REMEDIATION = {
+    "missing-auth": "Add an auth guard to the handler (e.g. requireAuth()/getServerSession()), or a "
+                    "middleware matcher over /api/(.*) with an explicit public allowlist so it can't be forgotten.",
+    "bola": "Enforce object ownership: verify the authenticated principal owns/can access the resource id (tenant scope).",
+    "ssrf": "Validate + allowlist outbound URLs; block RFC1918/IMDS/file://; never fetch a raw user-supplied URL.",
+    "secret": "Rotate the credential, remove from code/history, load from a secrets manager.",
+    "cve": "Upgrade the dependency to the fixed version.",
+    "iac": "Apply the hardening (non-root user, pin actions to a SHA, enforce TLS, etc.).",
+    "client-exposure": "Move the secret server-side; never reference it from a client component or a NEXT_PUBLIC_/VITE_ var.",
+    "graphql": "Disable introspection + the playground in production; add query depth/complexity limits.",
+}
+# Fallback remediation for attack classes missing from REMEDIATION.
+_DEFAULT_REM = "Review and remediate per the cited standard."
+# Numeric ranks used by build_ledger() to sort findings (higher sorts first).
+SEV_RANK = {"CRITICAL": 4, "HIGH": 3, "MEDIUM": 2, "LOW": 1, "INFO": 0}
+CONF_RANK = {"HIGH": 2, "MEDIUM": 1, "LOW": 0}
+# HTTP methods that build_ledger() treats as write operations.
+WRITE_VERBS = {"POST", "PUT", "PATCH", "DELETE"}
+def _cite(cls):
+    cwe, asvs, api = STANDARDS.get(cls, ([], "", []))
+    return {"cwe": cwe, "asvs": asvs, "owasp_api": api}
+def load_suppressions(repo_root: Path) -> list:
+    """Read `.websec-ignore` (repo root or cwd): glob path patterns or `category:<x>` lines."""
+    pats = []
+    for cand in (repo_root / ".websec-ignore", Path.cwd() / ".websec-ignore"):
+        try:
+            if cand.is_file():
+                for ln in cand.read_text().splitlines():
+                    ln = ln.split("#", 1)[0].strip()
+                    if ln:
+                        pats.append(ln)
+        except Exception:
+            pass
+    return pats
+def _suppressed(f, pats):
+    hay = f"{f.get('category','')} {f.get('location','')} {f.get('title','')}".lower()
+    for p in pats:
+        pl = p.lower()
+        if pl.startswith("category:") and f.get("category", "").lower() == pl.split(":", 1)[1]:
+            return True
+        if fnmatch.fnmatch(f.get("location", "").lower(), pl) or pl in hay:
+            return True
+    return False
+def _f(title, category, attack_class, severity, confidence, location, evidence):
+    return {"title": title, "category": category, "attack_class": attack_class,
+            "severity": severity, "confidence": confidence,
+            "location": location, "evidence": evidence, "standards": _cite(attack_class),
+            "remediation": REMEDIATION.get(attack_class, _DEFAULT_REM), "status": "open"}
+def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
+                 suppressions: list | None = None) -> dict:
+    """Correlate recon facts, static scanner output and dynamic results into one ranked ledger.
+    Args:
+        facts: recon facts; the keys read here are `authz`, `surface`,
+            `client_exposure`, `iac_ci` and `graphql`.
+        unified: de-duplicated static scanner output; only its `top` list is read.
+        dynamic: optional dynamic-probe results; `write_auth_enforcement`,
+            `unauth_reachability` and `cross_tenant_bola` are read when present.
+        suppressions: patterns in the form accepted by `_suppressed`.
+    Returns:
+        A dict with the kept `findings` (sorted by severity, then confidence, both
+        descending), `total`, `suppressed` count, `by_severity` / `by_confidence`
+        tallies, a `calibration` summary and a `dynamic_included` flag.
+    """
+    suppressions = suppressions or []
+    out = []
+    # ---- 1. Access control: correlate recon (per-endpoint guard) with dynamic verdicts ----
+    authz = facts.get("authz", {})
+    # Index dynamic results for lookup: write checks by (method, path), GET reachability by path.
+    dyn_write = {(r["method"], r["path"]): r for r in
+                 ((dynamic or {}).get("write_auth_enforcement", {}) or {}).get("results", [])}
+    dyn_get = {r["path"]: r for r in
+               ((dynamic or {}).get("unauth_reachability", {}) or {}).get("results", [])}
+    for eg in authz.get("endpoint_guards", []):
+        # Only analyzed endpoints that are neither guarded nor hinted as public become findings.
+        if eg.get("guarded") or eg.get("public_hint") or not eg.get("analyzed"):
+            continue
+        m, p = eg.get("method"), eg.get("path")
+        is_write = m in WRITE_VERBS
+        ev = [{"layer": "recon", "detail": f"no auth guard found in handler {eg.get('code_path','?')}"}]
+        # Static-only baseline: MEDIUM confidence; writes rank one severity step above reads.
+        conf, sev = "MEDIUM", ("HIGH" if is_write else "MEDIUM")
+        # Prefer the method-specific write verdict; otherwise fall back to the path's reachability result.
+        dv = dyn_write.get((m, p)) or dyn_get.get(p)
+        if dv:
+            verdict = dv.get("verdict", "")
+            if "EXECUTED-UNAUTH" in verdict:
+                ev.append({"layer": "dynamic", "detail": f"{m} executed UNAUTHENTICATED (HTTP {dv.get('status')})"})
+                conf, sev = "HIGH", "CRITICAL"
+            elif "no-auth-gate" in verdict or verdict == "OPEN-no-auth":
+                ev.append({"layer": "dynamic", "detail": f"reached unauthenticated (HTTP {dv.get('status')}, {verdict})"})
+                conf = "HIGH"
+                sev = "HIGH" if is_write else "MEDIUM"
+            elif verdict in ("auth-enforced", "protected"):
+                continue  # dynamic says it's actually protected → not a finding
+        out.append(_f(f"Missing authorization: {m} {p}", "access-control", "missing-auth",
+                      sev, conf, p, ev))
+    # ---- 1b. Cross-tenant BOLA leaks (dynamically confirmed) ----
+    for lk in ((dynamic or {}).get("cross_tenant_bola", {}) or {}).get("leaks", []):
+        out.append(_f(f"Cross-tenant read: {lk.get('direction')} {lk.get('path')}", "access-control", "bola",
+                      "CRITICAL", "HIGH", lk.get("path", ""),
+                      [{"layer": "dynamic", "detail": f"cross-tenant GET returned another tenant's data "
+                        f"(HTTP {lk.get('status')}, {lk.get('direction')})"}]))
+    # ---- 2. Static scanner findings (de-duplicated `unified`) ----
+    # Scanner categories without a mapping are cited under the generic "sast" class.
+    cat_to_class = {"sca": "cve", "secret": "secret", "iac": "iac", "sast": "sast"}
+    for t in (unified or {}).get("top", []):
+        cat = t.get("category", "")
+        cls = cat_to_class.get(cat, "sast")
+        sev = t.get("severity", "MEDIUM")
+        # HIGH confidence for any secret hit and for HIGH/CRITICAL dependency findings; MEDIUM otherwise.
+        conf = "HIGH" if cat in ("secret",) or (cat == "sca" and sev in ("HIGH", "CRITICAL")) else "MEDIUM"
+        out.append(_f(t.get("title", cat), f"static-{cat}", cls, sev, conf, t.get("file", ""),
+                      [{"layer": "static", "detail": f"{'+'.join(t.get('tools', []))}: {t.get('title','')}"}]))
+    # ---- 3. Attack-surface sinks (recon hypotheses) ----
+    # Emitted at severity MEDIUM / confidence LOW; the location is the first listed file (or "?").
+    for cls, info in (facts.get("surface", {}).get("sinks", {}) or {}).items():
+        out.append(_f(f"{cls} sink ({info.get('count')} site(s))", "attack-surface",
+                      cls if cls in STANDARDS else "sast", "MEDIUM", "LOW",
+                      (info.get("files") or ["?"])[0],
+                      [{"layer": "recon", "detail": f"user-input-gated {cls} in {info.get('count')} file(s)"}]))
+    # ---- 4. Client-side secret exposure (HIGH — ships to browser) ----
+    for leak in (facts.get("client_exposure", {}).get("public_secret_leaks", []) +
+                 facts.get("client_exposure", {}).get("server_secret_in_client_component", [])):
+        out.append(_f(f"Secret exposed to client: {leak}", "client-exposure", "client-exposure",
+                      "HIGH", "HIGH", leak, [{"layer": "recon", "detail": "secret-named var reaches the browser bundle"}]))
+    # ---- 5. IaC / CI-CD ----
+    for fnd in (facts.get("iac_ci", {}).get("findings", []) or []):
+        # The title carries at most the first 80 characters of the detail; the evidence keeps it in full.
+        out.append(_f(f"{fnd.get('kind')}: {fnd.get('detail','')[:80]}", "iac-ci", "iac",
+                      fnd.get("severity", "MEDIUM"), "MEDIUM", fnd.get("file", ""),
+                      [{"layer": "recon", "detail": fnd.get("detail", "")}]))
+    # ---- 6. GraphQL ----
+    g = facts.get("graphql", {})
+    if g.get("present"):
+        for fnd in g.get("findings", []):
+            out.append(_f(f"GraphQL: {fnd.get('issue')}", "graphql", "graphql",
+                          fnd.get("severity", "MEDIUM"), "MEDIUM", (g.get("endpoints") or ["/graphql"])[0],
+                          [{"layer": "recon", "detail": fnd.get("detail", "")}]))
+    # ---- suppress + rank ----
+    kept = [f for f in out if not _suppressed(f, suppressions)]
+    suppressed_n = len(out) - len(kept)
+    # Highest severity first, then highest confidence; unknown labels rank as 0.
+    kept.sort(key=lambda f: (-SEV_RANK.get(f["severity"], 0), -CONF_RANK.get(f["confidence"], 0)))
+    # ---- calibrate: attach a measured real-rate + CI to each finding (best-effort) ----
+    cal_table = calibration.load()
+    by_sev, by_conf, by_basis = {}, {}, {}
+    for f in kept:
+        # The result of calibration.apply is indexed by "basis" below, so it must be a mapping with that key.
+        f["calibrated"] = calibration.apply(f.get("attack_class", ""), f["confidence"], cal_table)
+        by_sev[f["severity"]] = by_sev.get(f["severity"], 0) + 1
+        by_conf[f["confidence"]] = by_conf.get(f["confidence"], 0) + 1
+        by_basis[f["calibrated"]["basis"]] = by_basis.get(f["calibrated"]["basis"], 0) + 1
+    return {"findings": kept, "total": len(kept), "suppressed": suppressed_n,
+            "by_severity": by_sev, "by_confidence": by_conf,
+            "calibration": {"loaded": bool(cal_table), "by_basis": by_basis,
+                            "personalized": bool((cal_table or {}).get("meta", {}).get("personalized")),
+                            "local_samples": (cal_table or {}).get("meta", {}).get("local_samples", 0),
+                            "caveat": (cal_table or {}).get("meta", {}).get("caveat")},
+            "dynamic_included": bool(dynamic)}

websec_validator/probes.py ADDED Viewed

@@ -0,0 +1,79 @@
+"""Stage the probe library, tailored to the extracted attack surface.
+Probe selection is now driven by the real recon facts — we only stage what the
+surface justifies, and the briefing tells the agent exactly which endpoints to
+point each probe at.
+"""
+from __future__ import annotations
+from importlib import resources
+from pathlib import Path
+# label -> (filename, attack class, what the agent must supply)
+# `filename` is resolved under templates/probes by stage(); the other two fields
+# are copied into the staged manifest as `attack_class` and `agent_must_supply`.
+PROBES = {
+    "bola-cross-tenant": ("bola-cross-tenant.sh", "BOLA / cross-tenant read (OWASP API #1)",
+                          "two role tokens in different tenants + the IDOR-candidate routes"),
+    "bola-write-verbs": ("bola-write-verbs.py", "BOLA on PATCH/PUT/POST/DELETE",
+                         "two role tokens + the write endpoints + a sample object id per tenant"),
+    "mass-assignment": ("mass-assignment.py", "BOPLA / mass assignment (OWASP API #3)",
+                        "a low-priv token + a write endpoint that updates a record"),
+    "jwt-attacks": ("jwt-attacks.sh", "JWT: alg:none, tamper, expiry, replay",
+                   "a valid token + the login + a protected endpoint"),
+    "hs256-brute-force": ("hs256-brute-force.py", "Offline HS256 weak-secret brute",
+                         "one HS256 JWT (offline — no live app needed)"),
+    "ssrf-probes": ("ssrf-probes.sh", "SSRF: IMDS / RFC1918 / file://",
+                   "an authorized token + the SSRF-candidate endpoints/params"),
+    "race-conditions": ("race-conditions.py", "Race / claim-collision invariants",
+                       "a token + an endpoint with a single-winner invariant + an idempotency key"),
+    "webhook-forgery": ("webhook-forgery.py", "Inbound webhook signature/replay",
+                       "the webhook path + signature header name + scheme"),
+    "rate-limit-burst": ("rate-limit-burst.sh", "Rate-limit + X-Forwarded-For bypass",
+                        "the login + a rate-limited endpoint"),
+    "compare-roles": ("compare-roles.sh", "Two-role DAST surface diff",
+                     "two SARIF reports from a role-A and role-B scan (dynamic phase)"),
+    "dlp-bypass-offline": ("dlp-bypass-offline.py", "DLP/detection regex encoding bypass",
+                          "your DLP/redaction regexes (offline)"),
+    "s3-assess": ("s3-assess.sh", "S3 bucket posture", "a bucket name + AWS creds"),
+}
+# Probes that applicable() selects regardless of the recon facts.
+ALWAYS = ["jwt-attacks", "hs256-brute-force", "rate-limit-burst"]
+def applicable(facts: dict) -> list:
+    """Pick probes the extracted surface actually justifies."""
+    chosen = list(ALWAYS)
+    targeting = (facts.get("routes") or {}).get("targeting", {})
+    tenant = (facts.get("tenant") or {}).get("candidates")
+    if targeting.get("write_endpoints"):
+        chosen += ["mass-assignment"]
+    if tenant:
+        chosen += ["bola-cross-tenant", "bola-write-verbs", "compare-roles"]
+    if targeting.get("ssrf_candidates") or (facts.get("surface") or {}).get("sinks", {}).get("ssrf-outbound-http"):
+        chosen += ["ssrf-probes"]
+    if targeting.get("write_endpoints"):
+        chosen += ["webhook-forgery", "race-conditions"]
+    seen, ordered = set(), []
+    for k in chosen:
+        if k in PROBES and k not in seen:
+            seen.add(k)
+            ordered.append(k)
+    return ordered
+def stage(chosen: list, outdir: Path) -> list:
+    dest = outdir / "probes"
+    dest.mkdir(parents=True, exist_ok=True)
+    manifest = []
+    src_root = resources.files("websec_validator").joinpath("templates/probes")
+    for key in chosen:
+        fname, attack, needs = PROBES[key]
+        try:
+            (dest / fname).write_bytes(src_root.joinpath(fname).read_bytes())
+            manifest.append({"key": key, "file": f"probes/{fname}",
+                             "attack_class": attack, "agent_must_supply": needs})
+        except Exception as e:
+            manifest.append({"key": key, "file": fname, "status": f"stage-error: {e}"})
+    return manifest

websec_validator/proof.py ADDED Viewed

@@ -0,0 +1,96 @@
+"""Proof harness — score the recon engine against a known-vuln-app corpus.
+WHAT THIS MEASURES (honest scope): for each deliberately-vulnerable app, does the
+recon engine SURFACE the attack surface the app is known to have (right framework,
+auth scheme, endpoint count, IDOR/GraphQL presence)? That's a deterministic,
+regression-trackable PROXY for the engine's quality — it tells us the briefing
+points the agent at the right places.
+WHAT IT DOES NOT MEASURE: the full kill-criterion — whether handing the briefing
+to a coding agent makes it find the *planted bugs* better than a generic prompt.
+That A/B requires driving real agents against running apps; the protocol for it is
+in corpus/PROOF-PROTOCOL.md and is a manual step.
+"""
+from __future__ import annotations
+import json
+import subprocess
+from pathlib import Path
+from . import __version__, recon
+def _ensure_repo(entry: dict, workdir: Path) -> Path | None:
+    if entry.get("local_path") and Path(entry["local_path"]).is_dir():
+        return Path(entry["local_path"])
+    dest = workdir / entry["name"]
+    if dest.is_dir() and any(dest.iterdir()):   # already cloned — reuse
+        return dest
+    if not entry.get("repo"):
+        return None
+    try:
+        subprocess.run(["git", "clone", "--depth", "1", entry["repo"], str(dest)],
+                       capture_output=True, text=True, check=True, timeout=240)
+        return dest
+    except Exception:
+        return None
+def _score(entry: dict, facts: dict) -> dict:
+    exp = entry.get("expect", {})
+    stack = facts.get("stack", {})
+    routes = facts.get("routes", {})
+    tgt = routes.get("targeting", {})
+    auth = facts.get("auth", {})
+    gql = facts.get("graphql", {})
+    checks = []
+    def chk(name, ok, got):
+        checks.append({"check": name, "pass": bool(ok), "got": got})
+    if "frameworks" in exp:
+        got = stack.get("frameworks", [])
+        chk("frameworks ⊇ expected", set(exp["frameworks"]).issubset(set(got)), got)
+    if "min_endpoints" in exp:
+        chk(f"endpoints ≥ {exp['min_endpoints']}", routes.get("count", 0) >= exp["min_endpoints"], routes.get("count", 0))
+    if "auth_scheme_contains" in exp:
+        hay = (auth.get("scheme", "") + " " + " ".join(auth.get("schemes_detected", []))).lower()
+        chk(f"auth ~ '{exp['auth_scheme_contains']}'", exp["auth_scheme_contains"] in hay, auth.get("scheme"))
+    if exp.get("idor_present"):
+        n = len(tgt.get("idor_candidates", []))
+        chk("IDOR candidates found", n > 0, n)
+    if exp.get("graphql_present"):
+        chk("GraphQL detected", gql.get("present", False), gql.get("present", False))
+    if exp.get("tenant_key"):
+        keys = [c["key"] for c in facts.get("tenant", {}).get("candidates", [])]
+        chk(f"tenant key '{exp['tenant_key']}'", exp["tenant_key"] in keys, keys[:3])
+    passed = sum(1 for c in checks if c["pass"])
+    return {"checks": checks, "passed": passed, "total": len(checks),
+            "score": round(passed / len(checks), 2) if checks else None}
+def run_proof(corpus_path: Path, workdir: Path) -> dict:
+    corpus = json.loads(Path(corpus_path).read_text())
+    workdir.mkdir(parents=True, exist_ok=True)
+    results = []
+    for entry in corpus:
+        repo = _ensure_repo(entry, workdir)
+        if not repo:
+            results.append({"name": entry["name"], "status": "unavailable (clone failed / no local_path)"})
+            continue
+        try:
+            facts = recon.build_facts(repo, __version__)
+        except Exception as e:
+            results.append({"name": entry["name"], "status": f"recon error: {e}"})
+            continue
+        results.append({"name": entry["name"], "endpoints": facts.get("routes", {}).get("count"),
+                        "vulns": entry.get("vulns", ""), **_score(entry, facts)})
+    total_checks = sum(r.get("total", 0) for r in results)
+    total_pass = sum(r.get("passed", 0) for r in results)
+    return {"results": results,
+            "aggregate": {"apps": len(results),
+                          "overall_coverage": round(total_pass / total_checks, 2) if total_checks else None,
+                          "checks_passed": total_pass, "checks_total": total_checks}}

websec_validator/recon.py ADDED Viewed

@@ -0,0 +1,28 @@
+"""Recon entry point — thin wrapper over the extractor framework.
+All the real work lives in extractors/. This module just exposes the stable
+build_facts / write_facts / detect_stack API the CLI depends on.
+"""
+from __future__ import annotations
+import json
+from pathlib import Path
+from . import extractors
+from .extractors.base import RepoContext
+from .extractors.stack import StackExtractor
+def build_facts(root: Path, version: str) -> dict:
+    return extractors.run_all(root, version)
+def write_facts(facts: dict, out: Path) -> Path:
+    out.write_text(json.dumps(facts, indent=2))
+    return out
+def detect_stack(root: Path) -> dict:
+    """Lightweight stack-only detection for scanner relevance (CLI doctor)."""
+    return StackExtractor().extract(RepoContext(Path(root)), {})

websec_validator/report.py ADDED Viewed

@@ -0,0 +1,114 @@
+"""Comprehensive, human-readable REPORT.md — the historical artifact.
+Every `websec run` writes one of these into an immutable timestamped run dir, so
+you get a durable record of the whole pass: stack, attack surface, access-control
+map, de-duplicated static findings, and (when present) dynamic results — all in
+one doc. Structured so it can grow into the traceable findings ledger (evidence
+chain + standards citations + calibrated confidence) without being rebuilt.
+"""
+from __future__ import annotations
+from .briefing import _bullets, _section
+def render(facts: dict, scanners: dict, scan_results: list, unified: dict | None,
+           probe_manifest: list, timestamp: str, ledger: dict | None = None) -> str:
+    """Render the REPORT.md markdown for one run and return it as a string.
+    Args:
+        facts: recon facts (stack, routes, authz, surface, schemas, iac_ci,
+            graphql, client_exposure, auth, target, version are read).
+        scanners: not referenced in this function body.
+        scan_results: not referenced in this function body.
+        unified: de-duplicated static findings; `by_severity` and `top` are used
+            only when no ledger findings are available.
+        probe_manifest: staged-probe entries; only entries that carry an
+            `attack_class` key are listed.
+        timestamp: generation time shown in the report header.
+        ledger: findings ledger; when it has findings, at most the first 60 are rendered.
+    """
+    stack = facts.get("stack", {})
+    routes = facts.get("routes", {})
+    tgt = routes.get("targeting", {})
+    authz = facts.get("authz", {})
+    gs = authz.get("guard_summary", {})
+    surface = facts.get("surface", {})
+    # executive summary
+    sev = (unified or {}).get("by_severity", {})
+    sev_line = " · ".join(f"{k}: {v}" for k, v in sev.items()) if sev else "_run with --scan for static findings_"
+    unprot = authz.get("write_endpoints_without_visible_guard", [])
+    top_findings = ""
+    if unified and unified.get("top"):
+        top_findings = "\n".join(
+            f"- **{t['severity']}** [{t['category']}] {t['title']} — `{t['file']}` ({'+'.join(t['tools'])})"
+            for t in unified["top"])
+    else:
+        top_findings = "_no static scan run (use `--scan`)_"
+    sinks = ", ".join(f"{k} ({n})" for k, n in surface.get("sink_counts", {}).items()) or "none"
+    # Prefer the ledger; without ledger findings fall back to the static-scan summary built above.
+    if ledger and ledger.get("findings"):
+        _ll = []
+        for f in ledger["findings"][:60]:
+            # Show only the first CWE label (empty string when there is none).
+            cwe = (f["standards"]["cwe"][:1] or [""])[0]
+            chain = " → ".join(e["layer"] for e in f["evidence"])
+            api = (" · " + ", ".join(f["standards"]["owasp_api"])) if f["standards"]["owasp_api"] else ""
+            cal = f.get("calibrated") or {}
+            calstr = (f" · P(real)≈**{cal.get('p')}** CI {cal.get('ci')} (n={cal.get('n')}, {cal.get('basis')})"
+                      if cal else "")
+            _ll.append(f"- **[{f['severity']}/{f['confidence']}]** {f['title']}  \n"
+                       f"  `{f['location']}` · evidence: {chain} · {cwe}{api}{calstr}  \n"
+                       f"  _fix:_ {f['remediation']}")
+        ledger_block = "\n".join(_ll)
+        ledger_hdr = (f"**{ledger['total']} findings** · {ledger['by_severity']} · "
+                      f"confidence {ledger['by_confidence']}"
+                      + (f" · {ledger['suppressed']} suppressed" if ledger.get('suppressed') else ""))
+    else:
+        ledger_block, ledger_hdr = top_findings, sev_line
+    # Use the caveat recorded with the calibration data when there is one, else a fixed default.
+    cal_caveat = ((ledger or {}).get("calibration", {}).get("caveat")
+                  or "calibrated on a vuln-app corpus — indicative only, skews optimistic on clean code")
+    return f"""# websec-validator report — {facts.get('target','')}
+> Generated {timestamp} · websec-validator v{facts.get('version','')} · **immutable run record** (never overwritten).
+> Deterministic recon — no LLM. Hand `AGENT-BRIEFING.md` (same dir) to your coding agent to act on this.
+## Executive summary
+| | |
+|---|---|
+| Stack | {", ".join(stack.get("languages", [])) or "?"} · {", ".join(stack.get("frameworks", [])) or "?"} · {", ".join(stack.get("datastores", [])) or "?"} |
+| Endpoints | **{routes.get('count', 0)}** (via {routes.get('engine','?').split(' ')[0]}) |
+| Auth | {facts.get('auth', {}).get('scheme','?')} · roles: {', '.join(authz.get('roles_detected', [])) or 'none'} |
+| Access control | {gs.get('with_visible_guard', 0)} guarded · **{gs.get('no_visible_guard', 0)} no visible guard** · global-middleware: {authz.get('global_auth_middleware', False)} |
+| Findings (ledger) | {ledger_hdr} |
+| Attack surface | IDOR: {len(tgt.get('idor_candidates', []))} · SSRF: {len(tgt.get('ssrf_candidates', []))} · upload: {len(tgt.get('upload_candidates', []))} · writes: {len(tgt.get('write_endpoints', []))} |
+## 1. Findings ledger (ranked · evidence chain · standards · confidence)
+{ledger_block}
+_Full ledger with complete evidence chains + remediation in `findings-ledger.json`. Confidence: HIGH = dynamically confirmed or verified; MEDIUM = concrete static evidence; LOW = single-source hypothesis to verify._
+_**P(real)** = measured real-vuln rate for that attack-class/confidence bucket, with a 95% confidence interval and sample size `n` ({cal_caveat}). A wide CI or `basis: prior (uncalibrated)` means thin data — lean on the verification debate, not the number; to be conservative, threshold on the CI lower bound._
+## 2. Access control
+{_section("⚠ Write endpoints with no visible guard (verify — top missing-authz leads)", unprot)}
+{authz.get("note","")}
+## 3. Attack surface & targeting
+{_section("IDOR / BOLA candidates", tgt.get("idor_candidates"))}
+{_section("SSRF candidates", tgt.get("ssrf_candidates"))}
+{_section("File-upload candidates", tgt.get("upload_candidates"))}
+**Code-level sinks (user-input-gated):** {sinks}
+**Mass-assignment targets (privileged model fields):** {", ".join(facts.get("schemas", {}).get("sensitive_fields", [])) or "none detected"}  ·  ORMs: {", ".join(facts.get("schemas", {}).get("orms", [])) or "?"}
+## 4. Config / CI-CD / client-side
+**IaC/CI:** {len((facts.get("iac_ci") or {}).get("findings", []))} finding(s) · **GraphQL:** {(facts.get("graphql") or {}).get("present", False)} · **client-side secret exposure:** {len((facts.get("client_exposure") or {}).get("public_secret_leaks", []) + (facts.get("client_exposure") or {}).get("server_secret_in_client_component", []))}
+## 5. Staged probes
+{_bullets([f"`{p['key']}` — {p.get('attack_class','')}" for p in probe_manifest if 'attack_class' in p])}
+## Appendix — endpoint inventory
+{_bullets([f"`{e['method']:6}` {e['path']}" for e in routes.get("endpoints", [])], cap=200)}
+---
+_Roadmap: this report grows into a traceable findings ledger — each finding gaining an evidence
+chain (recon → static → dynamic), an OWASP/CWE citation, and a calibrated H/M/L confidence._
+"""