PyPI - websec-validator - Versions diffs - 0.2.0__py3-none-any.whl - Mend

websec-validator 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

websec_validator/__init__.py +14 -0
websec_validator/briefing.py +218 -0
websec_validator/calibration.json +75 -0
websec_validator/calibration.py +226 -0
websec_validator/cli.py +395 -0
websec_validator/constitution.py +81 -0
websec_validator/corpus.json +49 -0
websec_validator/dynamic.py +249 -0
websec_validator/extractors/__init__.py +56 -0
websec_validator/extractors/auth.py +77 -0
websec_validator/extractors/authz.py +130 -0
websec_validator/extractors/base.py +101 -0
websec_validator/extractors/client_exposure.py +48 -0
websec_validator/extractors/graphql.py +71 -0
websec_validator/extractors/iac_ci.py +65 -0
websec_validator/extractors/integrations.py +55 -0
websec_validator/extractors/routes.py +215 -0
websec_validator/extractors/schemas.py +75 -0
websec_validator/extractors/stack.py +80 -0
websec_validator/extractors/surface.py +86 -0
websec_validator/extractors/tenant.py +33 -0
websec_validator/findings.py +199 -0
websec_validator/probes.py +79 -0
websec_validator/proof.py +96 -0
websec_validator/recon.py +28 -0
websec_validator/report.py +114 -0
websec_validator/scanners.py +248 -0
websec_validator/templates/probes/bola-cross-tenant.sh +192 -0
websec_validator/templates/probes/bola-write-verbs.py +147 -0
websec_validator/templates/probes/compare-roles.sh +69 -0
websec_validator/templates/probes/dlp-bypass-offline.py +149 -0
websec_validator/templates/probes/hs256-brute-force.py +90 -0
websec_validator/templates/probes/jwt-attacks.sh +161 -0
websec_validator/templates/probes/mass-assignment.py +201 -0
websec_validator/templates/probes/race-conditions.py +144 -0
websec_validator/templates/probes/rate-limit-burst.sh +136 -0
websec_validator/templates/probes/s3-assess.sh +120 -0
websec_validator/templates/probes/ssrf-probes.sh +189 -0
websec_validator/templates/probes/webhook-forgery.py +113 -0
websec_validator/templates/reports/FINDINGS-SUMMARY.md.template +75 -0
websec_validator/templates/reports/access-control-matrix.md.template +65 -0
websec_validator/templates/reports/findings-triage.md.template +28 -0
websec_validator/templates/reports/pentest-handover-brief.md.template +121 -0
websec_validator/templates/reports/per-tool-FINDINGS.md.template +37 -0
websec_validator-0.2.0.dist-info/METADATA +232 -0
websec_validator-0.2.0.dist-info/RECORD +50 -0
websec_validator-0.2.0.dist-info/WHEEL +5 -0
websec_validator-0.2.0.dist-info/entry_points.txt +2 -0
websec_validator-0.2.0.dist-info/licenses/LICENSE +21 -0
websec_validator-0.2.0.dist-info/top_level.txt +1 -0

websec_validator/cli.py ADDED Viewed

@@ -0,0 +1,395 @@
+"""websec — CLI entry point.
+Commands:
+  websec run <repo> [--scan] [--out DIR]   full pipeline → FACTS.json + AGENT-BRIEFING.md + probes/
+  websec recon <repo> [--out DIR]          recon only → FACTS.json
+  websec doctor [<repo>]                    show which scanners are present / missing
+Code-in, artifacts-out. No LLM, no server, no running app. Point your AI coding
+agent at the generated AGENT-BRIEFING.md.
+"""
+from __future__ import annotations
+import argparse
+import json
+import sys
+from pathlib import Path
+from . import (__version__, briefing, calibration, constitution, dynamic, findings, probes, proof,
+               recon, report, scanners)
+def _resolve_target(raw: str) -> Path:
+    p = Path(raw).expanduser().resolve()
+    if not p.is_dir():
+        sys.exit(f"error: target is not a directory: {p}")
+    return p
+def _default_out(target: Path, out: str | None) -> Path:
+    d = Path(out).expanduser().resolve() if out else Path.cwd() / "websec-out"
+    d.mkdir(parents=True, exist_ok=True)
+    return d
+def _new_run_dir(out: str | None) -> tuple:
+    """Create an immutable timestamped run dir and point `latest` at it. Returns (run_dir, ts).
+    Every run is preserved — nothing is overwritten."""
+    import datetime
+    base = Path(out).expanduser().resolve() if out else Path.cwd() / "websec-out"
+    ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+    run = base / "runs" / ts
+    run.mkdir(parents=True, exist_ok=True)
+    latest = base / "latest"
+    try:
+        if latest.is_symlink() or latest.exists():
+            latest.unlink()
+        latest.symlink_to(Path("runs") / ts, target_is_directory=True)
+    except Exception:
+        pass
+    return run, ts
+def cmd_doctor(args) -> int:
+    target = _resolve_target(args.target) if args.target else None
+    langs = recon.detect_stack(target)["languages"] if target else None
+    det = scanners.detect(langs)
+    print(f"websec-validator v{__version__} — scanner check"
+          + (f"  (stack: {', '.join(langs) or 'unknown'})" if langs else ""))
+    print("\n  available:")
+    for s in det["available"]:
+        print(f"    ✓ {s['name']:20} {s['category']}")
+    if not det["available"]:
+        print("    (none on PATH)")
+    print("\n  missing (optional — install for fuller coverage):")
+    for s in det["missing"]:
+        print(f"    · {s['name']:20} {s['category']:8} {s.get('install','')}")
+    print("\n  Docker:", "present" if _which("docker") else "not found "
+          "(used for reproducible scanner runs in a future release)")
+    return 0
+def cmd_recon(args) -> int:
+    target = _resolve_target(args.target)
+    out = _default_out(target, args.out)
+    facts = recon.build_facts(target, __version__)
+    recon.write_facts(facts, out / "FACTS.json")
+    print(f"✓ FACTS.json → {out / 'FACTS.json'}")
+    _print_facts_summary(facts)
+    return 0
+def cmd_run(args) -> int:
+    target = _resolve_target(args.target)
+    out, ts = _new_run_dir(args.out)
+    print(f"websec-validator v{__version__}  ·  target: {target}  ·  run {ts}\n")
+    # 1. recon
+    facts = recon.build_facts(target, __version__)
+    recon.write_facts(facts, out / "FACTS.json")
+    langs = facts["stack"]["languages"]
+    _print_facts_summary(facts)
+    # 2. scanners: detect, optionally run
+    det = scanners.detect(langs)
+    scan_results = []
+    unified = None
+    if args.scan:
+        print("\n  running available static scanners (read-only)…")
+        scan_results = scanners.run_available(target, out, langs)
+        for r in scan_results:
+            tag = r.get("findings", r.get("status", "?"))
+            print(f"    {r['name']}: {tag}")
+        unified = scanners.normalize_findings(scan_results, out)
+        print(f"  → {unified['total']} de-duplicated findings "
+              f"({unified['cross_tool_or_dup_merged']} merged) · {unified['by_severity']}")
+    else:
+        print(f"\n  scanners available: {', '.join(s['name'] for s in det['available']) or 'none'}"
+              "  (add --scan to execute them)")
+    # 3. probes: choose + stage
+    chosen = probes.applicable(facts)
+    manifest = probes.stage(chosen, out)
+    print(f"\n  staged {len([m for m in manifest if 'attack_class' in m])} tailored probe template(s) → {out / 'probes'}")
+    # 4. traceable findings ledger (recon + static; dynamic merges in via `websec dynamic`)
+    suppressions = findings.load_suppressions(target)
+    ledger = findings.build_ledger(facts, unified, None, suppressions)
+    (out / "findings-ledger.json").write_text(json.dumps(ledger, indent=2))
+    (out / "CONSTITUTION.md").write_text(constitution.render(constitution.build(facts, ledger)))
+    if ledger["total"]:
+        print(f"\n  ledger: {ledger['total']} finding(s) · {ledger['by_severity']} · confidence {ledger['by_confidence']}"
+              + (f" · {ledger['suppressed']} suppressed" if ledger["suppressed"] else ""))
+    # 5. briefing + comprehensive REPORT.md (immutable run record)
+    (out / "AGENT-BRIEFING.md").write_text(briefing.render(facts, det, scan_results, manifest, unified))
+    (out / "REPORT.md").write_text(report.render(facts, det, scan_results, unified, manifest, ts, ledger))
+    (out / "manifest.json").write_text(json.dumps(
+        {"facts": "FACTS.json", "scanners": det, "scan_results": scan_results,
+         "findings_summary": unified, "ledger": {"total": ledger["total"], "by_severity": ledger["by_severity"]},
+         "probes": manifest, "timestamp": ts}, indent=2))
+    print(f"\n✓ run {ts} saved (immutable — nothing overwritten):\n    {out}")
+    print("    REPORT.md          — full historical record")
+    print("    AGENT-BRIEFING.md  — hand this to your AI coding agent")
+    print(f"  latest → {out.parent.parent / 'latest'}    ·    add `websec-out/` to .gitignore")
+    return 0
+def cmd_dynamic(args) -> int:
+    base = Path(args.out).expanduser().resolve() if args.out else Path.cwd() / "websec-out"
+    # resolve BEFORE _new_run_dir repoints `latest` (else the symlink moves under us)
+    facts_path = (Path(args.facts).expanduser() if args.facts else base / "latest" / "FACTS.json").resolve()
+    if not facts_path.is_file():
+        sys.exit(f"error: FACTS.json not found at {facts_path} — run `websec run <repo>` first (or pass --facts)")
+    out, ts = _new_run_dir(args.out)
+    dyn: dict = {}
+    if args.unauth:
+        if not args.target:
+            sys.exit("error: --unauth requires --target")
+        if args.probe_writes and not dynamic.is_localhost(args.target):
+            sys.exit("error: --probe-writes is localhost-only (it sends write verbs) — point --target at your sandbox")
+        print(f"websec dynamic — STRICT read-only · UNAUTHENTICATED · GET-only  ·  run {ts}\n")
+        dyn = dynamic.run_unauth(args.target, facts_path, out, probe_writes=args.probe_writes)
+        u = dyn["unauth_reachability"]
+        print(f"  target: {u['target']}  ·  → {u['summary']}")
+        for r in u["results"]:
+            mark = "🔓" if r["verdict"] == "OPEN-no-auth" else (" ·" if r["verdict"] == "protected" else "  ")
+            print(f"    {mark} {str(r['status']):>4}  {r['verdict']:26} {r['path']}")
+        if args.probe_writes:
+            w = dyn["write_auth_enforcement"]
+            print(f"\n  write-verb auth enforcement → {w['summary']}")
+            for r in w["results"]:
+                mark = "🔓" if r["verdict"] != "auth-enforced" and not r["verdict"].startswith("http-") else " ·"
+                print(f"    {mark} {str(r['status']):>4}  {r['verdict']:42} {r['method']} {r['path']}")
+    elif args.config:
+        cfg = Path(args.config).expanduser().resolve()
+        if not cfg.is_file():
+            sys.exit(f"error: config not found: {cfg}")
+        print(f"websec dynamic — authenticated cross-tenant BOLA (read-only)  ·  run {ts}\n")
+        dyn = dynamic.run_dynamic(cfg, facts_path, out)
+        ct = dyn.get("cross_tenant_bola", {})
+        if ct.get("error"):
+            print("  ERROR:", ct["error"])
+            return 1
+        print(f"  agentA {ct['agentA']['email']} (tenant {ct['agentA']['tenant']}) · "
+              f"agentB {ct['agentB']['email']} (tenant {ct['agentB']['tenant']})")
+        print(f"  → {ct['summary']}")
+        for lk in ct.get("leaks", []):
+            print(f"     🚨 LEAK {lk['direction']} {lk['path']} → HTTP {lk['status']}")
+    else:
+        sys.exit("error: provide --config (authenticated cross-tenant) OR --unauth --target (read-only)")
+    # merge dynamic evidence into the traceable ledger + write the immutable run report
+    facts_dict = json.loads(facts_path.read_text())
+    ledger = findings.build_ledger(facts_dict, None, dyn,
+                                   findings.load_suppressions(Path(facts_dict.get("target", "."))))
+    (out / "findings-ledger.json").write_text(json.dumps(ledger, indent=2))
+    (out / "CONSTITUTION.md").write_text(constitution.render(constitution.build(facts_dict, ledger)))
+    (out / "REPORT.md").write_text(
+        report.render(facts_dict, {"available": [], "missing": []}, [], None, [], ts, ledger))
+    print(f"\n  ledger: {ledger['total']} finding(s) · {ledger['by_severity']} · confidence {ledger['by_confidence']}")
+    # self-improving calibration: dynamic is an oracle — fold this run's CONFIRMED results
+    # (executed-unauth / auth-enforced / cross-tenant leak) into the user-global local overlay
+    samples = calibration.samples_from_dynamic(dyn)
+    rec = calibration.record_samples(samples) if samples else None
+    if rec:
+        nr = sum(1 for s in samples if s["is_real"])
+        print(f"  calibration: folded {len(samples)} confirmed sample(s) ({nr} real / {len(samples) - nr} FP) "
+              f"into your local overlay → {rec['meta']['samples']} total; confidence now personalizes to your apps")
+    print(f"  ✓ run {ts} saved (immutable): {out}")
+    return 1 if ledger["by_severity"].get("CRITICAL") else 0
+def cmd_proof(args) -> int:
+    from importlib import resources
+    corpus_path = (Path(args.corpus).expanduser().resolve() if args.corpus
+                   else Path(str(resources.files("websec_validator").joinpath("corpus.json"))))
+    workdir = (Path(args.workdir).expanduser().resolve() if args.workdir
+               else Path.home() / ".cache" / "websec-corpus")
+    print(f"websec proof — recon coverage vs vuln-app corpus\n  corpus:  {corpus_path}\n  workdir: {workdir}\n")
+    res = proof.run_proof(corpus_path, workdir)
+    for r in res["results"]:
+        if r.get("score") is None:
+            print(f"  {r['name']:12} — {r.get('status', 'no checks')}")
+            continue
+        print(f"  {r['name']:12} {r['passed']}/{r['total']} checks · {r.get('endpoints', '?')} endpoints · {r.get('vulns', '')[:55]}")
+        for c in r.get("checks", []):
+            print(f"       {'✓' if c['pass'] else '✗'} {c['check']:22} got={c['got']}")
+    agg = res["aggregate"]
+    print(f"\n  OVERALL recon coverage: {agg.get('overall_coverage')} "
+          f"({agg['checks_passed']}/{agg['checks_total']} checks, {agg['apps']} apps)")
+    print("  NOTE: PROXY metric (does recon surface the known-vuln surface?). The full agent-lift")
+    print("  kill-criterion is the manual A/B in corpus/PROOF-PROTOCOL.md.")
+    return 0
+def cmd_calibrate(args) -> int:
+    """Fit confidence calibration: run the recon ledger against the labeled vuln corpus,
+    measure how often each (attack_class, label) bucket is a real documented vuln, and
+    write calibration.json (shipped + applied at runtime by findings.build_ledger).
+    Two paths:
+      * `--ingest FILE`: fold hand-labeled rows into the local overlay via
+        `calibration.record_samples`, then return 0 without touching the corpus.
+      * otherwise: for each corpus entry with a `truth` block, build facts + ledger, label
+        every finding with `calibration.is_real`, fit the table and write it to `--out`
+        (default: calibration.json next to the calibration module).
+    Returns 0 on success, 1 when no labeled findings were produced; exits via sys.exit when
+    the ingest file is missing or nothing was ingested."""
+    from importlib import resources
+    # --ingest: fold a hand-labeled findings file into your LOCAL overlay (the manual real-repo path)
+    if getattr(args, "ingest", None):
+        src = Path(args.ingest).expanduser().resolve()
+        if not src.is_file():
+            sys.exit(f"error: --ingest file not found: {src}")
+        data = json.loads(src.read_text())
+        rows = data.get("findings", data) if isinstance(data, dict) else data  # a dict is unwrapped via its "findings" key; anything else is used as-is
+        labeled = [{"attack_class": r.get("attack_class", ""), "confidence": r.get("confidence", "MEDIUM"),
+                    "is_real": bool(r.get("is_real"))} for r in rows]  # each row must support .get(); missing confidence defaults to MEDIUM
+        rec = calibration.record_samples(labeled)
+        if not rec:
+            sys.exit("error: nothing ingested (empty file, or local overlay not writable)")
+        nr = sum(1 for s in labeled if s["is_real"])
+        print(f"websec calibrate --ingest: folded {len(labeled)} hand-labeled sample(s) "
+              f"({nr} real / {len(labeled) - nr} FP) into {calibration.LOCAL_PATH} → {rec['meta']['samples']} total.")
+        return 0
+    corpus_path = (Path(args.corpus).expanduser().resolve() if args.corpus
+                   else Path(str(resources.files("websec_validator").joinpath("corpus.json"))))
+    workdir = (Path(args.workdir).expanduser().resolve() if args.workdir
+               else Path.home() / ".cache" / "websec-corpus")
+    out_path = (Path(args.out).expanduser().resolve() if args.out
+                else Path(calibration.__file__).resolve().parent / "calibration.json")  # default lands beside the calibration module itself
+    corpus = json.loads(corpus_path.read_text())
+    workdir.mkdir(parents=True, exist_ok=True)
+    print("websec calibrate — fitting confidence against the labeled vuln corpus")
+    print(f"  corpus:  {corpus_path}\n  workdir: {workdir}\n  out:     {out_path}\n")
+    labeled, used = [], []  # labeled: one row per finding; used: names of corpus apps that contributed
+    for entry in corpus:
+        truth = entry.get("truth")
+        if not truth:
+            print(f"  {entry['name']:12} — no truth block, skipped")
+            continue
+        repo = proof._ensure_repo(entry, workdir)
+        if not repo:
+            print(f"  {entry['name']:12} — unavailable (clone failed / no local_path)")
+            continue
+        try:
+            facts = recon.build_facts(repo, __version__)
+            ledger = findings.build_ledger(facts, None, None, [])  # recon-only ledger: no scanner findings, no dynamic evidence, no suppressions
+        except Exception as e:  # one failing app is reported and skipped; the remaining entries still run
+            print(f"  {entry['name']:12} — recon/ledger error: {e}")
+            continue
+        n_real = 0
+        for f in ledger["findings"]:
+            real = calibration.is_real(f.get("attack_class", ""), f.get("location", ""), truth)
+            labeled.append({"attack_class": f.get("attack_class", ""),
+                            "confidence": f["confidence"], "is_real": real})
+            n_real += int(real)
+        used.append(entry["name"])
+        print(f"  {entry['name']:12} {len(ledger['findings'])} findings · {n_real} matched a documented vuln")
+    if not labeled:
+        print("\n  no labeled findings produced — is the corpus cloned? (needs network on first run)")
+        return 1
+    researched = {t.get("class") for entry in corpus for t in (entry.get("truth") or [])}  # every class named in any truth block, including skipped apps
+    table = calibration.fit(labeled, used, researched)
+    out_path.write_text(json.dumps(table, indent=2) + "\n")
+    print(f"\n  fitted {table['meta']['n_total']} findings across {len(used)} app(s) → {out_path}")
+    for k, v in table["by_label"].items():
+        print(f"    {k:7} {v['k']}/{v['n']} real · p={v['p']} · 95% CI {v['ci']}")
+    print(f"\n  NOTE: {table['meta']['caveat']}.")
+    print("  Per-finding estimates carry n + basis; wide CI / basis=prior ⇒ trust the debate, not the number.")
+    return 0
+def _which(b):
+    import shutil
+    return shutil.which(b)
+def _print_facts_summary(facts: dict) -> None:
+    st = facts.get("stack", {})
+    rt = facts.get("routes", {})
+    tg = rt.get("targeting", {})
+    print(f"  stack:    {', '.join(st.get('languages', [])) or '?'}  ·  "
+          f"frameworks: {', '.join(st.get('frameworks', [])) or '?'}  ·  "
+          f"datastores: {', '.join(st.get('datastores', [])) or '?'}")
+    print(f"  auth:     {facts.get('auth', {}).get('scheme', '?')}")
+    tc = facts.get("tenant", {}).get("candidates", [])
+    print(f"  tenant?:  {', '.join(t['key'] for t in tc) or 'none detected'}"
+          + ("   ← confirm THE boundary" if tc else ""))
+    print(f"  routes:   {rt.get('count', 0)} endpoints via {rt.get('engine', '?').split(' ')[0]}")
+    print(f"  targets:  IDOR={len(tg.get('idor_candidates', []))} "
+          f"SSRF={len(tg.get('ssrf_candidates', []))} "
+          f"upload={len(tg.get('upload_candidates', []))} "
+          f"writes={len(tg.get('write_endpoints', []))}")
+def build_parser() -> argparse.ArgumentParser:
+    p = argparse.ArgumentParser(prog="websec",
+        description="Local-first security recon that briefs your AI coding agent.")
+    p.add_argument("--version", action="version", version=f"websec-validator {__version__}")
+    # metavar lists only the user-facing commands; recon/proof/calibrate still work but are
+    # omitted (they get no `help=`, so argparse leaves them out of the listing entirely).
+    sub = p.add_subparsers(dest="cmd", required=True, metavar="{run,doctor,dynamic}")
+    r = sub.add_parser("run", help="full pipeline → briefing + tailored probes")
+    r.add_argument("target")
+    r.add_argument("--scan", action="store_true", help="also execute available static scanners")
+    r.add_argument("--out", help="output dir (default: ./websec-out)")
+    r.set_defaults(func=cmd_run)
+    # recon/proof/calibrate are hidden from the main --help (argparse.SUPPRESS): recon is a
+    # subset of `run`, and proof/calibrate are for developing the tool itself. They still work
+    # if invoked explicitly — the user-facing surface is just `run` (+ the advanced `dynamic`).
+    rc = sub.add_parser("recon")
+    rc.add_argument("target")
+    rc.add_argument("--out", help="output dir (default: ./websec-out)")
+    rc.set_defaults(func=cmd_recon)
+    d = sub.add_parser("doctor", help="show which scanners are installed")
+    d.add_argument("target", nargs="?", help="optional repo to scope scanner relevance")
+    d.set_defaults(func=cmd_doctor)
+    pf = sub.add_parser("proof")
+    pf.add_argument("--corpus", help="corpus JSON (default: bundled)")
+    pf.add_argument("--workdir", help="where to clone corpus apps (default: ~/.cache/websec-corpus)")
+    pf.set_defaults(func=cmd_proof)
+    cal = sub.add_parser("calibrate")
+    cal.add_argument("--corpus", help="corpus JSON with `truth` blocks (default: bundled)")
+    cal.add_argument("--workdir", help="where corpus apps are cloned (default: ~/.cache/websec-corpus)")
+    cal.add_argument("--out", help="where to write calibration.json (default: bundled, next to the package)")
+    cal.add_argument("--ingest", help="fold a hand-labeled findings JSON ({attack_class,confidence,is_real}) into your LOCAL overlay")
+    cal.set_defaults(func=cmd_calibrate)
+    dyn = sub.add_parser("dynamic", help="dynamic probes vs a LIVE target (read-only): cross-tenant BOLA (--config) or unauth reachability (--unauth)")
+    dyn.add_argument("--config", help="dynamic config JSON (target + role creds) for authenticated cross-tenant BOLA")
+    dyn.add_argument("--unauth", action="store_true", help="STRICT read-only: GET each data-read endpoint with NO auth (needs --target)")
+    dyn.add_argument("--probe-writes", action="store_true", help="also test write-verb auth enforcement (LOCALHOST-only, non-destructive)")
+    dyn.add_argument("--target", help="target base URL (for --unauth)")
+    dyn.add_argument("--facts", help="FACTS.json from a prior run (default: ./websec-out/FACTS.json)")
+    dyn.add_argument("--out", help="output dir (default: ./websec-out)")
+    dyn.set_defaults(func=cmd_dynamic)
+    return p
+_COMMANDS = {"run", "recon", "doctor", "proof", "dynamic", "calibrate"}
+def main(argv=None) -> int:
+    argv = list(argv if argv is not None else sys.argv[1:])
+    parser = build_parser()
+    if not argv:                      # bare `websec` → show help, don't error
+        parser.print_help()
+        return 0
+    # bare `websec <path>` (no subcommand) ⇒ treat as `websec run <path>` — point-and-go
+    if argv[0] not in _COMMANDS and not argv[0].startswith("-"):
+        argv = ["run"] + argv
+    args = parser.parse_args(argv)
+    return args.func(args)
+if __name__ == "__main__":
+    raise SystemExit(main())

websec_validator/constitution.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""Security constitution — the app's invariants as checkable Given/When/Then.
+Spec-kit's `constitution` idea applied to security: instead of (only) a list of
+findings, emit the rules the app MUST uphold, derived deterministically from the
+recon, each phrased as a verifiable acceptance scenario. The dynamic probes verify
+them; a matching dynamically-confirmed ledger finding flips an invariant to VIOLATED.
+This makes the output a *checkable spec*, not just prose.
+"""
+from __future__ import annotations
+def build(facts: dict, ledger: dict | None = None) -> list:
+    routes = facts.get("routes", {})
+    tgt = routes.get("targeting", {})
+    authz = facts.get("authz", {})
+    integ = facts.get("integrations", {})
+    tenant = facts.get("tenant", {})
+    # endpoints with a dynamically-confirmed access-control finding → VIOLATED
+    violated = {f["location"] for f in (ledger or {}).get("findings", [])
+                if f.get("category") == "access-control"
+                and any(e.get("layer") == "dynamic" for e in f.get("evidence", []))}
+    inv = []
+    def add(principle, statement, source, status="VERIFY"):
+        inv.append({"principle": principle, "statement": statement, "source": source, "status": status})
+    # P1 — Authentication: every non-public endpoint must reject unauthenticated access
+    n = 0
+    for eg in authz.get("endpoint_guards", []):
+        if eg.get("public_hint") or eg.get("guarded") or not eg.get("analyzed"):
+            continue
+        n += 1
+        if n > 40:
+            continue
+        status = "VIOLATED" if eg.get("path") in violated else "VERIFY"
+        add("Authentication", f"Given no auth token, When `{eg['method']} {eg['path']}`, Then 401/403 "
+            f"(no body, no mutation)", eg.get("code_path", "recon"), status)
+    if n > 40:
+        add("Authentication", f"_…and {n - 40} more endpoints with no visible guard — see findings-ledger.json_", "recon")
+    # P2 — Tenant isolation
+    for t in tenant.get("candidates", [])[:1]:
+        add("Tenant isolation", f"Given role A's token, When reading another tenant's resource via "
+            f"`{{{t['key']}}}`, Then 403/404 (no cross-tenant data)", "recon")
+    # P3 — SSRF defense
+    for s in tgt.get("ssrf_candidates", [])[:8]:
+        add("SSRF defense", f"Given a url/host param = 169.254.169.254 / RFC1918 / file://, "
+            f"When `{s}`, Then the fetch is blocked", "recon")
+    # P4 — Webhook integrity
+    for w in integ.get("webhook_endpoints", [])[:8]:
+        add("Webhook integrity", f"Given a forged or missing signature, When `{w}`, Then 401 "
+            f"(and replays inside the window are rejected)", "recon")
+    # P5 — Secret hygiene (always)
+    add("Secret hygiene", "Given the repo + git history, Then no live credential is present and no secret "
+        "reaches the client bundle", "recon")
+    return inv
+def render(inv: list) -> str:
+    by_principle: dict = {}
+    for i in inv:
+        by_principle.setdefault(i["principle"], []).append(i)
+    mark = {"VIOLATED": "🔴 VIOLATED", "VERIFY": "⬜ verify", "HOLDS": "✅ holds"}
+    out = ["# Security constitution\n",
+           "> Invariants this app must uphold, derived from recon. The dynamic probes verify them; "
+           "a dynamically-confirmed finding flips one to 🔴 VIOLATED. Treat ⬜ as a hypothesis to confirm.\n"]
+    viol = sum(1 for i in inv if i["status"] == "VIOLATED")
+    out.append(f"**{len(inv)} invariants · {viol} VIOLATED · {sum(1 for i in inv if i['status']=='VERIFY')} to verify**\n")
+    for p, items in by_principle.items():
+        out.append(f"## {p}")
+        for i in items:
+            out.append(f"- {mark.get(i['status'], i['status'])} — {i['statement']}  ·  _{i['source']}_")
+        out.append("")
+    return "\n".join(out)

websec_validator/corpus.json ADDED Viewed

@@ -0,0 +1,49 @@
+[
+  {
+    "name": "VAmPI",
+    "repo": "https://github.com/erev0s/VAmPI",
+    "language": "python",
+    "vulns": "OWASP API Top 10 — BOLA, mass-assignment, JWT weaknesses, excessive data exposure",
+    "expect": {
+      "frameworks": ["flask"],
+      "min_endpoints": 10,
+      "auth_scheme_contains": "jwt",
+      "idor_present": true
+    },
+    "truth": [
+      {"class": "missing-auth", "location_contains": "*", "note": "VAmPI is a broken-object/function-level-authorization demo: its API endpoints are intentionally missing or mis-applying auth"},
+      {"class": "sqli", "location_contains": "*", "note": "raw SQL string interpolation in the vulnerable code path"}
+    ]
+  },
+  {
+    "name": "NodeGoat",
+    "repo": "https://github.com/OWASP/NodeGoat",
+    "language": "node",
+    "vulns": "OWASP Top 10 — injection, broken auth, IDOR (allocations), access control",
+    "expect": {
+      "frameworks": ["express"],
+      "min_endpoints": 8,
+      "auth_scheme_contains": "session",
+      "idor_present": true
+    },
+    "truth": [
+      {"class": "missing-auth", "location_contains": "*", "note": "broken access control across allocation/admin routes"},
+      {"class": "command-injection", "location_contains": "*", "note": "SSJS injection via eval in the contributions handler"},
+      {"class": "ssti", "location_contains": "*", "note": "server-side template injection demo"}
+    ]
+  },
+  {
+    "name": "DVGA",
+    "repo": "https://github.com/dolevf/Damn-Vulnerable-GraphQL-Application",
+    "language": "python",
+    "vulns": "GraphQL — introspection, injection, DoS (batching/aliasing), info disclosure",
+    "expect": {
+      "frameworks": ["flask"],
+      "graphql_present": true
+    },
+    "truth": [
+      {"class": "graphql", "location_contains": "*", "note": "introspection + playground enabled, no depth/complexity limits"},
+      {"class": "command-injection", "location_contains": "*", "note": "OS command injection via the system-diagnostics resolver"}
+    ]
+  }
+]