PyPI - websec-validator - Versions diffs - 0.2.2__tar.gz → 0.2.4__tar.gz - Mend

websec-validator 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

{websec_validator-0.2.2/src/websec_validator.egg-info → websec_validator-0.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: websec-validator
-Version: 0.2.2
+Version: 0.2.4
 Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
 Author: Ricardo Accioly
 License: MIT
@@ -171,9 +171,9 @@ the next dynamic probes (explicitly gated — they mutate).
 ## Validated on
-HugoCross (Next.js), `wu-whatsappinbox` (106-service Express/AWS monorepo), VAmPI, NodeGoat, DVGA —
-independently reproducing a hand-done pentest's findings (tenant boundary, SSO-endpoint SSRF, media
-upload, conversation-BOLA routes, roles).
+A production Next.js app, a large Express/AWS monorepo, and the VAmPI / NodeGoat / DVGA vuln-app
+corpus — independently reproducing a hand-done pentest's findings (tenant boundary, SSRF, file
+upload, cross-tenant BOLA, role/authz gaps).
 ## Tests

{websec_validator-0.2.2 → websec_validator-0.2.4}/README.md RENAMED Viewed

@@ -159,9 +159,9 @@ the next dynamic probes (explicitly gated — they mutate).
 ## Validated on
-HugoCross (Next.js), `wu-whatsappinbox` (106-service Express/AWS monorepo), VAmPI, NodeGoat, DVGA —
-independently reproducing a hand-done pentest's findings (tenant boundary, SSO-endpoint SSRF, media
-upload, conversation-BOLA routes, roles).
+A production Next.js app, a large Express/AWS monorepo, and the VAmPI / NodeGoat / DVGA vuln-app
+corpus — independently reproducing a hand-done pentest's findings (tenant boundary, SSRF, file
+upload, cross-tenant BOLA, role/authz gaps).
 ## Tests

{websec_validator-0.2.2 → websec_validator-0.2.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "websec-validator"
-version = "0.2.2"
+version = "0.2.4"
 description = "Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app."
 readme = "README.md"
 requires-python = ">=3.11"

{websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/briefing.py RENAMED Viewed

@@ -118,6 +118,7 @@ credentials** — ask the human, never fabricate, never hit production.
 - **Datastores:** {", ".join(stack.get("datastores", [])) or "?"}  ·  **Monorepo:** {stack.get("monorepo", False)}
 - **Auth scheme:** `{auth.get("scheme","?")}` (token in {auth.get("token_location","?")})  ·  guard files: {len(auth.get("guard_files", []))}
 - **Route engine:** {routes.get("engine","?")}  ·  **{routes.get('count',0)} endpoints**  ·  by method: {routes.get("by_method", {})}
+{("> " + routes["note"]) if routes.get("note") else ""}
 ## 2. ★ Tenant boundary (confirm first — highest value, easiest to get wrong)

{websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/cli.py RENAMED Viewed

@@ -87,7 +87,7 @@ def cmd_run(args) -> int:
     print(f"websec-validator v{__version__}  ·  target: {target}  ·  run {ts}\n")
     # 1. recon
-    facts = recon.build_facts(target, __version__)
+    facts = recon.build_facts(target, __version__, args.exclude)
     recon.write_facts(facts, out / "FACTS.json")
     langs = facts["stack"]["languages"]
     _print_facts_summary(facts)
@@ -98,7 +98,8 @@ def cmd_run(args) -> int:
     unified = None
     if args.scan:
         print("\n  running available static scanners (read-only)…")
-        scan_results = scanners.run_available(target, out, langs)
+        only = args.scanners.split(",") if args.scanners else None
+        scan_results = scanners.run_available(target, out, langs, excludes=args.exclude, only=only)
         for r in scan_results:
             tag = r.get("findings", r.get("status", "?"))
             print(f"    {r['name']}: {tag}")
@@ -342,6 +343,10 @@ def build_parser() -> argparse.ArgumentParser:
     r.add_argument("target")
     r.add_argument("--scan", action="store_true", help="also execute available static scanners")
     r.add_argument("--out", help="output dir (default: ./websec-out)")
+    r.add_argument("--exclude", action="append", metavar="PATH",
+                   help="exclude a path/glob from recon + scanners (repeatable; e.g. --exclude 'docs/**')")
+    r.add_argument("--scanners", metavar="A,B",
+                   help="comma-separated subset of scanners to run with --scan (e.g. gitleaks,semgrep)")
     r.set_defaults(func=cmd_run)
     # recon/proof/calibrate are hidden from the main --help (argparse.SUPPRESS): recon is a

{websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/__init__.py RENAMED Viewed

@@ -39,9 +39,9 @@ REGISTRY: list[Extractor] = [
 ]
-def run_all(root: Path, version: str) -> dict:
+def run_all(root: Path, version: str, excludes: list | None = None) -> dict:
     """Walk the repo once, run every extractor, return the merged FACTS dict."""
-    ctx = RepoContext(root)
+    ctx = RepoContext(root, excludes)
     facts: dict = {
         "tool": "websec-validator",
         "version": version,

{websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/auth.py RENAMED Viewed

@@ -47,7 +47,7 @@ class AuthExtractor(Extractor):
         # Detect ALL schemes present, then pick a primary by priority. A JWT app
         # that also wires Passport for SSO must read as primary=jwt, not passport
-        # (the bug the WhatsApp app exposed). Priority: nextauth > jwt > session > passport > api-key.
+        # (Passport is often SSO-only). Priority: nextauth > jwt > session > passport > api-key.
         detected = []
         if nextauth:
             detected.append("nextauth (session JWT in cookie)")

{websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/authz.py RENAMED Viewed

@@ -52,6 +52,14 @@ ROLE = re.compile(
     r"has_?[Rr]ole\s*\(\s*['\"]([\w:.-]+)['\"]|"
     r"authorizeRoles\s*\(([^)]*)\)|permission_required\s*\(\s*['\"]([\w:.-]+)['\"]")
+# F5: a call to a decoder/parser named "unsafe"/"unverified"/"noVerify"/"skipVerify"
+# (e.g. decodeJwtPayloadUnsafe) — dangerous when its result feeds an auth decision.
+UNSAFE_DECODER = re.compile(r"\b([A-Za-z_]\w*(?:[Uu]nsafe|[Uu]nverified|[Nn]o[Vv]erif\w*|[Ss]kip[Vv]erif\w*)\w*)\s*\(")
+# does this file actually make an auth/identity decision? (so the unsafe decode matters)
+AUTH_CONTEXT = re.compile(
+    r"require(?:Auth|Admin|Role|Permission)|isAdmin|authoriz|getToken\s*\(|getServerSession|"
+    r"req\.auth\b|currentUser|jwt\.(?:decode|verify)|decodeJwt", re.I)
 def _parse_next_middleware(ctx: RepoContext) -> dict:
     # Next 15.5+/16 renamed `middleware.ts` → `proxy.ts` (both filenames are valid; the
@@ -127,6 +135,13 @@ class AuthzExtractor(Extractor):
                 if e.get("method") in WRITE_VERBS and not PUBLIC_HINT.search(e.get("path", "")):
                     no_guard_writes.append(f"{e['method']} {e['path']}  ({relcp or '?'})")
+        # F5: files that make an auth decision AND call an unsafe/unverified decoder
+        unsafe_decoders = []
+        for _p, rel, text in ctx.iter_code():
+            if AUTH_CONTEXT.search(text):
+                for dec in sorted(set(UNSAFE_DECODER.findall(text))):
+                    unsafe_decoders.append({"file": rel, "decoder": dec})
         if global_auth:
             where = f"`{mw['file']}` (matcher {mw.get('matchers') or '—'})" if mw_auth else "`app.use(<auth>)`"
             note = (f"A GLOBAL auth middleware ({where}) was detected — most routes are protected by default. "
@@ -146,5 +161,6 @@ class AuthzExtractor(Extractor):
                               "no_visible_guard": no_guard, "unknown": unknown},
             "endpoint_guards": egs[:400],
             "write_endpoints_without_visible_guard": sorted(set(no_guard_writes))[:60],
+            "unsafe_auth_decoders": unsafe_decoders[:30],
             "note": note,
         }

{websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/base.py RENAMED Viewed

@@ -9,6 +9,7 @@ still say something useful.
 from __future__ import annotations
+import fnmatch
 from pathlib import Path
 SKIP_DIRS = {".git", "node_modules", "dist", "build", ".next", ".nuxt", "venv",
@@ -27,13 +28,17 @@ MAX_BYTES = 2_000_000
 class RepoContext:
     """Walk the tree once; cache file text; serve cheap queries to every extractor."""
-    def __init__(self, root: Path):
+    def __init__(self, root: Path, excludes: list | None = None):
         self.root = root
+        self.excludes = [e for e in (excludes or []) if e]   # user --exclude paths/globs
         self._text: dict[Path, str] = {}
         self.code_files: list[Path] = []
         self.stack: dict = {}          # filled by StackExtractor, read by the rest
         self._walk()
+    def _excluded(self, rel: str) -> bool:
+        return any(ex in rel or fnmatch.fnmatch(rel, ex) for ex in self.excludes)
     def _walk(self) -> None:
         n = 0
         for p in self.root.rglob("*"):
@@ -44,6 +49,8 @@ class RepoContext:
             # have its whole tree skipped.
             if p.is_dir() or any(part in SKIP_DIRS for part in p.relative_to(self.root).parts):
                 continue
+            if self.excludes and self._excluded(str(p.relative_to(self.root))):
+                continue
             if p.suffix.lower() in CODE_EXT:
                 self.code_files.append(p)
                 n += 1

{websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/extractors/routes.py RENAMED Viewed

@@ -38,6 +38,20 @@ TRAVERSAL_NAMES = re.compile(r"^(file|filename|filepath|path|dir|folder|template
 TEMPLATED = ("BASE_URL", "localhost", "127.0.0.1", "${", "{{")
 ASSET_GLOB = re.compile(r"\*\.\w+")
+# A route whose source file is a vendored/third-party API SPEC (OpenAPI/Swagger/GraphQL
+# schema), not an app handler. Noir parses these and emits their paths as if the app
+# served them — which on a repo that vendors e.g. a 16k-line swagger turns ~15 real
+# findings into hundreds of phantom ones. We split these out as informational.
+SPEC_PATH = re.compile(
+    r"\.(?:ya?ml|graphql|gql|raml)$"                                  # spec file formats
+    r"|(?:^|/)(?:node_modules|vendor|vendored|third[_-]?party|examples?|schemas?"
+    r"|(?:docs?|documentation)[\w-]*)/"                               # vendor/docs/schema dirs
+    r"|swagger|openapi", re.I)
+def _is_spec_derived(code_path: str) -> bool:
+    return bool(code_path) and bool(SPEC_PATH.search(code_path))
 def _clean_path(p: str) -> str:
     p = re.sub(r":(\w+)", r"{\1}", p)    # Express :id  -> {id}
@@ -53,16 +67,17 @@ def _is_noise(path: str) -> bool:
     return bool(ASSET_GLOB.search(path))   # static-asset glob route (/*.png)
-def _noir_scan(root: Path) -> list | None:
+def _noir_scan(root: Path, extra_excludes: list | None = None) -> list | None:
     """Run Noir → list of endpoint dicts, or None if Noir unavailable/failed."""
     if not shutil.which("noir"):
         return None
+    excl = EXCLUDE_GLOBS + ("," + ",".join(extra_excludes) if extra_excludes else "")
     with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tf:
         out = Path(tf.name)
     try:
         proc = subprocess.run(
             ["noir", "scan", str(root), "-f", "json", "-o", str(out),
-             "--exclude-path", EXCLUDE_GLOBS, "--no-log", "--no-color"],
+             "--exclude-path", excl, "--no-log", "--no-color"],
             capture_output=True, text=True, timeout=300)
         if not out.exists():
             return None
@@ -77,8 +92,10 @@ def _noir_scan(root: Path) -> list | None:
             pass
-def _normalize_noir(eps: list) -> list:
-    rows, seen = [], set()
+def _normalize_noir(eps: list) -> tuple:
+    """→ (app_routes, spec_derived_routes). Routes whose source file is a vendored API
+    spec are split out so they don't generate phantom findings (B1)."""
+    rows, spec, seen = [], [], set()
     for e in eps:
         if e.get("internal"):
             continue
@@ -89,21 +106,22 @@ def _normalize_noir(eps: list) -> list:
         if _is_noise(path):
             continue
         method = (e.get("method") or "GET").upper()
-        if (method, path) in seen:
-            continue
-        seen.add((method, path))
-        params = [{"name": p.get("name", ""), "where": p.get("param_type", "")}
-                  for p in (e.get("params") or [])]
         cp = (e.get("details", {}) or {}).get("code_paths") or [{}]
-        rows.append({
+        code_path = cp[0].get("path", "")
+        if (method, path, code_path) in seen:
+            continue
+        seen.add((method, path, code_path))
+        row = {
             "method": method,
             "path": path,
-            "params": params,
+            "params": [{"name": p.get("name", ""), "where": p.get("param_type", "")}
+                       for p in (e.get("params") or [])],
             "technology": (e.get("details", {}) or {}).get("technology", ""),
-            "code_path": cp[0].get("path", ""),
+            "code_path": code_path,
             "source": "noir",
-        })
-    return rows
+        }
+        (spec if _is_spec_derived(code_path) else rows).append(row)
+    return rows, spec
 # ---- regex fallback (only when Noir is absent) ---------------------------------------------
@@ -193,19 +211,23 @@ class RoutesExtractor(Extractor):
     category = "surface"
     def extract(self, ctx: RepoContext, facts: dict) -> dict:
-        eps = _noir_scan(ctx.root)
+        eps = _noir_scan(ctx.root, getattr(ctx, "excludes", None))
         if eps is not None:
-            routes = _normalize_noir(eps)
+            routes, spec_derived = _normalize_noir(eps)
             engine = "noir"
         else:
-            routes = _fallback(ctx)
+            routes, spec_derived = _fallback(ctx), []
             engine = "regex-fallback (install OWASP Noir for full coverage: brew install noir)"
+        # honor user --exclude against route code_paths too (Noir's own --exclude-path glob is
+        # unreliable for bare dir names; this guarantees `--exclude <path>` drops those routes).
+        if getattr(ctx, "excludes", None):
+            routes = [r for r in routes if not ctx._excluded(r.get("code_path", ""))]
         by_method: dict = {}
         by_tech: dict = {}
         for r in routes:
             by_method[r["method"]] = by_method.get(r["method"], 0) + 1
             by_tech[r["technology"]] = by_tech.get(r["technology"], 0) + 1
-        return {
+        out = {
             "engine": engine,
             "count": len(routes),
             "by_method": by_method,
@@ -213,3 +235,12 @@ class RoutesExtractor(Extractor):
             "endpoints": routes,
             "targeting": _derive(routes),
         }
+        if spec_derived:
+            from collections import Counter
+            srcs = Counter(r["code_path"] for r in spec_derived)
+            out["spec_derived_excluded"] = len(spec_derived)
+            out["spec_derived_sources"] = [f"{n}× {f}" for f, n in srcs.most_common(8)]
+            out["note"] = (f"⚠ {len(spec_derived)} routes came from vendored API SPEC files "
+                           f"(OpenAPI/Swagger/GraphQL), not app handlers — EXCLUDED from the {len(routes)} "
+                           f"app routes + all findings. Sources: {', '.join(f for f, _ in srcs.most_common(5))}.")
+        return out

{websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/findings.py RENAMED Viewed

@@ -26,6 +26,9 @@ from . import calibration
 STANDARDS = {
     "missing-auth": (["CWE-862 Missing Authorization", "CWE-306 Missing Authentication"],
                      "ASVS V4.1.1", ["API1:2023 BOLA", "API5:2023 BFLA"]),
+    "unsafe-auth-decoder": (["CWE-347 Improper Verification of Cryptographic Signature",
+                             "CWE-345 Insufficient Verification of Data Authenticity"],
+                            "ASVS V3.5.2", ["API2:2023 Broken Authentication"]),
     "bola": (["CWE-639 Authorization Bypass (IDOR)"], "ASVS V4.2.1", ["API1:2023 BOLA"]),
     "ssrf": (["CWE-918 SSRF"], "ASVS V12.6", ["API7:2023 SSRF"]),
     "secret": (["CWE-798 Hard-coded Credentials"], "ASVS V2.10", ["API8:2023 Misconfiguration"]),
@@ -48,6 +51,9 @@ REMEDIATION = {
     "missing-auth": "Add an auth guard to the handler (e.g. requireAuth()/getServerSession()), or a "
                     "middleware matcher over /api/(.*) with an explicit public allowlist so it can't be forgotten.",
     "bola": "Enforce object ownership: verify the authenticated principal owns/can access the resource id (tenant scope).",
+    "unsafe-auth-decoder": "Verify the token/signature before trusting it for an auth/identity decision — use a "
+                           "verifying decode (e.g. jwt.verify with the key / a checked session), never an *Unsafe* "
+                           "or decode-only path whose output then feeds requireAuth/requireAdmin.",
     "ssrf": "Validate + allowlist outbound URLs; block RFC1918/IMDS/file://; never fetch a raw user-supplied URL.",
     "secret": "Rotate the credential, remove from code/history, load from a secrets manager.",
     "cve": "Upgrade the dependency to the fixed version.",
@@ -150,6 +156,14 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
                       [{"layer": "dynamic", "detail": f"cross-tenant GET returned another tenant's data "
                         f"(HTTP {lk.get('status')}, {lk.get('direction')})"}]))
+    # ---- 1c. Unsafe/unverified decoder feeding an auth decision (F5) ----
+    for ud in ((facts.get("authz", {}) or {}).get("unsafe_auth_decoders", []) or []):
+        out.append(_f(f"Auth decision uses an unverified decoder: {ud.get('decoder')}", "access-control",
+                      "unsafe-auth-decoder", "HIGH", "MEDIUM", ud.get("file", ""),
+                      [{"layer": "recon", "detail": f"{ud.get('file')} makes an auth/identity decision AND calls "
+                        f"{ud.get('decoder')}() — if that decodes a token/signature WITHOUT verifying it, a forged "
+                        "value is trusted (the decodeJwtPayloadUnsafe → requireAdmin class of bug). Trace the call path."}]))
     # ---- 2. Static scanner findings (de-duplicated `unified`) ----
     cat_to_class = {"sca": "cve", "secret": "secret", "iac": "iac", "sast": "sast"}
     for t in (unified or {}).get("top", []):

{websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/probes.py RENAMED Viewed

@@ -136,6 +136,11 @@ def stage(chosen: list, outdir: Path, facts: dict | None = None) -> list:
     manifest = [{"key": "_context", "file": "probes/probe-context.json",
                  "note": "the target's real routes/auth/fields — finalize the drafts against this"}]
     src_root = resources.files("websec_validator").joinpath("templates/probes")
+    # always ship the shared helper the Python probes import (load context + env auth)
+    try:
+        (dest / "_lib.py").write_text(src_root.joinpath("_lib.py").read_text())
+    except Exception:
+        pass
     for key in chosen:
         fname, attack, needs = PROBES[key]
         targets = (tgt.get(_TARGET_KEYS[key], []) if key in _TARGET_KEYS else [])[:15]

{websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/recon.py RENAMED Viewed

@@ -14,8 +14,8 @@ from .extractors.base import RepoContext
 from .extractors.stack import StackExtractor
-def build_facts(root: Path, version: str) -> dict:
-    return extractors.run_all(root, version)
+def build_facts(root: Path, version: str, excludes: list | None = None) -> dict:
+    return extractors.run_all(root, version, excludes)
 def write_facts(facts: dict, out: Path) -> Path:

{websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/report.py RENAMED Viewed

@@ -43,8 +43,12 @@ def render(facts: dict, scanners: dict, scan_results: list, unified: dict | None
             chain = " → ".join(e["layer"] for e in f["evidence"])
             api = (" · " + ", ".join(f["standards"]["owasp_api"])) if f["standards"]["owasp_api"] else ""
             cal = f.get("calibrated") or {}
-            calstr = (f" · P(real)≈**{cal.get('p')}** CI {cal.get('ci')} (n={cal.get('n')}, {cal.get('basis')})"
-                      if cal else "")
+            if not cal:
+                calstr = ""
+            elif cal.get("n", 0) == 0 or str(cal.get("basis", "")).startswith("prior"):
+                calstr = " · P(real): _uncalibrated — verify manually_"   # don't dress n=0 as a measurement (B4)
+            else:
+                calstr = f" · P(real)≈**{cal.get('p')}** CI {cal.get('ci')} (n={cal.get('n')}, {cal.get('basis')})"
             _ll.append(f"- **[{f['severity']}/{f['confidence']}]** {f['title']}  \n"
                        f"  `{f['location']}` · evidence: {chain} · {cwe}{api}{calstr}  \n"
                        f"  _fix:_ {f['remediation']}")
@@ -68,7 +72,7 @@ def render(facts: dict, scanners: dict, scan_results: list, unified: dict | None
 | | |
 |---|---|
 | Stack | {", ".join(stack.get("languages", [])) or "?"} · {", ".join(stack.get("frameworks", [])) or "?"} · {", ".join(stack.get("datastores", [])) or "?"} |
-| Endpoints | **{routes.get('count', 0)}** (via {routes.get('engine','?').split(' ')[0]}) |
+| Endpoints | **{routes.get('count', 0)}** app routes (via {routes.get('engine','?').split(' ')[0]}){(" · " + str(routes.get('spec_derived_excluded')) + " spec-derived excluded") if routes.get('spec_derived_excluded') else ""} |
 | Auth | {facts.get('auth', {}).get('scheme','?')} · roles: {', '.join(authz.get('roles_detected', [])) or 'none'} |
 | Access control | {gs.get('with_visible_guard', 0)} guarded · **{gs.get('no_visible_guard', 0)} no visible guard** · global-middleware: {authz.get('global_auth_middleware', False)} |
 | Static scanner (raw, pre-triage) | {sev_line} |

{websec_validator-0.2.2 → websec_validator-0.2.4}/src/websec_validator/scanners.py RENAMED Viewed

@@ -40,27 +40,27 @@ EXCLUDE_DIRS = ("websec-out", "node_modules", ".next", "dist", "build", ".git",
                 "security", ".venv", "venv", "__pycache__", ".mypy_cache", "coverage")
-def _trivy(target: Path, out: Path) -> list:
+def _trivy(target: Path, out: Path, excludes=()) -> list:
     # SCA + secrets + IaC misconfig in one pass; pinned by the user's install.
     cmd = ["trivy", "fs", "--scanners", "vuln,secret,misconfig", "--format", "json", "--output", str(out)]
-    for d in EXCLUDE_DIRS:
+    for d in list(EXCLUDE_DIRS) + list(excludes):
         cmd += ["--skip-dirs", d]
     return cmd + [str(target)]
-def _gitleaks(target: Path, out: Path) -> list:
+def _gitleaks(target: Path, out: Path, excludes=()) -> list:
     return ["gitleaks", "detect", "--source", str(target), "--no-banner",
             "--report-format", "json", "--report-path", str(out)]
-def _semgrep(target: Path, out: Path) -> list:
+def _semgrep(target: Path, out: Path, excludes=()) -> list:
     cmd = ["semgrep", "scan", "--config", "auto", "--json", "--output", str(out)]
-    for d in EXCLUDE_DIRS:
+    for d in list(EXCLUDE_DIRS) + list(excludes):
         cmd += ["--exclude", d]
     return cmd + [str(target)]
-def _checkov(target: Path, out: Path) -> list:
+def _checkov(target: Path, out: Path, excludes=()) -> list:
     return ["checkov", "-d", str(target), "--compact", "-o", "json",
             "--output-file-path", str(out.parent)]
@@ -103,26 +103,31 @@ def detect(stack_languages: list | None = None) -> dict:
 def run_available(target: Path, outdir: Path, stack_languages: list | None = None,
-                  timeout: int = 600) -> list:
+                  timeout: int = 600, excludes: list | None = None, only: list | None = None) -> list:
     """Execute every available, runnable static scanner. Returns per-scanner status.
+    `excludes`: extra paths/dirs to skip (--exclude). `only`: run just these scanner keys.
     Raw JSON lands in outdir/scanners/<key>.json. We capture status only here;
     cross-tool normalization + de-duplication is a separate (next) step.
     """
     langs = set(stack_languages or [])
+    excludes = excludes or []
+    only = set(only) if only else None
     scan_dir = outdir / "scanners"
     scan_dir.mkdir(parents=True, exist_ok=True)
     results = []
     for s in REGISTRY:
         if s.argv is None:
             continue  # detect-only for now
+        if only is not None and s.key not in only:
+            continue
         if s.languages and not (set(s.languages) & langs):
             continue
         if not shutil.which(s.binary):
             continue
         out_file = scan_dir / f"{s.key}.json"
         try:
-            proc = subprocess.run(s.argv(target, out_file), capture_output=True,
+            proc = subprocess.run(s.argv(target, out_file, excludes), capture_output=True,
                                   text=True, timeout=timeout)
             results.append({"key": s.key, "name": s.name, "category": s.category,
                             "exit_code": proc.returncode, "output": str(out_file),

websec_validator-0.2.4/src/websec_validator/templates/probes/_lib.py ADDED Viewed

@@ -0,0 +1,90 @@
+"""Shared probe helpers — load THIS target's real surface from probe-context.json
+(written by `websec run`) and auth/ids from environment variables.
+Why env vars: recon gives you the real endpoints, auth scheme, and tenant key — but it
+cannot mint live tokens or know real object ids. You (or your agent, against a TEST
+instance) supply those:
+    TARGET=http://localhost:3000          # base URL (or set target_base_url in probe-context.json)
+    TOKEN_A=...  TOKEN_B=...               # bearer JWTs for two test accounts (different tenants)
+    COOKIE_A=...  COOKIE_B=...             # OR session cookies (e.g. NextAuth) instead of bearer
+    APIKEY=...                             # OR an API key
+    OBJ_A=...  OBJ_B=...                   # a sample object id owned by each account/tenant
+    GROUP_A=...  GROUP_B=...               # each account's tenant/group id (defaults to OBJ_* if unset)
+Run only against a TEST instance you're authorized to probe. Never production.
+"""
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+_HERE = Path(__file__).resolve().parent
+def context() -> dict:
+    p = _HERE / "probe-context.json"
+    if not p.is_file():
+        sys.exit("probe-context.json not found next to this probe — run `websec run <repo>` and use "
+                 "the probes/ it stages (probe-context.json holds this app's real routes/auth).")
+    return json.loads(p.read_text())
+def base_url() -> str:
+    u = os.environ.get("TARGET") or context().get("target_base_url", "")
+    if not u or u.startswith("FILL"):
+        sys.exit("Set TARGET=http://host:port (or fill target_base_url in probe-context.json).")
+    return u.rstrip("/")
+def auth_headers(role: str = "A") -> list:
+    """Auth header for a role (A/B), adapting to whatever the operator supplied."""
+    tok = os.environ.get(f"TOKEN_{role}")
+    cookie = os.environ.get(f"COOKIE_{role}")
+    apikey = os.environ.get("APIKEY")
+    if tok:
+        return ["-H", f"Authorization: Bearer {tok}"]
+    if cookie:
+        return ["-H", f"Cookie: {cookie}"]
+    if apikey:
+        return ["-H", f"X-API-Key: {apikey}"]
+    return []  # unauthenticated
+def require(*names: str) -> None:
+    missing = [n for n in names if not os.environ.get(n)]
+    if missing:
+        sys.exit(f"This probe needs these env var(s): {', '.join(missing)}. See _lib.py for the list.")
+def curl(method: str, url: str, headers=None, body=None, timeout: int = 20):
+    """Returns (status_code, body_text). Never raises on HTTP errors."""
+    cmd = ["curl", "-s", "-X", method, url, "-w", "\nHTTP_CODE:%{http_code}",
+           "--max-time", str(timeout)] + (headers or [])
+    if body is not None:
+        cmd += ["-H", "content-type: application/json", "-d", json.dumps(body)]
+    out = subprocess.run(cmd, capture_output=True, text=True).stdout
+    code = int(out.split("HTTP_CODE:")[-1].strip()) if "HTTP_CODE:" in out else 0
+    return code, out.split("\nHTTP_CODE:")[0]
+def tenant_key(default: str = "groupId") -> str:
+    keys = context().get("tenant_keys") or []
+    return keys[0] if keys else default
+def write_endpoints() -> list:
+    """[(METHOD, path), …] for this app's mutating routes, from probe-context.json."""
+    out = []
+    for ep in context().get("endpoints", {}).get("writes", []):
+        parts = ep.split(" ", 1)
+        if len(parts) == 2:
+            out.append((parts[0], parts[1]))
+    return out
+def save(name: str, findings: list) -> Path:
+    out = _HERE / f"{name}-findings.json"
+    out.write_text(json.dumps(findings, indent=2) + "\n")
+    return out

websec_validator-0.2.4/src/websec_validator/templates/probes/bola-cross-tenant.sh ADDED Viewed

@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+# BOLA / cross-tenant READ probe — FACTS-driven. Role A uses its OWN token against
+# tenant B's id (and B→A), on this app's tenant-scoped routes (from probe-context.json).
+# Expect 401/403/404. A 200 that returns the OTHER tenant's data = cross-tenant BOLA
+# (OWASP API #1) — the thing an automated scanner can't tell from "just another 200".
+#
+# Env: TARGET, TOKEN_A, TOKEN_B (two accounts in DIFFERENT tenants), GROUP_A, GROUP_B
+# (each account's tenant/group id). Bearer auth; cookie users: swap the -H below.
+# Run only against a TEST instance you're authorized to probe.
+set -uo pipefail
+cd "$(dirname "$0")"
+ctx=probe-context.json
+BASE="${TARGET:-$(python3 -c "import json;print(json.load(open('$ctx'))['target_base_url'])" 2>/dev/null)}"
+if [ -z "${BASE:-}" ] || [ "${BASE#FILL}" != "$BASE" ]; then echo "Set TARGET=http://host:port (or fill probe-context.json)"; exit 2; fi
+: "${TOKEN_A:?set TOKEN_A=<jwt for an account in tenant A>}"
+: "${TOKEN_B:?set TOKEN_B=<jwt for an account in a DIFFERENT tenant>}"
+: "${GROUP_A:?set GROUP_A=<tenant/group id of account A>}"
+: "${GROUP_B:?set GROUP_B=<tenant/group id of account B>}"
+PATHS=()   # (portable; macOS bash 3.2 lacks `mapfile`)
+while IFS= read -r line; do [ -n "$line" ] && PATHS+=("$line"); done < <(python3 -c "
+import json
+c = json.load(open('$ctx'))['endpoints']
+cand = c.get('idor_candidates') or [w.split(' ',1)[1] for w in c.get('writes',[]) if ' ' in w]
+for p in cand:
+    print(p.split(' ',1)[1] if (' ' in p and p.split(' ',1)[0].isupper()) else p)
+" 2>/dev/null)
+[ "${#PATHS[@]}" -eq 0 ] && { echo "No tenant-scoped / IDOR-candidate routes in probe-context.json."; exit 2; }
+pass=0; leak=0
+attack() {  # $1=token $2=target-group-id $3=label
+  for raw in "${PATHS[@]}"; do
+    path=$(python3 -c "import re,sys; print(re.sub(r'\{[^}]+\}', sys.argv[1], sys.argv[2]))" "$2" "$raw")
+    code=$(curl -s -o /dev/null -w '%{http_code}' -m 15 -H "Authorization: Bearer $1" "$BASE$path")
+    case "$code" in
+      401|403|404) printf '  ok    %s  %-4s %s\n' "$code" "$3" "$path"; pass=$((pass+1)) ;;
+      200|206)     printf '  LEAK  %s  %-4s %s   ← returned data for the OTHER tenant? verify\n' "$code" "$3" "$path"; leak=$((leak+1)) ;;
+      *)           printf '  ??    %s  %-4s %s\n' "$code" "$3" "$path" ;;
+    esac
+  done
+}
+echo "=== cross-tenant BOLA vs $BASE   (expect 401/403/404) ==="
+echo "--- A → B's tenant ($GROUP_B) ---"; attack "$TOKEN_A" "$GROUP_B" "A→B"
+echo "--- B → A's tenant ($GROUP_A) ---"; attack "$TOKEN_B" "$GROUP_A" "B→A"
+echo "summary: $pass blocked · $leak potential leak(s)"
+[ "$leak" -gt 0 ] && echo "A 200 means the route served the OTHER tenant's id — confirm it's actually their data (not empty / your own), then debate-verify before reporting."
+exit "$leak"

websec-validator 0.2.2__tar.gz → 0.2.4__tar.gz

websec-validator 0.2.2__tar.gz → 0.2.4__tar.gz