PyPI - websec-validator - Versions diffs - 0.2.7__tar.gz → 0.2.9__tar.gz - Mend

websec-validator 0.2.7__tar.gz → 0.2.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

{websec_validator-0.2.7/src/websec_validator.egg-info → websec_validator-0.2.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: websec-validator
-Version: 0.2.7
+Version: 0.2.9
 Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
 Author: Ricardo Accioly
 License: MIT

{websec_validator-0.2.7 → websec_validator-0.2.9}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "websec-validator"
-version = "0.2.7"
+version = "0.2.9"
 description = "Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app."
 readme = "README.md"
 requires-python = ">=3.11"

{websec_validator-0.2.7 → websec_validator-0.2.9}/src/websec_validator/extractors/auth.py RENAMED Viewed

@@ -63,6 +63,7 @@ class AuthExtractor(Extractor):
         # Detect ALL schemes present, then pick a primary by priority. A JWT app
         # that also wires Passport for SSO must read as primary=jwt, not passport
         # (Passport is often SSO-only). Priority: nextauth > jwt > session > passport > api-key.
+        route_count = len(routes.get("endpoints", []))
         detected = []
         if nextauth:
             detected.append("nextauth (session JWT in cookie)")
@@ -88,6 +89,11 @@ class AuthExtractor(Extractor):
             "cookie_names": cookie_names[:15],
             "guard_files": guard_files,
             "signal_counts": {"jwt": jwt, "passport": passport, "session": session, "api_key": apikey},
-            "note": "AGENT: confirm the PRIMARY auth flow + how a test token is minted before the JWT/auth "
-                    "probes. Multiple schemes often mean primary bearer/session + secondary SSO (passport).",
+            "route_count": route_count,
+            "reliable_signal": route_count > 0 or bool(nextauth),
+            "note": (("⚠ No HTTP routes detected — this auth scheme is LOW-CONFIDENCE (likely a "
+                      "library/CLI/scanner that merely mentions auth, or routes weren't parsed). "
+                      if not (route_count > 0 or nextauth) else "")
+                     + "AGENT: confirm the PRIMARY auth flow + how a test token is minted before the "
+                     "JWT/auth probes. Multiple schemes often mean primary bearer/session + secondary SSO."),
         }

{websec_validator-0.2.7 → websec_validator-0.2.9}/src/websec_validator/extractors/authz.py RENAMED Viewed

@@ -142,6 +142,28 @@ class AuthzExtractor(Extractor):
                 for dec in sorted(set(UNSAFE_DECODER.findall(text))):
                     unsafe_decoders.append({"file": rel, "decoder": dec})
+        # A guard DEFINED in a file that also calls an unsafe/unverified decoder authenticates via
+        # an unverified decode. Routes that call such a guard are the static "at-risk" set for the
+        # forged-token bypass class — the dynamic probe confirms which actually fall, but this points
+        # at them even with NO live target (turns the F5 hypothesis into named routes).
+        unverified_routes: list = []
+        unsafe_files = {ud["file"] for ud in unsafe_decoders}
+        if unsafe_files:
+            guard_def = re.compile(r"(?:export\s+)?(?:async\s+)?(?:function|const)\s+"
+                                   r"(require\w+|ensure\w+|\w*[Aa]uth\w*|verify\w+)\b")
+            unsafe_guards = set()
+            for _p, rel, text in ctx.iter_code():
+                if rel in unsafe_files:
+                    unsafe_guards.update(g for g in guard_def.findall(text) if len(g) >= 5)
+            if unsafe_guards:
+                call = re.compile(r"\b(?:" + "|".join(re.escape(g) for g in sorted(unsafe_guards)) + r")\s*\(")
+                for e in endpoints:
+                    cp = e.get("code_path", "")
+                    t = ctx.text(Path(cp)) if cp else ""
+                    if t and call.search(t):
+                        unverified_routes.append(f"{e.get('method')} {e.get('path')}")
+            unverified_routes = sorted(set(unverified_routes))[:60]
         if global_auth:
             where = f"`{mw['file']}` (matcher {mw.get('matchers') or '—'})" if mw_auth else "`app.use(<auth>)`"
             note = (f"A GLOBAL auth middleware ({where}) was detected — most routes are protected by default. "
@@ -162,5 +184,6 @@ class AuthzExtractor(Extractor):
             "endpoint_guards": egs[:400],
             "write_endpoints_without_visible_guard": sorted(set(no_guard_writes))[:60],
             "unsafe_auth_decoders": unsafe_decoders[:30],
+            "unverified_signature_routes": unverified_routes,
             "note": note,
         }

websec_validator-0.2.9/src/websec_validator/extractors/tenant.py ADDED Viewed

@@ -0,0 +1,42 @@
+"""Tenant-boundary extractor — the multi-tenancy key candidates.
+The single most important and easiest-to-get-wrong fact for BOLA testing. The
+tool reports candidates by frequency; the agent confirms THE one with the human.
+"""
+from __future__ import annotations
+from .base import Extractor, RepoContext
+TENANT_KEYS = ["groupId", "group_id", "orgId", "org_id", "organizationId",
+               "tenantId", "tenant_id", "workspaceId", "workspace_id",
+               "accountId", "account_id", "companyId", "company_id",
+               "teamId", "team_id", "projectId", "project_id"]
+class TenantExtractor(Extractor):
+    name = "tenant"
+    category = "authz"
+    def extract(self, ctx: RepoContext, facts: dict) -> dict:
+        route_count = len((facts.get("routes") or {}).get("endpoints", []))
+        hits: dict = {}
+        files: dict = {}
+        for _p, rel, text in ctx.iter_code():
+            for key in TENANT_KEYS:
+                c = text.count(key)
+                if c:
+                    hits[key] = hits.get(key, 0) + c
+                    bucket = files.setdefault(key, [])
+                    if rel not in bucket and len(bucket) < 5:
+                        bucket.append(rel)
+        ranked = sorted(hits.items(), key=lambda kv: -kv[1])
+        return {
+            "candidates": [{"key": k, "occurrences": n, "files": files.get(k, [])} for k, n in ranked[:6]],
+            "multi_tenant_likely": bool(route_count > 0 and ranked and ranked[0][1] >= 3),
+            "route_count": route_count,
+            "note": ("AGENT: confirm with the human which key (if any) is THE tenant boundary. "
+                     "If single-tenant, skip the cross-tenant BOLA probes."
+                     + ("  ⚠ No HTTP routes detected — a tenant key here may be a string in "
+                        "library/scanner code, not a real boundary." if route_count == 0 else "")),
+        }

{websec_validator-0.2.7 → websec_validator-0.2.9}/src/websec_validator/findings.py RENAMED Viewed

@@ -157,12 +157,18 @@ def build_ledger(facts: dict, unified: dict | None, dynamic: dict | None = None,
                         f"(HTTP {lk.get('status')}, {lk.get('direction')})"}]))
     # ---- 1c. Unsafe/unverified decoder feeding an auth decision (F5) ----
-    for ud in ((facts.get("authz", {}) or {}).get("unsafe_auth_decoders", []) or []):
+    _authz = facts.get("authz", {}) or {}
+    _uvr = _authz.get("unverified_signature_routes", []) or []
+    for ud in (_authz.get("unsafe_auth_decoders", []) or []):
+        ev = [{"layer": "recon", "detail": f"{ud.get('file')} makes an auth/identity decision AND calls "
+               f"{ud.get('decoder')}() — if that decodes a token/signature WITHOUT verifying it, a forged "
+               "value is trusted (the decodeJwtPayloadUnsafe → requireAdmin class of bug). Trace the call path."}]
+        if _uvr:
+            ev.append({"layer": "recon", "detail": f"static at-risk routes ({len(_uvr)}) — call a guard defined "
+                       f"alongside this unverified decode: {', '.join(_uvr[:8])}{' …' if len(_uvr) > 8 else ''}. "
+                       "Run `websec dynamic --unauth` / the forged-token probe to confirm which accept a forged token."})
         out.append(_f(f"Auth decision uses an unverified decoder: {ud.get('decoder')}", "access-control",
-                      "unsafe-auth-decoder", "HIGH", "MEDIUM", ud.get("file", ""),
-                      [{"layer": "recon", "detail": f"{ud.get('file')} makes an auth/identity decision AND calls "
-                        f"{ud.get('decoder')}() — if that decodes a token/signature WITHOUT verifying it, a forged "
-                        "value is trusted (the decodeJwtPayloadUnsafe → requireAdmin class of bug). Trace the call path."}]))
+                      "unsafe-auth-decoder", "HIGH", "MEDIUM", ud.get("file", ""), ev))
     # ---- 1d. Forged-token acceptance — unverified signature, DYNAMICALLY CONFIRMED ----
     # The verdict for 1c: we presented an UNSIGNED/bogus-sig token and the route reached its

{websec_validator-0.2.7 → websec_validator-0.2.9}/src/websec_validator/scanners.py RENAMED Viewed

@@ -217,6 +217,27 @@ def _generic_secret(rule: str) -> bool:
     return r in _GENERIC_SECRET_RULES or "generic" in r or "entropy" in r
+# Secrets matched in DOCUMENTATION / EXAMPLE files are overwhelmingly placeholders, not live
+# credentials — e.g. `curl -H "Authorization: Bearer <token>"` in a README/API doc, or a
+# value in `.env.example`. Tier those to LOW + a verify note (still visible — a real key CAN be
+# pasted into docs by mistake). Dogfooding flagged 4 HIGH curl-auth-header FPs across an API's
+# README + docs/*.md (bug below).
+_DOC_EXT = (".md", ".mdx", ".markdown", ".rst", ".txt", ".adoc")
+_DOC_DIR_MARKERS = ("/docs/", "/doc/", "/examples/", "/example/", "/samples/", "/sample/", "/.github/")
+_DOC_NAME_PREFIX = ("readme", "changelog", "contributing", "license", "authors", "history", "notice")
+_EXAMPLE_SUFFIX = (".example", ".sample", ".dist", ".template", ".tmpl")
+_DOC_NOTE = "in a documentation/example file — almost always a placeholder, verify before treating as real"
+def _is_doc_or_example(path: str) -> bool:
+    p = (path or "").replace("\\", "/").lower()
+    base = p.rsplit("/", 1)[-1]
+    return (p.endswith(_DOC_EXT)
+            or any(m in p for m in _DOC_DIR_MARKERS)
+            or any(base.startswith(m) for m in _DOC_NAME_PREFIX)
+            or any(s in base for s in _EXAMPLE_SUFFIX))
 def _norm_trivy(data: dict) -> list:
     out = []
     for res in (data.get("Results") or []):
@@ -231,6 +252,8 @@ def _norm_trivy(data: dict) -> list:
             sev, note = _aws_secret_tier(s.get("Match", ""), s.get("Code", "") or "")
             if not sev and _generic_secret(rid):
                 sev, note = "MEDIUM", _GENERIC_NOTE
+            if _is_doc_or_example(tgt):
+                sev, note = "LOW", (note + "; " if note else "") + _DOC_NOTE
             title = f"secret: {s.get('Title') or rid}" + (f" — {note}" if note else "")
             out.append({"tool": "trivy", "category": "secret", "severity": sev or _sev(s.get("Severity") or "HIGH"),
                         "key": rid, "file": tgt, "line": s.get("StartLine", 0),
@@ -250,6 +273,8 @@ def _norm_gitleaks(data) -> list:
         sev, note = _aws_secret_tier(x.get("Secret", ""), x.get("Match", ""))
         if not sev and _generic_secret(rule):
             sev, note = "MEDIUM", _GENERIC_NOTE
+        if _is_doc_or_example(f):
+            sev, note = "LOW", (note + "; " if note else "") + _DOC_NOTE
         title = f"secret: {(x.get('Description') or rule)[:80]}" + (f" — {note}" if note else "")
         out.append({"tool": "gitleaks", "category": "secret", "severity": sev or "HIGH",
                     "key": rule, "file": f, "line": x.get("StartLine", 0),

{websec_validator-0.2.7 → websec_validator-0.2.9/src/websec_validator.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: websec-validator
-Version: 0.2.7
+Version: 0.2.9
 Summary: Local-first security recon that briefs your AI coding agent: facts + tailored probe scripts, code-in / artifacts-out. No LLM, no server, no running app.
 Author: Ricardo Accioly
 License: MIT

{websec_validator-0.2.7 → websec_validator-0.2.9}/tests/test_hardening.py RENAMED Viewed

@@ -18,7 +18,9 @@ sys.path.insert(0, str(ROOT / "src"))
 from websec_validator import dynamic, findings, probes, scanners  # noqa: E402
 from websec_validator.extractors.auth import AuthExtractor  # noqa: E402
+from websec_validator.extractors.authz import AuthzExtractor  # noqa: E402
 from websec_validator.extractors.base import RepoContext  # noqa: E402
+from websec_validator.extractors.tenant import TenantExtractor  # noqa: E402
 FACTS = {"routes": {"endpoints": [
     {"method": "GET", "path": "/api/bypass"},      # gated; accepts forged token  -> BYPASS
@@ -171,6 +173,34 @@ class SecretPrecisionTests(unittest.TestCase):
         self.assertEqual(hit["confidence"], "MEDIUM")
+class DocExampleSecretTests(unittest.TestCase):
+    """0.2.8: secrets in documentation/example files (curl examples in a README, .env.example
+    placeholders) tier to LOW + a verify note. Real code files are untouched."""
+    def test_is_doc_or_example(self):
+        self.assertTrue(scanners._is_doc_or_example("README.md"))
+        self.assertTrue(scanners._is_doc_or_example("docs/API-REFERENCE.md"))
+        self.assertTrue(scanners._is_doc_or_example(".env.example"))
+        self.assertTrue(scanners._is_doc_or_example("config/settings.sample.json"))
+        self.assertFalse(scanners._is_doc_or_example("src/app/route.ts"))
+    def test_gitleaks_doc_secret_to_low_code_stays_high(self):
+        rows = [
+            {"File": "README.md", "RuleID": "curl-auth-header", "Secret": "x" * 30, "Match": "Authorization: Bearer x", "StartLine": 1},
+            {"File": "src/server.ts", "RuleID": "private-key", "Secret": "-----BEGIN", "Match": "-----BEGIN", "StartLine": 1},
+        ]
+        by = {r["file"]: r for r in scanners._norm_gitleaks(rows)}
+        self.assertEqual(by["README.md"]["severity"], "LOW")
+        self.assertIn("documentation/example", by["README.md"]["title"])
+        self.assertEqual(by["src/server.ts"]["severity"], "HIGH")  # real code file untouched
+    def test_trivy_doc_secret_to_low(self):
+        data = {"Results": [{"Target": "docs/SECURITY.md", "Secrets": [
+            {"RuleID": "curl-auth-header", "Title": "Auth header", "Match": "Bearer x", "StartLine": 1}]}]}
+        secs = [f for f in scanners._norm_trivy(data) if f["category"] == "secret"]
+        self.assertEqual(secs[0]["severity"], "LOW")
 class CookieCoverageTests(unittest.TestCase):
     """0.2.7: extract auth cookie names so the forged-token engine covers cookie-ONLY apps."""
@@ -203,5 +233,61 @@ class CookieCoverageTests(unittest.TestCase):
         self.assertTrue(r["bypassed"][0]["via"].startswith("cookie:"))
+class NonWebAppFPTests(unittest.TestCase):
+    """0.2.9 (bug-081): on a 0-route repo (library/CLI/scanner) FLAG auth/tenant as low-confidence
+    + record tenant evidence files — but NEVER suppress. Suppression would be fragile (depends on
+    the optional noir route scanner) and could drop a real backend whose routes didn't parse."""
+    def test_auth_low_confidence_without_routes_but_still_detected(self):
+        with tempfile.TemporaryDirectory() as d:
+            d = Path(d)
+            (d / "patterns.ts").write_text("const RULE = 'express-session';\n")
+            out = AuthExtractor().extract(RepoContext(d), {"stack": {"frameworks": []}, "routes": {"endpoints": []}})
+        self.assertFalse(out["reliable_signal"])                   # 0 routes, no framework -> flagged
+        self.assertIn("session-cookie", out["schemes_detected"])   # NOT suppressed
+        self.assertIn("No HTTP routes", out["note"])               # caveat surfaced
+    def test_auth_reliable_with_routes(self):
+        with tempfile.TemporaryDirectory() as d:
+            d = Path(d)
+            (d / "app.ts").write_text("const RULE = 'express-session';\n")
+            out = AuthExtractor().extract(RepoContext(d), {"stack": {"frameworks": []},
+                                                           "routes": {"endpoints": [{"method": "GET", "path": "/x"}]}})
+        self.assertTrue(out["reliable_signal"])
+    def test_tenant_records_files_and_not_multitenant_without_routes(self):
+        with tempfile.TemporaryDirectory() as d:
+            d = Path(d)
+            (d / "a.ts").write_text("const x = groupId; const y = groupId; const z = groupId;\n")  # x3
+            out = TenantExtractor().extract(RepoContext(d), {"routes": {"endpoints": []}})
+        gc = next(c for c in out["candidates"] if c["key"] == "groupId")
+        self.assertIn("a.ts", gc["files"])                         # evidence recorded
+        self.assertFalse(out["multi_tenant_likely"])               # 0 routes -> not asserted even at >=3
+    def test_tenant_multitenant_with_routes(self):
+        with tempfile.TemporaryDirectory() as d:
+            d = Path(d)
+            (d / "a.ts").write_text("groupId groupId groupId\n")   # x3
+            out = TenantExtractor().extract(RepoContext(d), {"routes": {"endpoints": [{"method": "GET", "path": "/x"}]}})
+        self.assertTrue(out["multi_tenant_likely"])                # routes + >=3 -> asserted
+class StaticAtRiskRouteTests(unittest.TestCase):
+    """0.2.9 (B): routes calling a guard defined alongside an unverified decoder are listed
+    statically — the forged-token bypass set, even with no live target."""
+    def test_unverified_signature_routes_listed(self):
+        with tempfile.TemporaryDirectory() as d:
+            d = Path(d)
+            (d / "auth.ts").write_text(
+                "export async function requireAuth(req){ const p = decodeJwtPayloadUnsafe(t); return p; }\n")
+            (d / "route.ts").write_text(
+                "import {requireAuth} from './auth';\nexport async function GET(req){ await requireAuth(req); }\n")
+            facts = {"routes": {"endpoints": [
+                {"method": "GET", "path": "/api/x", "code_path": str(d / "route.ts")}]}}
+            out = AuthzExtractor().extract(RepoContext(d), facts)
+        self.assertIn("GET /api/x", out["unverified_signature_routes"])
 if __name__ == "__main__":
     unittest.main()

websec_validator-0.2.7/src/websec_validator/extractors/tenant.py DELETED Viewed

@@ -1,33 +0,0 @@
-"""Tenant-boundary extractor — the multi-tenancy key candidates.
-The single most important and easiest-to-get-wrong fact for BOLA testing. The
-tool reports candidates by frequency; the agent confirms THE one with the human.
-"""
-from __future__ import annotations
-from .base import Extractor, RepoContext
-TENANT_KEYS = ["groupId", "group_id", "orgId", "org_id", "organizationId",
-               "tenantId", "tenant_id", "workspaceId", "workspace_id",
-               "accountId", "account_id", "companyId", "company_id",
-               "teamId", "team_id", "projectId", "project_id"]
-class TenantExtractor(Extractor):
-    name = "tenant"
-    category = "authz"
-    def extract(self, ctx: RepoContext, facts: dict) -> dict:
-        hits: dict = {}
-        for _p, _rel, text in ctx.iter_code():
-            for key in TENANT_KEYS:
-                if key in text:
-                    hits[key] = hits.get(key, 0) + text.count(key)
-        ranked = sorted(hits.items(), key=lambda kv: -kv[1])
-        return {
-            "candidates": [{"key": k, "occurrences": n} for k, n in ranked[:6]],
-            "multi_tenant_likely": bool(ranked and ranked[0][1] >= 3),
-            "note": "AGENT: confirm with the human which key (if any) is THE tenant boundary. "
-                    "If single-tenant, skip the cross-tenant BOLA probes.",
-        }