wardproof 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wardproof/__init__.py ADDED
@@ -0,0 +1,40 @@
1
+ """Wardproof, local-first, verifiable defensive AI agent swarms."""
2
+
3
+ from wardproof.agents.base import BaseDefensiveAgent
4
+ from wardproof.agents.detector import DetectorAgent
5
+ from wardproof.agents.responder import ResponderAgent
6
+ from wardproof.agents.verifier import VerifierAgent
7
+ from wardproof.audit.ledger import AuditLedger
8
+ from wardproof.orchestration.engine import (
9
+ CircuitBreaker,
10
+ Outcome,
11
+ SwarmOrchestrator,
12
+ Watchdog,
13
+ )
14
+ from wardproof.orchestration.factory import build_default_swarm
15
+ from wardproof.sandbox.executor import SandboxExecutor, ToolRegistry
16
+ from wardproof.sandbox.permissions import PermissionBroker, ToolGrant
17
+ from wardproof.schema import Decision, Event, Finding, Severity, Verdict
18
+
19
+ __version__ = "0.1.0"
20
+ __all__ = [
21
+ "Event",
22
+ "Decision",
23
+ "Finding",
24
+ "Verdict",
25
+ "Severity",
26
+ "AuditLedger",
27
+ "BaseDefensiveAgent",
28
+ "DetectorAgent",
29
+ "VerifierAgent",
30
+ "ResponderAgent",
31
+ "SwarmOrchestrator",
32
+ "Watchdog",
33
+ "CircuitBreaker",
34
+ "Outcome",
35
+ "build_default_swarm",
36
+ "PermissionBroker",
37
+ "ToolGrant",
38
+ "SandboxExecutor",
39
+ "ToolRegistry",
40
+ ]
@@ -0,0 +1,14 @@
1
+ """Defensive agents: Detector, Verifier, Responder, and the base class."""
2
+
3
+ from wardproof.agents.base import BaseDefensiveAgent
4
+ from wardproof.agents.detector import DetectorAgent
5
+ from wardproof.agents.responder import ResponderAgent, Response
6
+ from wardproof.agents.verifier import VerifierAgent
7
+
8
+ __all__ = [
9
+ "BaseDefensiveAgent",
10
+ "DetectorAgent",
11
+ "VerifierAgent",
12
+ "ResponderAgent",
13
+ "Response",
14
+ ]
@@ -0,0 +1,90 @@
1
+ """Base class for every defensive agent in the swarm.
2
+
3
+ Lifecycle of ``process(event)``:
4
+ 1. run_guardrails -> deterministic Findings, each recorded to the ledger
5
+ 2. decide -> subclass turns Findings (+ optional LLM 2nd opinion)
6
+ into a Decision
7
+ 3. the Decision itself is recorded to the ledger
8
+
9
+ Key principles baked in here:
10
+ * Guardrails run first and always, with or without an LLM.
11
+ * Every step is written to an append-only audit ledger the agent does not own.
12
+ * The agent's own LLM is treated as UNTRUSTED, subclasses must never let it
13
+ downgrade a hard guardrail signal (see DetectorAgent / VerifierAgent).
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from abc import ABC, abstractmethod
19
+
20
+ from wardproof.audit.ledger import AuditLedger
21
+ from wardproof.guardrails.base import Guardrail
22
+ from wardproof.llm.base import LLMClient
23
+ from wardproof.llm.null import NullLLM
24
+ from wardproof.schema import Decision, Event, Finding
25
+
26
+
27
class BaseDefensiveAgent(ABC):
    """Shared lifecycle for the defensive agents in the swarm.

    ``process(event)`` runs every applicable guardrail, passes the resulting
    findings to the subclass's ``decide`` and records the decision in the
    audit ledger. Triggered guardrail findings are recorded as they occur.
    """

    role: str = "agent"

    def __init__(
        self,
        agent_id: str,
        *,
        guardrails: list[Guardrail] | None = None,
        ledger: AuditLedger | None = None,
        llm: LLMClient | None = None,
    ) -> None:
        """Store the agent's collaborators, creating defaults for omitted ones.

        Args:
            agent_id: Name used as the ``actor`` of every ledger entry.
            guardrails: Guardrails to run, in order. Defaults to none.
            ledger: Ledger to write to. Defaults to a fresh ``AuditLedger``.
            llm: Optional LLM client. Defaults to ``NullLLM``.
        """
        self.agent_id = agent_id
        # Test against None rather than truthiness: a caller-supplied object
        # that happens to be falsy (an empty guardrail list the caller will
        # fill later, a ledger type that defines __len__) must be kept, not
        # silently swapped for a private default.
        self.guardrails = guardrails if guardrails is not None else []
        self.ledger = ledger if ledger is not None else AuditLedger()
        self.llm = llm if llm is not None else NullLLM()

    def run_guardrails(self, event: Event) -> list[Finding]:
        """Inspect *event* with every guardrail that applies to it.

        Returns all findings (triggered or not) in guardrail order. Each
        triggered finding is additionally appended to the ledger.
        """
        findings: list[Finding] = []
        for guard in self.guardrails:
            if not guard.applies_to(event):
                continue
            finding = guard.inspect(event)
            findings.append(finding)
            if finding.triggered:
                self.ledger.append(
                    actor=self.agent_id,
                    action="guardrail_triggered",
                    data={
                        "event_id": event.id,
                        "guardrail": finding.guardrail,
                        "risk": finding.risk,
                        "severity": finding.severity.value,
                        "reason": finding.reason,
                    },
                )
        return findings

    @abstractmethod
    def decide(self, event: Event, findings: list[Finding]) -> Decision:
        """Turn the guardrail findings for *event* into a Decision."""
        ...

    def process(self, event: Event) -> Decision:
        """Run guardrails, obtain the subclass decision and record it.

        Returns the Decision produced by ``decide``.
        """
        findings = self.run_guardrails(event)
        decision = self.decide(event, findings)
        self.ledger.append(
            actor=self.agent_id,
            action="decision",
            data={
                "event_id": event.id,
                "verdict": decision.verdict.value,
                "risk": decision.risk,
                "rationale": decision.rationale,
                "role": self.role,
            },
        )
        return decision

    # Convenience for subclasses that want an LLM second opinion safely.
    def _llm_opinion(self, system: str, user: str) -> str:
        """Return the LLM's completion, or ``""`` when none can be obtained.

        An unavailable client and any exception raised by the client both
        yield the empty string, so callers only need to handle one
        "no opinion" value.
        """
        if not self.llm.available:
            return ""
        try:
            return self.llm.complete(system, user)
        except Exception:  # pragma: no cover - never let the LLM crash defence
            return ""
@@ -0,0 +1,70 @@
1
+ """Detector, first-pass triage.
2
+
3
+ Aggregates guardrail findings into a single risk score and maps it to a verdict
4
+ using two thresholds. The LLM (if any) may only *raise* concern, never lower it:
5
+ its suggestion is clamped so it cannot pull risk below the deterministic floor.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from wardproof.agents.base import BaseDefensiveAgent
11
+ from wardproof.schema import Decision, Event, Finding, Verdict, max_severity
12
+
13
+ _SYSTEM = (
14
+ "You are a security detector. Given an event, reply with a single float "
15
+ "0.0-1.0 estimating how likely it is malicious. Reply with ONLY the number."
16
+ )
17
+
18
+
19
+ class DetectorAgent(BaseDefensiveAgent):
20
+ role = "detector"
21
+
22
+ def __init__(
23
+ self, *args: object, low: float = 0.2, high: float = 0.6, **kwargs: object
24
+ ) -> None:
25
+ super().__init__(*args, **kwargs) # type: ignore[arg-type]
26
+ self.low = low
27
+ self.high = high
28
+
29
+ def _deterministic_risk(self, findings: list[Finding]) -> float:
30
+ if not findings:
31
+ return 0.0
32
+ return max((f.risk for f in findings), default=0.0)
33
+
34
+ def _llm_adjust(self, event: Event, floor: float) -> float:
35
+ raw = self._llm_opinion(_SYSTEM, event.content[:2000])
36
+ if not raw:
37
+ return floor
38
+ try:
39
+ guess = float(raw.strip().split()[0])
40
+ except (ValueError, IndexError):
41
+ return floor
42
+ # LLM may only escalate. Never trust it to reduce risk.
43
+ return max(floor, min(1.0, guess))
44
+
45
+ def decide(self, event: Event, findings: list[Finding]) -> Decision:
46
+ floor = self._deterministic_risk(findings)
47
+ risk = self._llm_adjust(event, floor)
48
+ triggered = [f for f in findings if f.triggered]
49
+ severity = max_severity([f.severity for f in triggered]) if triggered else None
50
+
51
+ if risk >= self.high:
52
+ verdict = (
53
+ Verdict.QUARANTINE if event.kind in ("memory_write", "tool_call") else Verdict.BLOCK
54
+ )
55
+ elif risk >= self.low:
56
+ verdict = Verdict.ESCALATE
57
+ else:
58
+ verdict = Verdict.ALLOW
59
+
60
+ reasons = "; ".join(f.reason for f in triggered) or "no guardrail signal"
61
+ rationale = f"risk={risk:.2f} (floor={floor:.2f}); {reasons}"
62
+ return Decision(
63
+ agent_id=self.agent_id,
64
+ event_id=event.id,
65
+ verdict=verdict,
66
+ risk=risk,
67
+ findings=findings,
68
+ rationale=rationale,
69
+ metadata={"severity": severity.value if severity else "info"},
70
+ )
@@ -0,0 +1,88 @@
1
+ """Responder, turns a verdict into a concrete action.
2
+
3
+ Mapping:
4
+ ALLOW -> pass through, no action
5
+ SANITIZE -> return cleaned content (strip injection markers)
6
+ ESCALATE -> hand to a human review queue (here: record + flag)
7
+ QUARANTINE -> invoke a mitigation tool (e.g. quarantine_chunk, freeze_account)
8
+ BLOCK -> invoke a mitigation tool and refuse
9
+
10
+ The Responder is the only agent allowed to *act*, and it acts through the
11
+ sandbox executor so every mitigation is itself permission-checked and audited.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from dataclasses import dataclass
17
+ from typing import Any
18
+
19
+ from wardproof.audit.ledger import AuditLedger
20
+ from wardproof.guardrails.prompt_injection import strip_injection
21
+ from wardproof.llm.base import LLMClient
22
+ from wardproof.llm.null import NullLLM
23
+ from wardproof.sandbox.executor import SandboxExecutor
24
+ from wardproof.schema import Decision, Event, Verdict
25
+
26
+
27
@dataclass
class Response:
    """Outcome of a responder action."""

    # Short name of what was done, e.g. "allow", "sanitize" or "escalate".
    action: str
    # Human-readable explanation of the action.
    detail: str
    # Optional result attached to the action (the cleaned text for "sanitize").
    payload: Any = None
32
+
33
+
34
class ResponderAgent:
    """Turns a Decision's verdict into a concrete Response and records it.

    ALLOW, SANITIZE and ESCALATE are handled inline. Every other verdict
    (QUARANTINE, BLOCK) is routed to ``_mitigate``, which runs the tool
    mapped to that verdict through ``executor.run`` when both are present.
    """

    role = "responder"

    def __init__(
        self,
        agent_id: str,
        *,
        ledger: AuditLedger | None = None,
        executor: SandboxExecutor | None = None,
        mitigations: dict[Verdict, str] | None = None,
        llm: LLMClient | None = None,
    ) -> None:
        """Store the responder's collaborators.

        Args:
            agent_id: Name used as the ``actor`` of ledger entries and passed
                to the executor as the caller identity.
            ledger: Ledger to write to. Defaults to a fresh ``AuditLedger``.
            executor: Executor used to run mitigation tools; ``None`` means
                no tool is ever run.
            mitigations: Verdict -> tool-name map. Defaults to empty.
            llm: Optional LLM client. Defaults to ``NullLLM``.
        """
        self.agent_id = agent_id
        # Use explicit None checks so a caller-supplied object that happens
        # to be falsy is kept instead of being replaced by a private default.
        self.ledger = ledger if ledger is not None else AuditLedger()
        self.executor = executor
        self.llm = llm if llm is not None else NullLLM()
        # Which sandbox tool to call for BLOCK / QUARANTINE verdicts.
        self.mitigations = mitigations if mitigations is not None else {}

    def _mitigate(self, event: Event, verdict: Verdict) -> Response:
        """Run the mitigation tool mapped to *verdict*, if it can be run.

        The returned Response always uses ``verdict.value`` as its action;
        the detail says whether a tool ran, no tool was configured, or a tool
        was configured but there is no executor to run it.
        """
        tool = self.mitigations.get(verdict)
        if not tool:
            detail = f"{verdict.value} (no mitigation tool configured)"
        elif self.executor is None:
            # Distinguish this from "nothing configured": the operator did
            # map a tool, it just cannot be executed, and the audit trail
            # should say so.
            detail = (
                f"{verdict.value} (mitigation '{tool}' configured "
                "but no sandbox executor available)"
            )
        else:
            result = self.executor.run(
                self.agent_id,
                tool,
                {"event_id": event.id, "source": event.source},
            )
            detail = f"ran mitigation '{tool}': {result}"
        return Response(action=verdict.value, detail=detail)

    def respond(self, event: Event, decision: Decision) -> Response:
        """Act on *decision* for *event* and append the outcome to the ledger.

        Returns the Response describing what was done. For SANITIZE the
        payload is the event content passed through ``strip_injection``.
        """
        v = decision.verdict
        if v == Verdict.ALLOW:
            resp = Response("allow", "passed through")
        elif v == Verdict.SANITIZE:
            cleaned = strip_injection(event.content)
            resp = Response("sanitize", "stripped injection markers", payload=cleaned)
        elif v == Verdict.ESCALATE:
            resp = Response("escalate", "queued for human review")
        else:  # QUARANTINE or BLOCK
            resp = self._mitigate(event, v)

        self.ledger.append(
            actor=self.agent_id,
            action="response",
            data={
                "event_id": event.id,
                "verdict": v.value,
                "action": resp.action,
                "detail": resp.detail,
            },
        )
        return resp
@@ -0,0 +1,93 @@
1
+ """Verifier, independent second opinion + detector integrity check.
2
+
3
+ The Verifier exists to catch two failure modes:
4
+ 1. The event really is malicious and slipped past (false negative).
5
+ 2. The Detector ITSELF has been compromised/poisoned and is rubber-stamping
6
+ dangerous events (insider/collusion). If the Detector said ALLOW while the
7
+ deterministic guardrails clearly fired high risk, that is an integrity
8
+ alarm, we quarantine and flag it loudly.
9
+
10
+ It re-runs its own guardrails rather than trusting the Detector's findings.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from wardproof.agents.base import BaseDefensiveAgent
16
+ from wardproof.schema import (
17
+ Decision,
18
+ Event,
19
+ Finding,
20
+ Verdict,
21
+ max_severity,
22
+ stricter_verdict,
23
+ )
24
+
25
+
26
class VerifierAgent(BaseDefensiveAgent):
    """Independent second opinion that also cross-checks the Detector.

    ``decide`` judges an event purely from this agent's own guardrail
    findings; ``verify`` combines that judgement with a Detector decision and
    flags the case where the Detector allowed something this agent rates as
    high risk.
    """

    role = "verifier"

    def __init__(self, *args: object, high: float = 0.6, **kwargs: object) -> None:
        """Create a verifier; *high* is the risk level treated as high risk.

        All other arguments are forwarded to ``BaseDefensiveAgent.__init__``.
        """
        super().__init__(*args, **kwargs)  # type: ignore[arg-type]
        self.high = high

    def decide(self, event: Event, findings: list[Finding]) -> Decision:
        """Build this agent's own Decision from the triggered findings only.

        Risk is the highest risk among triggered findings (0.0 when none).
        At or above ``high`` the verdict is QUARANTINE for ``memory_write`` /
        ``tool_call`` events and BLOCK otherwise; any other positive risk
        escalates; zero risk allows.
        """
        fired = [f for f in findings if f.triggered]
        risk = max((f.risk for f in fired), default=0.0)

        if risk >= self.high:
            if event.kind in ("memory_write", "tool_call"):
                verdict = Verdict.QUARANTINE
            else:
                verdict = Verdict.BLOCK
        elif risk > 0.0:
            verdict = Verdict.ESCALATE
        else:
            verdict = Verdict.ALLOW

        sev = None
        if fired:
            sev = max_severity([f.severity for f in fired])
        severity_label = sev.value if sev else "info"

        return Decision(
            self.agent_id,
            event.id,
            verdict,
            risk,
            findings,
            f"independent risk={risk:.2f}",
            metadata={"severity": severity_label},
        )

    def verify(self, event: Event, detector_decision: Decision) -> Decision:
        """Cross-check *detector_decision* against this agent's own pass.

        The returned verdict is the stricter of the two. When the Detector
        said ALLOW while this agent's own risk reaches ``high``, an
        ``integrity_alarm`` entry is appended to the ledger and the verdict is
        raised to at least QUARANTINE.
        """
        own = self.process(event)
        combined = stricter_verdict(own.verdict, detector_decision.verdict)

        # Detector said it's fine, but the deterministic floor says otherwise.
        detector_allowed = detector_decision.verdict == Verdict.ALLOW
        if detector_allowed and own.risk >= self.high:
            integrity_alarm = True
        else:
            integrity_alarm = False

        if integrity_alarm:
            combined = stricter_verdict(combined, Verdict.QUARANTINE)
            alarm_details = {
                "event_id": event.id,
                "suspect_agent": detector_decision.agent_id,
                "detector_verdict": detector_decision.verdict.value,
                "verifier_risk": own.risk,
                "note": "detector allowed a high-risk event; possible compromise",
            }
            self.ledger.append(
                actor=self.agent_id,
                action="integrity_alarm",
                data=alarm_details,
            )

        alarm_suffix = " [INTEGRITY ALARM]" if integrity_alarm else ""
        summary = (
            f"verifier={own.verdict.value} detector={detector_decision.verdict.value} "
            f"-> {combined.value}"
        )
        return Decision(
            agent_id=self.agent_id,
            event_id=event.id,
            verdict=combined,
            risk=max(own.risk, detector_decision.risk),
            findings=own.findings,
            rationale=summary + alarm_suffix,
            metadata={"integrity_alarm": integrity_alarm},
        )
@@ -0,0 +1,5 @@
1
+ """Tamper-evident, append-only audit ledger (hash chain + optional signatures)."""
2
+
3
+ from wardproof.audit.ledger import AuditEntry, AuditLedger
4
+
5
+ __all__ = ["AuditLedger", "AuditEntry"]
@@ -0,0 +1,158 @@
1
+ """Tamper-evident audit ledger.
2
+
3
+ Two layers of integrity, both optional to set up but always verifiable:
4
+ 1. Hash chain -> stdlib only (hashlib). Detects any mutation/reordering/deletion.
5
+ 2. Ed25519 signatures -> requires `cryptography`. Proves WHO appended each entry.
6
+
7
+ Design rule: the ledger is append-only and lives OUTSIDE the agents it audits.
8
+ An agent can write to it but must never be able to rewrite history.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import dataclasses
14
+ import hashlib
15
+ import json
16
+ import os
17
+ import threading
18
+ import time
19
+ from dataclasses import dataclass
20
+ from pathlib import Path
21
+ from typing import Any
22
+
23
+ try:
24
+ from cryptography.exceptions import InvalidSignature
25
+ from cryptography.hazmat.primitives import serialization
26
+ from cryptography.hazmat.primitives.asymmetric.ed25519 import (
27
+ Ed25519PrivateKey,
28
+ Ed25519PublicKey,
29
+ )
30
+
31
+ _HAS_CRYPTO = True
32
+ except Exception: # pragma: no cover - optional dependency
33
+ _HAS_CRYPTO = False
34
+
35
+ GENESIS_HASH = "0" * 64
36
+
37
+
38
+ def _canonical(data: Any) -> str:
39
+ return json.dumps(data, sort_keys=True, separators=(",", ":"), default=str)
40
+
41
+
42
@dataclass
class AuditEntry:
    """A single record in the audit ledger's hash chain."""

    # Position number stored with the entry.
    index: int
    # Timestamp stored with the entry, as a float.
    ts: float
    # Who performed the action.
    actor: str
    # What was done.
    action: str
    # Free-form details attached to the action.
    data: dict[str, Any]
    # Hash of the preceding entry.
    prev_hash: str
    # Hash of this entry.
    entry_hash: str
    # Hex-encoded signature, or None for an unsigned entry.
    signature: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the entry's fields as a plain dictionary."""
        as_mapping: dict[str, Any] = dataclasses.asdict(self)
        return as_mapping
55
+
56
+
57
class AuditLedger:
    """Append-only, hash-chained audit log with optional Ed25519 signatures.

    Each entry's hash covers its index, timestamp, actor, action, data and
    the previous entry's hash, so editing, reordering or removing an earlier
    entry breaks the chain. When a signing key is configured, every entry
    hash is also signed.
    """

    def __init__(
        self,
        signing_key_path: str | os.PathLike[str] | None = None,
        jsonl_path: str | os.PathLike[str] | None = None,
    ) -> None:
        """Create an empty ledger.

        Args:
            signing_key_path: File holding the raw Ed25519 private key. It is
                created if missing. ``None`` disables signing.
            jsonl_path: If given, every appended entry is also written to this
                file as one JSON line.

        Raises:
            RuntimeError: If signing is requested but ``cryptography`` is not
                installed.
        """
        self._lock = threading.Lock()
        self._entries: list[AuditEntry] = []
        self._jsonl_path = Path(jsonl_path) if jsonl_path else None
        self._priv: Any = None
        self._pub_hex: str | None = None
        if signing_key_path is not None:
            self._load_or_create_key(Path(signing_key_path))

    # ---- signing key management ----
    def _load_or_create_key(self, path: Path) -> None:
        """Load the private key at *path*, generating and saving one if absent."""
        if not _HAS_CRYPTO:
            raise RuntimeError(
                "Signing requested but 'cryptography' is not installed. "
                "Install with: pip install wardproof[crypto]"
            )
        if path.exists():
            self._priv = Ed25519PrivateKey.from_private_bytes(path.read_bytes())
        else:
            self._priv = Ed25519PrivateKey.generate()
            raw = self._priv.private_bytes(
                serialization.Encoding.Raw,
                serialization.PrivateFormat.Raw,
                serialization.NoEncryption(),
            )
            path.parent.mkdir(parents=True, exist_ok=True)
            # Create the file with owner-only permissions from the start.
            # Writing first and chmod-ing afterwards leaves a window in which
            # the private key is readable under the process's default umask.
            fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
            with os.fdopen(fd, "wb") as fh:
                fh.write(raw)
            # Still enforce the mode explicitly in case the file already
            # existed by the time it was opened.
            os.chmod(path, 0o600)
        self._pub_hex = (
            self._priv.public_key()
            .public_bytes(serialization.Encoding.Raw, serialization.PublicFormat.Raw)
            .hex()
        )

    @property
    def public_key_hex(self) -> str | None:
        """Hex-encoded public key of the signing key, or None when not signing."""
        return self._pub_hex

    # ---- core ----
    def _compute_hash(
        self,
        index: int,
        ts: float,
        actor: str,
        action: str,
        data: dict[str, Any],
        prev_hash: str,
    ) -> str:
        """Return the SHA-256 hex digest that binds one entry to its predecessor."""
        payload = f"{index}|{ts:.6f}|{actor}|{action}|{_canonical(data)}|{prev_hash}"
        return hashlib.sha256(payload.encode("utf-8")).hexdigest()

    def append(self, actor: str, action: str, data: dict[str, Any] | None = None) -> AuditEntry:
        """Append a new entry and return it.

        *data* is stored as a JSON-normalised copy. This keeps later mutation
        of the caller's dict from invalidating the stored hash, and makes the
        in-memory entry hash identically to the same entry reloaded from a
        JSONL file (for example, non-string keys become strings before
        hashing instead of after).
        """
        snapshot: dict[str, Any] = json.loads(_canonical(data)) if data else {}
        with self._lock:
            index = len(self._entries)
            prev_hash = self._entries[-1].entry_hash if self._entries else GENESIS_HASH
            ts = time.time()
            entry_hash = self._compute_hash(index, ts, actor, action, snapshot, prev_hash)
            signature = None
            if self._priv is not None:
                signature = self._priv.sign(entry_hash.encode("utf-8")).hex()
            entry = AuditEntry(
                index, ts, actor, action, snapshot, prev_hash, entry_hash, signature
            )
            # Persist before publishing in memory: if the write fails, the
            # in-memory chain does not advance past what is on disk.
            if self._jsonl_path is not None:
                with self._jsonl_path.open("a", encoding="utf-8") as fh:
                    fh.write(_canonical(entry.to_dict()) + "\n")
            self._entries.append(entry)
        return entry

    @property
    def entries(self) -> list[AuditEntry]:
        """A list copy of the entries, oldest first."""
        with self._lock:
            return list(self._entries)

    def verify(self, public_key_hex: str | None = None) -> tuple[bool, str]:
        """Recompute the whole chain and (if a key is known) check signatures.

        Args:
            public_key_hex: Hex Ed25519 public key to check signatures
                against. Defaults to this ledger's own signing key, if any.

        Returns:
            ``(ok, detail)``. When a public key is in effect, every entry
            must carry a valid signature; a missing or malformed signature
            fails verification. When a key is in effect but ``cryptography``
            is not installed, verification fails rather than skipping the
            signature check.

        Raises:
            ValueError: If *public_key_hex* is not a valid key.
        """
        pub_hex = public_key_hex or self._pub_hex
        pub: Any = None
        if pub_hex:
            if not _HAS_CRYPTO:
                return False, "signature check requested but 'cryptography' is not installed"
            pub = Ed25519PublicKey.from_public_bytes(bytes.fromhex(pub_hex))
        with self._lock:
            snapshot = list(self._entries)
        prev = GENESIS_HASH
        for e in snapshot:
            expect = self._compute_hash(e.index, e.ts, e.actor, e.action, e.data, prev)
            if expect != e.entry_hash:
                return False, f"hash mismatch at index {e.index}"
            if e.prev_hash != prev:
                return False, f"broken chain link at index {e.index}"
            if pub is not None:
                # The hash chain alone can be recomputed by anyone, so an
                # unsigned entry must not pass when signatures are expected.
                if e.signature is None:
                    return False, f"missing signature at index {e.index}"
                try:
                    pub.verify(bytes.fromhex(e.signature), e.entry_hash.encode("utf-8"))
                except (InvalidSignature, ValueError, TypeError):
                    return False, f"invalid signature at index {e.index}"
            prev = e.entry_hash
        return True, f"verified {len(snapshot)} entries"

    def export_jsonl(self, path: str | os.PathLike[str]) -> None:
        """Write every entry to *path* as one JSON object per line, replacing the file."""
        with self._lock:
            snapshot = list(self._entries)
        with Path(path).open("w", encoding="utf-8") as fh:
            for e in snapshot:
                fh.write(_canonical(e.to_dict()) + "\n")
wardproof/cli.py ADDED
@@ -0,0 +1,37 @@
1
+ """Minimal CLI. Primary job: independently verify an exported audit ledger."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ from wardproof.audit.ledger import AuditEntry, AuditLedger
11
+
12
+
13
def _verify_file(path: str, pub: str | None) -> int:
    """Verify the JSONL ledger at *path* and print an ``OK:`` / ``FAIL:`` line.

    Args:
        path: Ledger file with one JSON-encoded entry per line. Blank lines
            are skipped.
        pub: Optional hex Ed25519 public key passed on to ``AuditLedger.verify``.

    Returns:
        0 when the ledger verifies, 1 otherwise. An unreadable file, a
        malformed line or an unusable public key is reported as a failure
        instead of surfacing as a traceback.
    """
    try:
        text = Path(path).read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        print(f"FAIL: cannot read ledger {path!r}: {exc}")
        return 1

    ledger = AuditLedger()
    for lineno, line in enumerate(text.splitlines(), start=1):
        if not line.strip():
            continue
        try:
            # ValueError covers invalid JSON; TypeError covers a line that is
            # not a JSON object or has missing/unexpected fields.
            entry = AuditEntry(**json.loads(line))
        except (ValueError, TypeError) as exc:
            print(f"FAIL: malformed ledger entry on line {lineno}: {exc}")
            return 1
        ledger._entries.append(entry)  # noqa: SLF001

    try:
        ok, detail = ledger.verify(public_key_hex=pub)
    except (ValueError, TypeError) as exc:
        # Raised for a public key that is not valid hex / not a valid key, or
        # for entry fields whose types cannot be hashed as expected.
        print(f"FAIL: verification could not be completed: {exc}")
        return 1
    print(("OK: " if ok else "FAIL: ") + detail)
    return 0 if ok else 1
22
+
23
+
24
def main(argv: list[str] | None = None) -> int:
    """Parse the command line and run the requested sub-command.

    *argv* is the argument list to parse; ``None`` lets argparse read
    ``sys.argv``. Returns the process exit code.
    """
    cli = argparse.ArgumentParser(prog="wardproof")
    commands = cli.add_subparsers(dest="cmd", required=True)

    verify_cmd = commands.add_parser(
        "verify-ledger", help="verify integrity of a JSONL audit ledger"
    )
    verify_cmd.add_argument("path")
    verify_cmd.add_argument(
        "--pubkey", default=None, help="hex Ed25519 public key for signature check"
    )

    options = cli.parse_args(argv)
    if options.cmd != "verify-ledger":
        return 1
    return _verify_file(options.path, options.pubkey)
34
+
35
+
36
+ if __name__ == "__main__":
37
+ sys.exit(main())
wardproof/config.py ADDED
@@ -0,0 +1,28 @@
1
+ """Typed configuration. Fork-friendly: override defaults in one place."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from dataclasses import dataclass
7
+
8
+
9
@dataclass
class WardproofConfig:
    """Typed settings with defaults, optionally overridden from the environment."""

    # Model name.
    model: str = "llama3.1"
    # Base URL of the Ollama server.
    ollama_url: str = "http://localhost:11434"
    # Detector thresholds (lower and upper risk bounds).
    detector_low: float = 0.2
    detector_high: float = 0.6
    # Optional file paths; None means "not configured".
    signing_key_path: str | None = None
    ledger_path: str | None = None

    @classmethod
    def from_env(cls) -> WardproofConfig:
        """Build a config from ``WARDPROOF_*`` environment variables.

        Unset variables fall back to the dataclass defaults. For the numeric
        thresholds and the two path settings, a variable that is set but
        blank is treated as unset, so ``WARDPROOF_SIGNING_KEY=`` does not
        turn into an empty path and ``WARDPROOF_DETECTOR_LOW=`` does not
        crash.

        Raises:
            ValueError: If a threshold variable is not a valid number. The
                message names the offending variable.
        """
        base = cls()

        def _float_setting(name: str, default: float) -> float:
            raw = os.getenv(name)
            if raw is None or not raw.strip():
                return default
            try:
                return float(raw)
            except ValueError as err:
                raise ValueError(f"{name} must be a number, got {raw!r}") from err

        def _optional_setting(name: str) -> str | None:
            # An empty string carries no usable path; report it as unset.
            return os.getenv(name) or None

        return cls(
            model=os.getenv("WARDPROOF_MODEL", base.model),
            ollama_url=os.getenv("WARDPROOF_OLLAMA_URL", base.ollama_url),
            detector_low=_float_setting("WARDPROOF_DETECTOR_LOW", base.detector_low),
            detector_high=_float_setting("WARDPROOF_DETECTOR_HIGH", base.detector_high),
            signing_key_path=_optional_setting("WARDPROOF_SIGNING_KEY"),
            ledger_path=_optional_setting("WARDPROOF_LEDGER_PATH"),
        )
@@ -0,0 +1,17 @@
1
+ """Deterministic guardrails, the first line of defence (no LLM required)."""
2
+
3
+ from wardproof.guardrails.base import Guardrail
4
+ from wardproof.guardrails.memory_poisoning import MemoryPoisoningGuardrail
5
+ from wardproof.guardrails.prompt_injection import (
6
+ PromptInjectionGuardrail,
7
+ strip_injection,
8
+ )
9
+ from wardproof.guardrails.tool_misuse import ToolMisuseGuardrail
10
+
11
+ __all__ = [
12
+ "Guardrail",
13
+ "PromptInjectionGuardrail",
14
+ "ToolMisuseGuardrail",
15
+ "MemoryPoisoningGuardrail",
16
+ "strip_injection",
17
+ ]