voidhack-agent-firewall 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- voidhack_agent_firewall/__init__.py +19 -0
- voidhack_agent_firewall/client.py +124 -0
- voidhack_agent_firewall/langchain.py +65 -0
- voidhack_agent_firewall/pii.py +53 -0
- voidhack_agent_firewall/policy.py +98 -0
- voidhack_agent_firewall/providers.py +249 -0
- voidhack_agent_firewall/rules.py +104 -0
- voidhack_agent_firewall/schemas.py +51 -0
- voidhack_agent_firewall-1.0.0.dist-info/METADATA +147 -0
- voidhack_agent_firewall-1.0.0.dist-info/RECORD +13 -0
- voidhack_agent_firewall-1.0.0.dist-info/WHEEL +5 -0
- voidhack_agent_firewall-1.0.0.dist-info/licenses/LICENSE +21 -0
- voidhack_agent_firewall-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from .client import FirewallOpenAI
|
|
2
|
+
from .langchain import FirewallCallbackHandler
|
|
3
|
+
from .providers import (
|
|
4
|
+
OPENAI_COMPATIBLE_BASE_URLS,
|
|
5
|
+
FirewallAnthropic,
|
|
6
|
+
FirewallGeminiModel,
|
|
7
|
+
FirewallGoogleGenerativeAI,
|
|
8
|
+
create_openai_compatible_firewall,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"FirewallOpenAI",
|
|
13
|
+
"FirewallCallbackHandler",
|
|
14
|
+
"FirewallAnthropic",
|
|
15
|
+
"FirewallGeminiModel",
|
|
16
|
+
"FirewallGoogleGenerativeAI",
|
|
17
|
+
"OPENAI_COMPATIBLE_BASE_URLS",
|
|
18
|
+
"create_openai_compatible_firewall",
|
|
19
|
+
]
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
from typing import Any, Dict
|
|
2
|
+
from openai import OpenAI
|
|
3
|
+
|
|
4
|
+
from . import pii
|
|
5
|
+
from .policy import Policy, load_policy
|
|
6
|
+
from .rules import check_tool_calls
|
|
7
|
+
from .schemas import Status, ToolCall
|
|
8
|
+
|
|
9
|
+
class CompletionsWrapper:
    """Policy-enforcing stand-in for a ``chat.completions`` object.

    ``create`` masks secrets/PII in the outgoing prompt messages, forwards the
    call to the wrapped completions object, then removes policy-violating tool
    calls and masks secrets/PII in the text of every returned choice. A summary
    of what was done is attached to the response under the ``"firewall"`` key
    of ``model_extra``.

    NOTE(review): a response that exposes no ``choices`` (for example a
    streaming iterator) is returned untouched, so streamed tool calls are not
    inspected here.
    """

    def __init__(self, original_completions: Any, policy: Policy):
        self._original = original_completions
        self._policy = policy

    def create(self, *args: Any, **kwargs: Any) -> Any:
        """Call the wrapped ``create`` with inbound and outbound policy checks.

        Args:
            *args: Positional arguments forwarded unchanged to the wrapped call.
            **kwargs: Keyword arguments forwarded to the wrapped call. Entries
                of ``messages`` are redacted in place before forwarding.

        Returns:
            The upstream response object, mutated in place.
        """
        # 1. Inbound inspection (locally mask PII/secrets in prompt messages).
        for msg in kwargs.get("messages") or []:
            self._redact_inbound_message(msg)

        # 2. Call the actual upstream LLM.
        response = self._original.create(*args, **kwargs)

        # 3. Outbound inspection of every returned choice, not only the first:
        #    with n > 1 each choice can carry its own tool calls.
        choices = getattr(response, "choices", None) or []
        if not choices:
            return response

        firewall_meta: Dict[str, Any] = {
            "action": "allow",
            "reason": None,
            "rule_fired": None,
            "stripped_tool_calls": [],
            "blocked_calls": []
        }
        for choice in choices:
            message = getattr(choice, "message", None)
            if message is None:
                continue
            self._strip_blocked_tool_calls(choice, message, firewall_meta)
            self._redact_completion_text(message, firewall_meta)

        self._attach_metadata(response, firewall_meta)
        return response

    @staticmethod
    def _read(obj: Any, name: str, default: Any = None) -> Any:
        """Read *name* from a dict-style or attribute-style object."""
        if isinstance(obj, dict):
            return obj.get(name, default)
        return getattr(obj, name, default)

    @staticmethod
    def _write(obj: Any, name: str, value: Any) -> None:
        """Write *name* on a dict-style or attribute-style object."""
        if isinstance(obj, dict):
            obj[name] = value
        else:
            setattr(obj, name, value)

    def _redact_text(self, text: Any, source: str) -> tuple[Any, bool]:
        """Return ``(text, changed)`` after masking secrets/PII in *text*.

        Anything that is not a non-empty string is returned unchanged.
        """
        if not (isinstance(text, str) and text):
            return text, False
        redacted, check = pii.scan_and_redact(text, self._policy, source=source)
        if check.status is Status.FLAG:
            return redacted, True
        return text, False

    def _redact_inbound_message(self, msg: Any) -> None:
        """Mask secrets/PII in one prompt message, in place.

        Handles plain string content as well as list content made of
        ``{"type": "text", "text": ...}`` parts.
        """
        role = self._read(msg, "role", "user")
        content = self._read(msg, "content")
        if isinstance(content, list):
            for part in content:
                if self._read(part, "type") != "text":
                    continue
                text, changed = self._redact_text(self._read(part, "text"), role)
                if changed:
                    self._write(part, "text", text)
            return
        text, changed = self._redact_text(content, role)
        if changed:
            self._write(msg, "content", text)

    def _strip_blocked_tool_calls(self, choice: Any, message: Any, meta: Dict[str, Any]) -> None:
        """Remove policy-violating tool calls from *message* and record them in *meta*."""
        raw_calls = getattr(message, "tool_calls", None) or []
        if not raw_calls:
            return

        # Parse into our schema structure to run checks.
        tool_calls = [
            ToolCall(
                id=tc.id,
                type=tc.type,
                function={"name": tc.function.name, "arguments": tc.function.arguments}
            )
            for tc in raw_calls
        ]
        findings, _ = check_tool_calls(tool_calls, self._policy)
        blocked = [f for f in findings if f.status is Status.BLOCK]
        if not blocked:
            return
        blocked_ids = {f.tool_call_id for f in blocked}

        # Strip blocked tool calls so the agent framework never receives them.
        kept = [tc for tc in raw_calls if tc.id not in blocked_ids]
        message.tool_calls = kept if kept else None

        # If everything was stripped and no text response remains, inject the block message.
        if not kept and not getattr(message, "content", None):
            message.content = self._policy.block_message
            choice.finish_reason = "content_filter"

        # The first blocked finding across all choices supplies the headline reason.
        if meta["action"] != "block":
            meta["action"] = "block"
            meta["rule_fired"] = "deterministic_rules"
            meta["reason"] = "; ".join(blocked[0].reasons)
        meta["stripped_tool_calls"] = sorted(set(meta["stripped_tool_calls"]) | blocked_ids)

        # Capture a safe (redacted) representation of each blocked call for logging.
        by_id = {tc.id: tc for tc in raw_calls}
        for finding in blocked:
            tc_ref = by_id.get(finding.tool_call_id)
            raw_args = tc_ref.function.arguments if tc_ref else ""
            safe_args, _ = pii.redact(raw_args or "", self._policy)
            meta["blocked_calls"].append({
                "name": finding.tool_name,
                "arguments": safe_args,
                "reasons": finding.reasons
            })

    def _redact_completion_text(self, message: Any, meta: Dict[str, Any]) -> None:
        """Mask secrets/PII in the text content of *message*, in place."""
        text, changed = self._redact_text(getattr(message, "content", None), "completion")
        if not changed:
            return
        message.content = text
        # A block is the stronger outcome; never let a redaction hide it.
        if meta["action"] != "block":
            meta["action"] = "redact"
            meta["reason"] = "secret/PII redacted from model output"

    @staticmethod
    def _attach_metadata(response: Any, meta: Dict[str, Any]) -> None:
        """Expose *meta* as ``response.model_extra["firewall"]``.

        When ``model_extra`` already is a dict it is updated in place: pydantic
        models expose it as a property without a setter, so assigning to the
        attribute would raise. Plain objects get a fresh dict assigned.
        """
        extra = getattr(response, "model_extra", None)
        if isinstance(extra, dict):
            extra["firewall"] = meta
        else:
            response.model_extra = {"firewall": meta}
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class ChatWrapper:
    """Proxy for a client's ``chat`` namespace with guarded ``completions``.

    ``completions`` is replaced by a ``CompletionsWrapper``; every other
    attribute is looked up on the wrapped chat object.
    """

    def __init__(self, original_chat: Any, policy: Policy):
        guarded = CompletionsWrapper(original_chat.completions, policy)
        self._chat = original_chat
        self.completions = guarded

    def __getattr__(self, name: str) -> Any:
        # Only invoked for names not found on the wrapper itself.
        target = self._chat
        return getattr(target, name)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class FirewallOpenAI:
    """In-process wrapper that applies a policy file to an OpenAI client.

    The policy is loaded once from ``policy_path``. ``chat`` is replaced by a
    ``ChatWrapper``; every other attribute is proxied to the wrapped client.
    """

    def __init__(self, client: OpenAI, policy_path: str):
        self._client = client
        policy = load_policy(policy_path)
        self._policy = policy
        self.chat = ChatWrapper(client.chat, policy)

    def __getattr__(self, name: str) -> Any:
        # Anything the wrapper does not define comes from the original client.
        wrapped = self._client
        return getattr(wrapped, name)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from typing import Any, Dict, List, Optional
|
|
2
|
+
|
|
3
|
+
from .policy import load_policy
|
|
4
|
+
from .rules import check_tool_calls
|
|
5
|
+
from .schemas import Status, ToolCall
|
|
6
|
+
|
|
7
|
+
# Attempt to import LangChain's callback base class
|
|
8
|
+
try:
|
|
9
|
+
from langchain_core.callbacks import BaseCallbackHandler
|
|
10
|
+
except ImportError:
|
|
11
|
+
# Fallback mock if langchain is not installed in the current environment
|
|
12
|
+
class BaseCallbackHandler: # type: ignore
|
|
13
|
+
pass
|
|
14
|
+
|
|
15
|
+
class PolicyViolationError(ValueError):
    """Signals that an agent action was rejected by the security policy."""
|
|
18
|
+
|
|
19
|
+
class FirewallCallbackHandler(BaseCallbackHandler):
    """LangChain callback plugin that halts agent runs when tool calls or arguments violate security policy."""

    # LangChain's callback manager logs and swallows exceptions raised inside a
    # handler unless the handler sets ``raise_error``. Without this flag a
    # policy violation would only produce a warning and the tool would still
    # run, so the handler must opt in for the block to take effect.
    raise_error: bool = True

    def __init__(self, policy_path: str):
        """Load the policy that every tool start is checked against.

        Args:
            policy_path: Path of the policy file handed to ``load_policy``.
        """
        self.policy = load_policy(policy_path)

    def on_tool_start(
        self,
        serialized: Dict[str, Any],
        input_str: str,
        *,
        run_id: Any = None,
        parent_run_id: Any = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        **kwargs: Any
    ) -> Any:
        """Check a tool invocation right before it starts.

        Args:
            serialized: Tool description; only its ``"name"`` entry is read.
            input_str: Tool input, inspected as the call's argument text.
            run_id: Used as the tool-call id in findings (``"lc-run"`` if unset).
            parent_run_id, tags, metadata, **kwargs: Accepted and ignored.

        Raises:
            PolicyViolationError: If the tool is on the denylist, missing from
                a non-empty allowlist, or its arguments fail a policy check.
        """
        # Tolerate a missing description; an empty name still goes through the
        # allowlist check below.
        tool_name = (serialized or {}).get("name", "")

        # Check against denied tools
        if tool_name in self.policy.tool_denylist:
            raise PolicyViolationError(
                f"Security Block: Tool '{tool_name}' is restricted by security policy."
            )

        # Check against allowed tools & argument-level rules
        if self.policy.tool_allowlist and tool_name not in self.policy.tool_allowlist:
            raise PolicyViolationError(
                f"Security Block: Tool '{tool_name}' is not in the allowed tool list."
            )

        # Wrap in a ToolCall object to run argument checks.
        # LangChain tools typically pass inputs as JSON string or keyword args.
        tc = ToolCall(
            id=str(run_id or "lc-run"),
            type="function",
            function={"name": tool_name, "arguments": input_str}
        )

        findings, _ = check_tool_calls([tc], self.policy)
        blocked = [f for f in findings if f.status is Status.BLOCK]

        if blocked:
            reasons = "; ".join(blocked[0].reasons)
            raise PolicyViolationError(
                f"Security Block: Tool '{tool_name}' argument check failed. Reason: {reasons}"
            )
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from .policy import Policy
|
|
6
|
+
from .schemas import CheckResult, Status
|
|
7
|
+
|
|
8
|
+
_PII_PATTERNS: list[tuple[str, re.Pattern[str]]] = [
|
|
9
|
+
("email", re.compile(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}")),
|
|
10
|
+
("ssn", re.compile(r"\b\d{3}-\d{2}-\d{4}\b")),
|
|
11
|
+
("credit_card", re.compile(r"\b(?:\d[ -]*?){13,16}\b")),
|
|
12
|
+
(
|
|
13
|
+
"phone",
|
|
14
|
+
re.compile(r"\b(?:\+?\d{1,2}[ -]?)?\(?\d{3}\)?[ -]?\d{3}[ -]?\d{4}\b"),
|
|
15
|
+
),
|
|
16
|
+
("ipv4", re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")),
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _mask(label: str) -> str:
    """Build the placeholder that replaces text matched under *label*."""
    return "[REDACTED:{}]".format(label)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def redact(text: str, policy: Policy) -> tuple[str, list[str]]:
    """Mask every secret and PII match in *text*.

    The policy's secret patterns are applied first, then the built-in PII
    patterns; each later pattern runs on the already-masked text.

    Returns:
        ``(masked_text, labels)`` where ``labels`` names each pattern that
        matched, in application order. Falsy input is returned as-is with an
        empty label list.
    """
    if not text:
        return text, []

    scrubbed = text
    matched: list[str] = []
    for label, regex in [*policy.compiled_secrets(), *_PII_PATTERNS]:
        # subn reports how many replacements were made in the same pass.
        scrubbed, replaced = regex.subn(_mask(label), scrubbed)
        if replaced:
            matched.append(label)
    return scrubbed, matched
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def scan_and_redact(text: str, policy: Policy, source: str = "content") -> tuple[str, CheckResult]:
    """Redact *text* and describe the outcome as a ``CheckResult``.

    Args:
        text: Text to scan.
        policy: Policy supplying the secret patterns.
        source: Label for where the text came from; used only in the detail string.

    Returns:
        ``(redacted_text, result)``. ``result.status`` is ``Status.FLAG`` when
        at least one pattern matched and ``Status.PASS`` otherwise; the matched
        labels are in ``result.meta["labels"]``.
    """
    redacted, labels = redact(text, policy)
    if labels:
        status = Status.FLAG
        detail = f"redacted in {source}: {', '.join(labels)}"
    else:
        status = Status.PASS
        detail = f"no PII in {source}"
    result = CheckResult(
        name="pii_redaction",
        status=status,
        detail=detail,
        meta={"labels": labels},
    )
    return redacted, result
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import yaml
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class SecretPattern:
    """A named regular expression from the policy's ``secret_patterns`` list."""

    name: str  # label attached to matches of this pattern
    regex: str  # regular-expression source text

    def compiled(self) -> re.Pattern[str]:
        """Return ``regex`` compiled with default flags."""
        source = self.regex
        return re.compile(source)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class ArgRule:
    """A regular-expression rule applied to tool-call argument text."""

    name: str  # rule identifier
    reason: str  # text reported when the rule matches
    regex: str  # regular-expression source text
    tools: list[str] = field(default_factory=lambda: ["*"])  # tool names in scope; "*" means all

    def compiled(self) -> re.Pattern[str]:
        """Return ``regex`` compiled with default flags."""
        source = self.regex
        return re.compile(source)

    def applies_to(self, tool: str) -> bool:
        """Tell whether this rule covers *tool* (wildcard or exact name)."""
        scope = self.tools
        if "*" in scope:
            return True
        return tool in scope
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class Policy:
    """In-memory form of a firewall policy file.

    Holds the tool allow/deny lists, the egress host allowlist, the secret and
    argument patterns, and the remaining policy settings with their defaults.
    """

    version: int = 1
    description: str = ""
    tool_allowlist: list[str] = field(default_factory=list)
    tool_denylist: list[str] = field(default_factory=list)
    egress_allowlist: list[str] = field(default_factory=list)
    secret_patterns: list[SecretPattern] = field(default_factory=list)
    arg_rules: list[ArgRule] = field(default_factory=list)
    injection_phrases: list[str] = field(default_factory=list)
    injection_threshold: float = 0.80
    token_budget_per_session: int = 20000
    fail_closed: bool = True
    block_message: str = "[Agent Firewall] Action blocked by policy."

    def tool_allowed(self, name: str) -> bool:
        """Tell whether tool *name* may be called.

        The denylist always wins; an empty allowlist permits every tool that
        is not denied.
        """
        if name in self.tool_denylist:
            return False
        return not self.tool_allowlist or name in self.tool_allowlist

    def host_allowed(self, host: str) -> bool:
        """Tell whether *host* equals, or is a subdomain of, an allowlisted host.

        Comparison is case-insensitive and ignores surrounding whitespace. An
        empty egress allowlist permits no host.
        """
        candidate = host.lower().strip()
        bases = (entry.lower().strip() for entry in self.egress_allowlist)
        return any(
            candidate == base or candidate.endswith("." + base)
            for base in bases
        )

    def compiled_secrets(self) -> list[tuple[str, re.Pattern[str]]]:
        """Return ``(name, compiled_pattern)`` for every secret pattern."""
        pairs = []
        for secret in self.secret_patterns:
            pairs.append((secret.name, secret.compiled()))
        return pairs

    def compiled_arg_rules(self) -> list[tuple[ArgRule, re.Pattern[str]]]:
        """Return ``(rule, compiled_pattern)`` for every argument rule."""
        pairs = []
        for rule in self.arg_rules:
            pairs.append((rule, rule.compiled()))
        return pairs
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def load_policy(path: str | Path) -> Policy:
    """Read the YAML policy file at *path* and build a ``Policy`` from it.

    Missing keys fall back to the ``Policy`` defaults; an empty document is
    treated as an empty mapping. Entries of ``secret_patterns`` need ``name``
    and ``regex``; entries of ``arg_rules`` need ``name``, ``reason`` and
    ``regex`` and may list ``tools`` (default ``["*"]``).
    """
    document = Path(path).read_text(encoding="utf-8")
    raw = yaml.safe_load(document) or {}

    # Values are converted in the same order as the Policy fields.
    version = int(raw.get("version", 1))
    description = str(raw.get("description", ""))
    tool_allowlist = list(raw.get("tool_allowlist") or [])
    tool_denylist = list(raw.get("tool_denylist") or [])
    egress_allowlist = list(raw.get("egress_allowlist") or [])

    secret_patterns = []
    for item in raw.get("secret_patterns") or []:
        secret_patterns.append(
            SecretPattern(name=str(item["name"]), regex=str(item["regex"]))
        )

    arg_rules = []
    for item in raw.get("arg_rules") or []:
        arg_rules.append(
            ArgRule(
                name=str(item["name"]),
                reason=str(item["reason"]),
                regex=str(item["regex"]),
                tools=list(item.get("tools") or ["*"]),
            )
        )

    injection_phrases = list(raw.get("injection_phrases") or [])
    injection_threshold = float(raw.get("injection_threshold", 0.80))
    token_budget = int(raw.get("token_budget_per_session", 20000))
    fail_closed = bool(raw.get("fail_closed", True))
    block_message = str(
        raw.get("block_message", "[Agent Firewall] Action blocked by policy.")
    )

    return Policy(
        version=version,
        description=description,
        tool_allowlist=tool_allowlist,
        tool_denylist=tool_denylist,
        egress_allowlist=egress_allowlist,
        secret_patterns=secret_patterns,
        arg_rules=arg_rules,
        injection_phrases=injection_phrases,
        injection_threshold=injection_threshold,
        token_budget_per_session=token_budget,
        fail_closed=fail_closed,
        block_message=block_message,
    )
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Any, Callable
|
|
3
|
+
|
|
4
|
+
from openai import OpenAI
|
|
5
|
+
|
|
6
|
+
from . import pii
|
|
7
|
+
from .client import FirewallOpenAI
|
|
8
|
+
from .policy import Policy, load_policy
|
|
9
|
+
from .rules import check_tool_calls
|
|
10
|
+
from .schemas import Status, ToolCall
|
|
11
|
+
|
|
12
|
+
OPENAI_COMPATIBLE_BASE_URLS: dict[str, str] = {
|
|
13
|
+
"openai": "https://api.openai.com/v1",
|
|
14
|
+
"groq": "https://api.groq.com/openai/v1",
|
|
15
|
+
"nvidia": "https://integrate.api.nvidia.com/v1",
|
|
16
|
+
"mistral": "https://api.mistral.ai/v1",
|
|
17
|
+
"together": "https://api.together.xyz/v1",
|
|
18
|
+
"fireworks": "https://api.fireworks.ai/inference/v1",
|
|
19
|
+
"perplexity": "https://api.perplexity.ai",
|
|
20
|
+
"deepseek": "https://api.deepseek.com",
|
|
21
|
+
"openrouter": "https://openrouter.ai/api/v1",
|
|
22
|
+
"local": "http://localhost:8000/v1",
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def create_openai_compatible_firewall(
    provider: str,
    *,
    api_key: str | None = None,
    policy_path: str,
    base_url: str | None = None,
    **client_kwargs: Any,
) -> FirewallOpenAI:
    """Build a guarded client for an OpenAI-compatible provider.

    Args:
        provider: Key into ``OPENAI_COMPATIBLE_BASE_URLS``; consulted only
            when ``base_url`` is not given.
        api_key: Passed through to the ``OpenAI`` constructor.
        policy_path: Policy file handed to ``FirewallOpenAI``.
        base_url: Explicit endpoint; takes precedence over ``provider``.
        **client_kwargs: Extra keyword arguments for the ``OpenAI`` constructor.

    Raises:
        ValueError: If no endpoint can be resolved.
    """
    if base_url:
        endpoint = base_url
    else:
        endpoint = OPENAI_COMPATIBLE_BASE_URLS.get(provider)
    if not endpoint:
        raise ValueError(f"Unknown OpenAI-compatible provider: {provider}")

    client = OpenAI(api_key=api_key, base_url=endpoint, **client_kwargs)
    return FirewallOpenAI(client, policy_path=policy_path)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _stringify_args(value: Any) -> str:
    """Render tool-call arguments as text for policy inspection.

    Strings are returned unchanged and ``None`` becomes ``"{}"``. Other values
    are JSON-encoded; values ``json.dumps`` cannot encode fall back to
    ``str(value)`` so that inspection never aborts on odd argument objects.
    """
    if isinstance(value, str):
        return value
    try:
        return json.dumps(value if value is not None else {})
    except (TypeError, ValueError):
        # TypeError: not JSON-serializable; ValueError: circular reference.
        return str(value)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _get(obj: Any, name: str, default: Any = None) -> Any:
    """Read *name* from *obj*: by key for dicts, by attribute otherwise."""
    return obj.get(name, default) if isinstance(obj, dict) else getattr(obj, name, default)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _set(obj: Any, name: str, value: Any) -> None:
    """Store *value* under *name*: as a key for dicts, as an attribute otherwise."""
    if not isinstance(obj, dict):
        setattr(obj, name, value)
        return
    obj[name] = value
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _firewall_meta() -> dict[str, Any]:
    """Return a fresh metadata record describing an allowed response."""
    return dict(
        action="allow",
        reason=None,
        rule_fired=None,
        stripped_tool_calls=[],
        blocked_calls=[],
    )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _enforce_tool_calls(
    calls: list[dict[str, Any]],
    policy: Policy,
    remove_blocked: Callable[[set[str]], None],
) -> dict[str, Any]:
    """Check provider-neutral tool calls and strip the ones that violate policy.

    Args:
        calls: Dicts with ``"id"``, ``"name"`` and optionally ``"arguments"``.
        policy: Policy the calls are checked against.
        remove_blocked: Callback given the set of blocked call ids; it must
            remove those calls from the provider response.

    Returns:
        Firewall metadata: the default "allow" record when nothing is blocked,
        otherwise a "block" record listing the stripped ids and their reasons.
    """
    meta = _firewall_meta()
    if not calls:
        return meta

    parsed = []
    for call in calls:
        function = {
            "name": call["name"],
            "arguments": _stringify_args(call.get("arguments")),
        }
        parsed.append(ToolCall(id=call["id"], type="function", function=function))

    findings, _ = check_tool_calls(parsed, policy)
    blocked = [finding for finding in findings if finding.status is Status.BLOCK]
    if not blocked:
        return meta

    blocked_ids = {finding.tool_call_id for finding in blocked}
    remove_blocked(blocked_ids)

    # The first blocked finding supplies the headline reason.
    meta.update(
        action="block",
        stripped_tool_calls=sorted(blocked_ids),
        rule_fired="deterministic_rules",
        reason="; ".join(blocked[0].reasons),
        blocked_calls=[
            {"name": finding.tool_name, "reasons": finding.reasons}
            for finding in blocked
        ],
    )
    return meta
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _redact_anthropic_messages(body: dict[str, Any], policy: Policy) -> None:
    """Mask secrets/PII in the ``messages`` of a request body, in place.

    Handles string content and list content made of ``{"type": "text"}``
    dict parts; anything else is left untouched.
    """

    def scrub(holder: dict[str, Any], key: str, role: str) -> None:
        # Rewrites holder[key] only when it is a non-empty string that was flagged.
        value = holder.get(key)
        if not (isinstance(value, str) and value):
            return
        redacted, check = pii.scan_and_redact(value, policy, source=role)
        if check.status is Status.FLAG:
            holder[key] = redacted

    for msg in body.get("messages", []) or []:
        content = msg.get("content")
        role = msg.get("role", "user")
        if isinstance(content, list):
            for part in content:
                if isinstance(part, dict) and part.get("type") == "text":
                    scrub(part, "text", role)
        else:
            scrub(msg, "content", role)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class _AnthropicMessagesWrapper:
    """Guarded stand-in for an Anthropic client's ``messages`` namespace.

    ``create`` redacts the request messages, forwards the call, removes
    ``tool_use`` content blocks that violate policy, and stores the firewall
    metadata on the response under ``firewall``. Other attributes are proxied.
    """

    def __init__(self, messages: Any, policy: Policy):
        self._messages = messages
        self._policy = policy

    def create(self, *args: Any, **kwargs: Any) -> Any:
        """Forward ``create`` with inbound redaction and tool-call enforcement."""
        # The request body is the keyword arguments, or else a dict passed first.
        if kwargs:
            body = kwargs
        elif args and isinstance(args[0], dict):
            body = args[0]
        else:
            body = {}
        _redact_anthropic_messages(body, self._policy)

        response = self._messages.create(*args, **kwargs)
        content = _get(response, "content", []) or []

        calls = []
        for part in content:
            if _get(part, "type") == "tool_use":
                calls.append(
                    {"id": _get(part, "id"), "name": _get(part, "name"), "arguments": _get(part, "input")}
                )

        def remove(blocked_ids: set[str]) -> None:
            # Keep every block except tool_use blocks whose id was blocked.
            kept = []
            for part in content:
                is_blocked = _get(part, "type") == "tool_use" and _get(part, "id") in blocked_ids
                if not is_blocked:
                    kept.append(part)
            if not kept:
                # Nothing left to return: substitute the policy's block message.
                kept = [{"type": "text", "text": self._policy.block_message}]
            _set(response, "content", kept)

        meta = _enforce_tool_calls(calls, self._policy, remove)
        _set(response, "firewall", meta)
        return response

    def __getattr__(self, name: str) -> Any:
        target = self._messages
        return getattr(target, name)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class FirewallAnthropic:
    """Wrapper around an Anthropic client whose ``messages`` calls are policy-checked.

    The policy is loaded once from ``policy_path``; every attribute other than
    ``messages`` is proxied to the wrapped client.
    """

    def __init__(self, client: Any, policy_path: str):
        self._client = client
        policy = load_policy(policy_path)
        self._policy = policy
        self.messages = _AnthropicMessagesWrapper(client.messages, policy)

    def __getattr__(self, name: str) -> Any:
        wrapped = self._client
        return getattr(wrapped, name)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class FirewallGeminiModel:
    """Wrapper for Gemini model objects that strips blocked function_call parts."""

    def __init__(self, model: Any, policy: Policy):
        self._model = model
        self._policy = policy

    def generate_content(self, *args: Any, **kwargs: Any) -> Any:
        """Forward ``generate_content`` and remove policy-violating function calls.

        Every part of every candidate that carries a ``function_call`` (or
        ``functionCall``) is checked. Blocked parts are removed from their
        candidate, and the firewall metadata is stored on the response under
        ``firewall``.

        Returns:
            The upstream response, mutated in place.
        """
        # Imported here so the method does not rely on a module-level import.
        from collections.abc import MutableSequence

        response = self._model.generate_content(*args, **kwargs)
        candidates = _get(response, "candidates", []) or []
        calls: list[dict[str, Any]] = []
        for candidate_index, candidate in enumerate(candidates):
            content = _get(candidate, "content")
            # ``parts`` may be present but None; treat that as no parts.
            parts = (_get(content, "parts", []) or []) if content is not None else []
            for part_index, part in enumerate(parts):
                fn = _get(part, "function_call") or _get(part, "functionCall")
                if fn is not None:
                    calls.append(
                        {
                            # Gemini calls carry no id, so synthesize a positional one.
                            "id": f"gemini-{candidate_index}-{part_index}",
                            "candidate_index": candidate_index,
                            "part_index": part_index,
                            "name": _get(fn, "name"),
                            "arguments": _get(fn, "args") or _get(fn, "arguments") or {},
                        }
                    )

        def remove(blocked_ids: set[str]) -> None:
            # Pop from the highest index down so earlier indices stay valid.
            for call in sorted(calls, key=lambda c: c["part_index"], reverse=True):
                if call["id"] not in blocked_ids:
                    continue
                candidate = candidates[call["candidate_index"]]
                parts = _get(_get(candidate, "content"), "parts", [])
                # Accept any mutable sequence, not just ``list``: a parts
                # container that is not a plain list would otherwise keep the
                # blocked call while the metadata reports it as stripped.
                # NOTE(review): a container that is not a MutableSequence is
                # still left untouched - confirm against the SDK's parts type.
                if isinstance(parts, MutableSequence):
                    parts.pop(call["part_index"])
                    if not parts:
                        parts.append({"text": self._policy.block_message})

        _set(response, "firewall", _enforce_tool_calls(calls, self._policy, remove))
        return response

    def start_chat(self, *args: Any, **kwargs: Any) -> Any:
        """Start a chat on the wrapped model and return it wrapped in ``FirewallGeminiChat``."""
        chat = self._model.start_chat(*args, **kwargs)
        return FirewallGeminiChat(chat, self._policy)

    def __getattr__(self, name: str) -> Any:
        return getattr(self._model, name)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
class FirewallGeminiChat:
    """Wrapper for a Gemini chat session whose replies are policy-checked."""

    def __init__(self, chat: Any, policy: Policy):
        self._chat = chat
        self._policy = policy

    def send_message(self, *args: Any, **kwargs: Any) -> Any:
        """Send a message, then run the reply through ``FirewallGeminiModel``.

        The reply is wrapped in a throwaway model object whose
        ``generate_content`` returns it, so the same enforcement code is
        reused for chat replies.
        """
        response = self._chat.send_message(*args, **kwargs)

        class _OneShotGeminiModel:
            def generate_content(self, *_a: Any, **_k: Any) -> Any:
                return response

        guard = FirewallGeminiModel(_OneShotGeminiModel(), self._policy)
        return guard.generate_content()

    def __getattr__(self, name: str) -> Any:
        session = self._chat
        return getattr(session, name)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
class FirewallGoogleGenerativeAI:
    """Wrapper for a google-generativeai style client.

    ``GenerativeModel`` returns models wrapped in ``FirewallGeminiModel``; all
    other attributes are proxied to the wrapped client.
    """

    def __init__(self, client: Any, policy_path: str):
        self._client = client
        self._policy = load_policy(policy_path)

    def GenerativeModel(self, *args: Any, **kwargs: Any) -> FirewallGeminiModel:
        """Create a model via the wrapped client and guard it with the loaded policy."""
        model = self._client.GenerativeModel(*args, **kwargs)
        return FirewallGeminiModel(model, self._policy)

    def __getattr__(self, name: str) -> Any:
        wrapped = self._client
        return getattr(wrapped, name)
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import time
|
|
5
|
+
import urllib.parse
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
|
|
8
|
+
from .policy import Policy
|
|
9
|
+
from .schemas import CheckResult, Status, ToolCall
|
|
10
|
+
|
|
11
|
+
_URL_RE = re.compile(r"(?:[a-z][a-z0-9+.\-]*:)?//[^\s\"'<>]+", re.IGNORECASE)
|
|
12
|
+
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+\-]+@([A-Za-z0-9.\-]+\.[A-Za-z]{2,})")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class ToolFinding:
    """Outcome of inspecting a single tool call against the policy."""

    tool_call_id: str  # id of the inspected call
    tool_name: str  # name of the tool the call targets
    status: Status  # outcome status of the inspection
    reasons: list[str] = field(default_factory=list)  # human-readable reasons for a block
    hosts: list[str] = field(default_factory=list)  # hosts extracted from the arguments
    secrets: list[str] = field(default_factory=list)  # names of secret patterns that matched
    arg_hits: list[str] = field(default_factory=list)  # reasons of argument rules that matched
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _extract_hosts(arguments: str) -> list[str]:
    """Collect the hosts referenced by URLs and e-mail addresses in *arguments*.

    URL hosts come from ``urlparse(...).hostname``, which drops user-info and
    port, unwraps bracketed IPv6 literals and lower-cases the result. E-mail
    addresses contribute their lower-cased domain.

    Returns:
        Hosts in order of appearance (URLs first, then e-mail domains);
        duplicates are kept.
    """
    hosts: list[str] = []
    for url in _URL_RE.findall(arguments):
        try:
            parsed = urllib.parse.urlparse(url)
            netloc = parsed.netloc
            host = parsed.hostname or ""
        except ValueError:
            # urlparse rejects malformed authorities (for example unbalanced
            # IPv6 brackets). Report a marker that cannot equal or end with an
            # allowlisted host, so the egress check blocks instead of crashing.
            hosts.append(f"<unparseable url {url!r}>")
            continue
        if netloc:
            # An authority without a host is still reported (as "") so that it
            # is checked against the allowlist rather than silently skipped.
            hosts.append(host)
    hosts.extend(match.group(1).lower() for match in _EMAIL_RE.finditer(arguments))
    return hosts
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def find_secrets(text: str, policy: Policy) -> list[str]:
    """Return the names of the policy's secret patterns that match *text*.

    A falsy *text* is searched as the empty string.
    """
    haystack = text or ""
    return [
        label
        for label, regex in policy.compiled_secrets()
        if regex.search(haystack)
    ]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def check_arg_rules(tool: str, arguments: str, policy: Policy) -> list[str]:
    """Return the reasons of every argument rule that covers *tool* and matches.

    A falsy *arguments* value is searched as the empty string.
    """
    return [
        rule.reason
        for rule, regex in policy.compiled_arg_rules()
        if rule.applies_to(tool) and regex.search(arguments or "")
    ]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def inspect_tool_call(tc: ToolCall, policy: Policy) -> ToolFinding:
    """Run every deterministic check on one tool call.

    Checks, in order: tool allow/deny lists, egress hosts found in the
    arguments, secret patterns in the arguments, and argument rules. Any
    failing check marks the finding as ``Status.BLOCK`` and adds its reason.
    """
    name = tc.function.name or "(unnamed)"
    args = tc.function.arguments or ""
    reasons: list[str] = []

    # Tool-level allow/deny lists.
    if not policy.tool_allowed(name):
        verb = "denied" if name in policy.tool_denylist else "not on allowlist"
        reasons.append(f"tool '{name}' is {verb}")

    # Egress destinations mentioned in the arguments.
    hosts = _extract_hosts(args)
    rejected = sorted({host for host in hosts if not policy.host_allowed(host)})
    if rejected:
        reasons.append(f"egress to non-allowlisted host(s): {', '.join(rejected)}")

    # Secrets embedded in the arguments.
    secrets = find_secrets(args, policy)
    if secrets:
        reasons.append(f"secret(s) in tool args: {', '.join(secrets)}")

    # Argument-level regex rules; each hit already is a reason string.
    arg_hits = check_arg_rules(name, args, policy)
    reasons.extend(arg_hits)

    return ToolFinding(
        tool_call_id=tc.id,
        tool_name=name,
        status=Status.BLOCK if reasons else Status.PASS,
        reasons=reasons,
        hosts=hosts,
        secrets=secrets,
        arg_hits=arg_hits,
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def check_tool_calls(
    tool_calls: list[ToolCall], policy: Policy
) -> tuple[list[ToolFinding], CheckResult]:
    """Inspect every tool call and summarise the outcome.

    Returns:
        ``(findings, result)``: one finding per call, in input order, plus a
        ``CheckResult`` named ``"deterministic_rules"`` that is ``BLOCK`` when
        any call was blocked. The result carries the elapsed time in
        milliseconds and the names of the blocked tools.
    """
    started_at = time.perf_counter()

    findings = []
    blocked = []
    for call in tool_calls:
        finding = inspect_tool_call(call, policy)
        findings.append(finding)
        if finding.status is Status.BLOCK:
            blocked.append(finding)

    if blocked:
        status = Status.BLOCK
        detail = "; ".join(reason for finding in blocked for reason in finding.reasons)
    else:
        status = Status.PASS
        detail = f"{len(tool_calls)} tool call(s) within policy"

    result = CheckResult(
        name="deterministic_rules",
        status=status,
        detail=detail,
        latency_ms=(time.perf_counter() - started_at) * 1000,
        meta={"blocked_tools": [finding.tool_name for finding in blocked]},
    )
    return findings, result
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Status(Enum):
    """Outcome of a single firewall check."""

    PASS = "pass"  # nothing found
    FLAG = "flag"  # something was found and handled (e.g. redacted)
    BLOCK = "block"  # the inspected action must not proceed
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class FunctionCall:
    """Name and argument text of the function a tool call targets."""

    name: str = ""  # function/tool name
    arguments: str = ""  # argument payload as text
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class ToolCall:
    """A tool call in the firewall's own provider-neutral shape."""

    id: str = ""
    type: str = "function"
    function: FunctionCall = field(default_factory=FunctionCall)

    def __init__(
        self,
        id: str = "",
        type: str = "function",
        function: FunctionCall | dict[str, Any] | None = None,
    ):
        """Build a tool call, normalising *function* to a ``FunctionCall``.

        A dict is converted using its ``"name"`` and ``"arguments"`` entries
        (missing or falsy values become ``""``); any other non-``FunctionCall``
        value yields an empty ``FunctionCall``.
        """
        self.id = id
        self.type = type
        if isinstance(function, dict):
            function = FunctionCall(
                name=str(function.get("name") or ""),
                arguments=str(function.get("arguments") or ""),
            )
        elif not isinstance(function, FunctionCall):
            function = FunctionCall()
        self.function = function
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class CheckResult:
    """Summary record produced by one firewall check."""

    name: str  # identifier of the check that produced the result
    status: Status = Status.PASS  # outcome of the check
    detail: str = ""  # human-readable explanation
    latency_ms: float = 0.0  # time the check took, in milliseconds
    meta: dict[str, Any] = field(default_factory=dict)  # extra check-specific data
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: voidhack-agent-firewall
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Provider-agnostic action firewall SDK for AI agents
|
|
5
|
+
Author: VoidHack Agent Firewall
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Dinaltium/VoidHackJune26
|
|
8
|
+
Project-URL: Repository, https://github.com/Dinaltium/VoidHackJune26
|
|
9
|
+
Keywords: ai,agents,firewall,guardrails,prompt-injection,langchain,openai,anthropic,gemini
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: openai>=1.59
|
|
14
|
+
Requires-Dist: PyYAML>=6.0
|
|
15
|
+
Provides-Extra: anthropic
|
|
16
|
+
Requires-Dist: anthropic>=0.24; extra == "anthropic"
|
|
17
|
+
Provides-Extra: gemini
|
|
18
|
+
Requires-Dist: google-generativeai>=0.8; extra == "gemini"
|
|
19
|
+
Provides-Extra: langchain
|
|
20
|
+
Requires-Dist: langchain-core>=0.2; extra == "langchain"
|
|
21
|
+
Provides-Extra: all
|
|
22
|
+
Requires-Dist: anthropic>=0.24; extra == "all"
|
|
23
|
+
Requires-Dist: google-generativeai>=0.8; extra == "all"
|
|
24
|
+
Requires-Dist: langchain-core>=0.2; extra == "all"
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: pytest>=8.3; extra == "dev"
|
|
27
|
+
Dynamic: license-file
|
|
28
|
+
|
|
29
|
+
# Agent Firewall
|
|
30
|
+
|
|
31
|
+
> The operating layer that guards what AI agents actually **do**.
|
|
32
|
+
>
|
|
33
|
+
> *Guardrails check what the model says. We check what the agent does.*
|
|
34
|
+
|
|
35
|
+
**VoidHack June 2026** — Theme: *The Operating Layer of the Internet*.
|
|
36
|
+
|
|
37
|
+
A transparent, **OpenAI-compatible proxy** that sits between an AI agent and the
|
|
38
|
+
LLM/world and enforces **action-level** policy. Point your agent's `base_url` at
|
|
39
|
+
it — nothing else changes — and every tool call, egress destination, secret, and
|
|
40
|
+
token budget is checked before the agent can act.
|
|
41
|
+
|
|
42
|
+
The enforcement engine is provider-agnostic: OpenAI-compatible providers
|
|
43
|
+
(OpenAI, Groq, NVIDIA NIM, Mistral, Together, Fireworks, OpenRouter, local
|
|
44
|
+
gateways) work through the proxy or SDK wrapper, while native Claude/Anthropic
|
|
45
|
+
and Gemini adapters translate their tool-call formats into the same policy
|
|
46
|
+
checks. LangChain integrations block tool execution regardless of the model
|
|
47
|
+
provider.
|
|
48
|
+
|
|
49
|
+
## Why
|
|
50
|
+
|
|
51
|
+
Prompt injection is unsolved (OWASP LLM01:2025). Existing guardrails classify
|
|
52
|
+
*text* and approve the words — then the agent emails your secrets in the next
|
|
53
|
+
call. The firewall enforces at the layer where damage happens: **the action**.
|
|
54
|
+
|
|
55
|
+
It blocks by **stripping the disallowed `tool_call` out of the model's response
|
|
56
|
+
before the agent ever sees it** — prevention, not a warning. The default is **fail-closed**.
|
|
57
|
+
|
|
58
|
+
## What it enforces
|
|
59
|
+
|
|
60
|
+
- **Tool allowlist** — default-deny; `send_email`, `run_shell`, … are blocked.
|
|
61
|
+
- **Egress allowlist** — URLs/email domains must be on the allowlist.
|
|
62
|
+
- **Injection scan** — heuristic + Meta **Prompt Guard 2** on tool results.
|
|
63
|
+
- **PII / secret redaction** — in-flight, both directions (regex; Presidio optional).
|
|
64
|
+
- **Cost guard** — per-session token budget.
|
|
65
|
+
- **Signed receipts** — every decision is HMAC-signed and auditable, streamed
|
|
66
|
+
live to a control-plane dashboard.
|
|
67
|
+
- **Safeguard reasoner** — `gpt-oss-safeguard-20b` adds an auditable explanation
|
|
68
|
+
on flagged actions (policy-following; reads `policies/policy.yaml`).
|
|
69
|
+
|
|
70
|
+
## Demo (before / after)
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
# BREACH — agent talks straight to the model; it emails data externally
|
|
74
|
+
python -m agent.run_attack --task email --direct
|
|
75
|
+
|
|
76
|
+
# SAFE — same task through the firewall; send_email is blocked, 0 exfiltration
|
|
77
|
+
python -m agent.run_attack --task email
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
The dashboard (`/`) shows decisions stream in live; **Run demo attack** replays a
|
|
81
|
+
spread of attacks through the real engine. See [docs/RUNBOOK.md](docs/RUNBOOK.md).
|
|
82
|
+
|
|
83
|
+
### Mission Control (`/mission`)
|
|
84
|
+
|
|
85
|
+
An interactive page where you hand an autonomous agent a real goal (editable task
|
|
86
|
+
+ a knowledge document you can poison), toggle the **firewall ON/OFF**, and watch
|
|
87
|
+
a **live LLM** execute step by step. Governed: the agent gets hijacked but every
|
|
88
|
+
dangerous call is stripped — *"Firewall held."* Ungoverned: the same agent
|
|
89
|
+
exfiltrates — *"Breach."* An Impact panel proves what reached the outside world.
|
|
90
|
+
|
|
91
|
+
## Stack
|
|
92
|
+
|
|
93
|
+
| Layer | Tech |
|
|
94
|
+
|-------|------|
|
|
95
|
+
| Proxy | Python 3.12 · FastAPI 0.136 · Uvicorn · httpx · Pydantic 2 |
|
|
96
|
+
| Detection | deterministic rules · Prompt Guard 2 (Groq) · regex/Presidio PII |
|
|
97
|
+
| Reasoner | gpt-oss-safeguard-20b (Groq, selective) |
|
|
98
|
+
| Store | SQLite · SQLAlchemy 2.0 · HMAC-SHA256 receipts |
|
|
99
|
+
| Dashboard | Next.js 16 · Tailwind 4 · SSE live feed |
|
|
100
|
+
| Demo agent | OpenAI SDK · llama-3.3-70b-versatile |
|
|
101
|
+
|
|
102
|
+
All models run on the **Groq free tier** — zero cost, no card.
|
|
103
|
+
|
|
104
|
+
## SDKs and provider adapters
|
|
105
|
+
|
|
106
|
+
Install from npm:
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
npm install voidhack-agent-firewall
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Install from PyPI:
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
pip install voidhack-agent-firewall
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
- **Python**: `FirewallOpenAI`, `FirewallAnthropic`, `FirewallGoogleGenerativeAI`,
|
|
119
|
+
`FirewallCallbackHandler`, and `create_openai_compatible_firewall`.
|
|
120
|
+
- **Node.js**: `FirewallOpenAI`, `FirewallAnthropic`,
|
|
121
|
+
`FirewallGoogleGenerativeAI`, LangChain.js callback support, and
|
|
122
|
+
`createFirewallOpenAICompatible`.
|
|
123
|
+
|
|
124
|
+
See [docs/SDK_INTEGRATION.md](docs/SDK_INTEGRATION.md) for Claude, Gemini,
|
|
125
|
+
Groq, NVIDIA, Mistral, Together, and LangChain examples.
|
|
126
|
+
|
|
127
|
+
## Layout
|
|
128
|
+
|
|
129
|
+
```
|
|
130
|
+
proxy/ FastAPI firewall (app/ + tests/)
|
|
131
|
+
dashboard/ Next.js 16 control-plane UI
|
|
132
|
+
agent/ demo victim agent + poisoned document
|
|
133
|
+
policies/ policy.yaml
|
|
134
|
+
docs/ DESIGN · ARCHITECTURE · RUNBOOK
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Status
|
|
138
|
+
|
|
139
|
+
Working end-to-end. Backend: ruff + mypy + 34 pytest tests green. Dashboard: biome +
|
|
140
|
+
tsc + build + 2 Playwright e2e green. Verified live against Groq.
|
|
141
|
+
|
|
142
|
+
See [docs/DESIGN.md](docs/DESIGN.md) for the design + decision log and
|
|
143
|
+
[docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for components and data flow.
|
|
144
|
+
|
|
145
|
+
## License
|
|
146
|
+
|
|
147
|
+
MIT
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
voidhack_agent_firewall/__init__.py,sha256=OZuzSztcGA86ub2hngEFkn1u3mnDW4xWCmL4f7bnIBI,491
|
|
2
|
+
voidhack_agent_firewall/client.py,sha256=xr2QztCpnoImZ0vRH_vvLzZXctYyn8rBzs-pCA2_mOE,5086
|
|
3
|
+
voidhack_agent_firewall/langchain.py,sha256=xSOgEZaItVWZtRPoz64b3GSYWdRFqltImIOFizTgDi4,2411
|
|
4
|
+
voidhack_agent_firewall/pii.py,sha256=zuvma6_u5EHN42wVuq6PaJ_3FBptXGGZvkuDvkN3uEA,1530
|
|
5
|
+
voidhack_agent_firewall/policy.py,sha256=pVQpK6cFhWABicZDHHy-qdHMK_gsxw4JNEwjS1AcF4o,3331
|
|
6
|
+
voidhack_agent_firewall/providers.py,sha256=o0M44h6qVrircwynQ2Dp2meKTq1RlTScJoLgZQLFDX0,8948
|
|
7
|
+
voidhack_agent_firewall/rules.py,sha256=iBC3Vh6ycm5IaShpP-XvEb_BHz458J5_Ct4CR9kcta4,3495
|
|
8
|
+
voidhack_agent_firewall/schemas.py,sha256=ZDLxGaNgRejv2LG7FD56unu0qwAFtHUZU91Hy5xyggY,1156
|
|
9
|
+
voidhack_agent_firewall-1.0.0.dist-info/licenses/LICENSE,sha256=MkOSykGmBxEgaEqe0o6Rr-Gj_G0WTh0c6U5Zk-wyRQI,1075
|
|
10
|
+
voidhack_agent_firewall-1.0.0.dist-info/METADATA,sha256=3TAlRYTF0SqTGHu-8Dui_AkZrXwrpAaoE0IW4xEZuyk,5758
|
|
11
|
+
voidhack_agent_firewall-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
12
|
+
voidhack_agent_firewall-1.0.0.dist-info/top_level.txt,sha256=8p4vG-kQ_vPz6J7UcMY18io8sMKis5D0hjjj9XPojhE,24
|
|
13
|
+
voidhack_agent_firewall-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 RAFAN AHAMAD SHEIK
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
voidhack_agent_firewall
|