zu-backends 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zu_backends-0.2.0/.gitignore +66 -0
- zu_backends-0.2.0/PKG-INFO +57 -0
- zu_backends-0.2.0/README.md +34 -0
- zu_backends-0.2.0/pyproject.toml +52 -0
- zu_backends-0.2.0/src/zu_backends/__init__.py +8 -0
- zu_backends-0.2.0/src/zu_backends/egress_proxy.py +406 -0
- zu_backends-0.2.0/src/zu_backends/encryption.py +170 -0
- zu_backends-0.2.0/src/zu_backends/host_monitor.py +220 -0
- zu_backends-0.2.0/src/zu_backends/jsonl_sink.py +118 -0
- zu_backends-0.2.0/src/zu_backends/local_docker.py +511 -0
- zu_backends-0.2.0/src/zu_backends/mitm.py +108 -0
- zu_backends-0.2.0/src/zu_backends/scripted_sandbox.py +108 -0
- zu_backends-0.2.0/src/zu_backends/seccomp/redteam-audit.json +17 -0
- zu_backends-0.2.0/src/zu_backends/seccomp/redteam-block.json +24 -0
- zu_backends-0.2.0/src/zu_backends/sqlite_sink.py +192 -0
- zu_backends-0.2.0/src/zu_backends/triggers.py +95 -0
- zu_backends-0.2.0/tests/test_egress_proxy.py +170 -0
- zu_backends-0.2.0/tests/test_encryption.py +193 -0
- zu_backends-0.2.0/tests/test_host_monitor.py +122 -0
- zu_backends-0.2.0/tests/test_jsonl_sink.py +65 -0
- zu_backends-0.2.0/tests/test_local_docker.py +395 -0
- zu_backends-0.2.0/tests/test_mitm.py +118 -0
- zu_backends-0.2.0/tests/test_sqlite_sink.py +136 -0
- zu_backends-0.2.0/tests/test_triggers.py +79 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
|
|
9
|
+
# uv / venv
|
|
10
|
+
.venv/
|
|
11
|
+
uv.lock.bak
|
|
12
|
+
|
|
13
|
+
# Test / type caches
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
.mypy_cache/
|
|
16
|
+
.ruff_cache/
|
|
17
|
+
.coverage
|
|
18
|
+
htmlcov/
|
|
19
|
+
|
|
20
|
+
# Zu runtime artifacts
|
|
21
|
+
*.db
|
|
22
|
+
zu.db
|
|
23
|
+
zu.yaml.local
|
|
24
|
+
zu_review.jsonl
|
|
25
|
+
*.review.jsonl
|
|
26
|
+
# Per-agent cost telemetry ledger — machine-local run history, not source.
|
|
27
|
+
cost.jsonl
|
|
28
|
+
# A recorded replay path is learned per-run and machine-local — regenerated on
|
|
29
|
+
# every successful run, not source. The agent ships; its track does not.
|
|
30
|
+
track.json
|
|
31
|
+
# …except the flagship example ships its track on purpose, as a demo of the
|
|
32
|
+
# record/replay convergence (committed; re-runs show as ordinary modifications).
|
|
33
|
+
!examples/agents/vet-appointment/track.json
|
|
34
|
+
|
|
35
|
+
# Editor / OS
|
|
36
|
+
.idea/
|
|
37
|
+
.vscode/
|
|
38
|
+
.DS_Store
|
|
39
|
+
|
|
40
|
+
# Claude Code local session state
|
|
41
|
+
.claude/
|
|
42
|
+
|
|
43
|
+
# Secrets
|
|
44
|
+
.env
|
|
45
|
+
.env.*
|
|
46
|
+
!.env.example
|
|
47
|
+
|
|
48
|
+
# Microsoft Office temp/lock files
|
|
49
|
+
~$*
|
|
50
|
+
|
|
51
|
+
# Internal design / strategy docs — kept local, never in the public repo
|
|
52
|
+
*.docx
|
|
53
|
+
*.pdf
|
|
54
|
+
# BUILD.md is the internal build-sequence / deferred-gaps ledger — kept local.
|
|
55
|
+
# (ARCHITECTURE.md is public: an onboarding agent needs the structural map.)
|
|
56
|
+
docs/BUILD.md
|
|
57
|
+
|
|
58
|
+
# Local secret — API key for live validation, never commit
|
|
59
|
+
zu_demo_key.md
|
|
60
|
+
*_key.md
|
|
61
|
+
|
|
62
|
+
# Local PyPI publish token — never commit
|
|
63
|
+
/pypi
|
|
64
|
+
|
|
65
|
+
# Local Discord credentials (bot token / app secrets) — never commit
|
|
66
|
+
/discord
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: zu-backends
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Zu sandbox backends and event sinks: local-docker, sqlite
|
|
5
|
+
Project-URL: Homepage, https://github.com/k3-mt/zu
|
|
6
|
+
Project-URL: Repository, https://github.com/k3-mt/zu
|
|
7
|
+
License-Expression: Apache-2.0
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
15
|
+
Classifier: Typing :: Typed
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Requires-Dist: zu-core==0.2.0
|
|
18
|
+
Provides-Extra: docker
|
|
19
|
+
Requires-Dist: docker>=7; extra == 'docker'
|
|
20
|
+
Provides-Extra: encryption
|
|
21
|
+
Requires-Dist: cryptography>=42; extra == 'encryption'
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# zu-backends
|
|
25
|
+
|
|
26
|
+
Infrastructure adapters: the **`SandboxBackend`** port (provision and run a
|
|
27
|
+
tier's environment) and the **`EventSink`** port (persist and query the event
|
|
28
|
+
log). These are the durable/isolation seams the core defers behind a port.
|
|
29
|
+
|
|
30
|
+
## Registered plugins
|
|
31
|
+
|
|
32
|
+
### Sandbox backends (`zu.backends`)
|
|
33
|
+
|
|
34
|
+
| Name | Class | Notes |
|
|
35
|
+
|------|-------|-------|
|
|
36
|
+
| `local-docker` | `LocalDockerBackend` | Runs a tier's container via the local Docker daemon. Network is disabled by default and enabled only for the render tier; *scoped* egress (allowlist / DNS-pinned) is the deferred egress-policy work. Needs the `[docker]` extra. |
|
|
37
|
+
|
|
38
|
+
### Event sinks (`zu.sinks`)
|
|
39
|
+
|
|
40
|
+
| Name | Class | Notes |
|
|
41
|
+
|------|-------|-------|
|
|
42
|
+
| `sqlite` | `SqliteSink` | The durable canonical store: WAL, `synchronous=FULL`, `busy_timeout`, single writer, keyset streaming, idempotent append. |
|
|
43
|
+
| `jsonl` | `JsonlSink` | One JSON object per line — a greppable secondary/trace sink that log shippers tail. |
|
|
44
|
+
|
|
45
|
+
The payload codec seam (`zu_core.codec`) lets a sink encrypt payloads at rest;
|
|
46
|
+
an AES-256-GCM codec ships behind the optional `[encryption]` extra
|
|
47
|
+
(`zu_backends.encryption`).
|
|
48
|
+
|
|
49
|
+
## Extend
|
|
50
|
+
|
|
51
|
+
Implement the `SandboxBackend` or `EventSink` shape, register under
|
|
52
|
+
`zu.backends` / `zu.sinks`, and add a deterministic test (inject a fake Docker
|
|
53
|
+
client / use a temp DB path — no real daemon needed offline).
|
|
54
|
+
|
|
55
|
+
## Tests
|
|
56
|
+
|
|
57
|
+
`uv run pytest packages/zu-backends` — offline.
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# zu-backends
|
|
2
|
+
|
|
3
|
+
Infrastructure adapters: the **`SandboxBackend`** port (provision and run a
|
|
4
|
+
tier's environment) and the **`EventSink`** port (persist and query the event
|
|
5
|
+
log). These are the durable/isolation seams the core defers behind a port.
|
|
6
|
+
|
|
7
|
+
## Registered plugins
|
|
8
|
+
|
|
9
|
+
### Sandbox backends (`zu.backends`)
|
|
10
|
+
|
|
11
|
+
| Name | Class | Notes |
|
|
12
|
+
|------|-------|-------|
|
|
13
|
+
| `local-docker` | `LocalDockerBackend` | Runs a tier's container via the local Docker daemon. Network is disabled by default and enabled only for the render tier; *scoped* egress (allowlist / DNS-pinned) is the deferred egress-policy work. Needs the `[docker]` extra. |
|
|
14
|
+
|
|
15
|
+
### Event sinks (`zu.sinks`)
|
|
16
|
+
|
|
17
|
+
| Name | Class | Notes |
|
|
18
|
+
|------|-------|-------|
|
|
19
|
+
| `sqlite` | `SqliteSink` | The durable canonical store: WAL, `synchronous=FULL`, `busy_timeout`, single writer, keyset streaming, idempotent append. |
|
|
20
|
+
| `jsonl` | `JsonlSink` | One JSON object per line — a greppable secondary/trace sink that log shippers tail. |
|
|
21
|
+
|
|
22
|
+
The payload codec seam (`zu_core.codec`) lets a sink encrypt payloads at rest;
|
|
23
|
+
an AES-256-GCM codec ships behind the optional `[encryption]` extra
|
|
24
|
+
(`zu_backends.encryption`).
|
|
25
|
+
|
|
26
|
+
## Extend
|
|
27
|
+
|
|
28
|
+
Implement the `SandboxBackend` or `EventSink` shape, register under
|
|
29
|
+
`zu.backends` / `zu.sinks`, and add a deterministic test (inject a fake Docker
|
|
30
|
+
client / use a temp DB path — no real daemon needed offline).
|
|
31
|
+
|
|
32
|
+
## Tests
|
|
33
|
+
|
|
34
|
+
`uv run pytest packages/zu-backends` — offline.
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "zu-backends"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "Zu sandbox backends and event sinks: local-docker, sqlite"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
license = "Apache-2.0"
|
|
8
|
+
classifiers = [
|
|
9
|
+
"Development Status :: 4 - Beta",
|
|
10
|
+
"Intended Audience :: Developers",
|
|
11
|
+
"License :: OSI Approved :: Apache Software License",
|
|
12
|
+
"Programming Language :: Python :: 3",
|
|
13
|
+
"Programming Language :: Python :: 3.11",
|
|
14
|
+
"Programming Language :: Python :: 3.12",
|
|
15
|
+
"Topic :: Software Development :: Libraries :: Application Frameworks",
|
|
16
|
+
"Typing :: Typed",
|
|
17
|
+
]
|
|
18
|
+
dependencies = ["zu-core==0.2.0"]
|
|
19
|
+
|
|
20
|
+
[project.optional-dependencies]
|
|
21
|
+
docker = ["docker>=7"]
|
|
22
|
+
encryption = ["cryptography>=42"]
|
|
23
|
+
|
|
24
|
+
# Console script that runs the egress proxy as a sidecar container for the red-team container form
|
|
25
|
+
# (RED_TEAM_CONTAINER.md §3.1): the target's sole route off-box, streaming its
|
|
26
|
+
# connection log as JSONL on stdout.
|
|
27
|
+
[project.scripts]
|
|
28
|
+
zu-egress-proxy = "zu_backends.egress_proxy:main"
|
|
29
|
+
|
|
30
|
+
[project.entry-points."zu.backends"]
|
|
31
|
+
local-docker = "zu_backends.local_docker:LocalDockerBackend"
|
|
32
|
+
|
|
33
|
+
[project.entry-points."zu.sinks"]
|
|
34
|
+
sqlite = "zu_backends.sqlite_sink:SqliteSink"
|
|
35
|
+
jsonl = "zu_backends.jsonl_sink:JsonlSink"
|
|
36
|
+
|
|
37
|
+
[project.entry-points."zu.triggers"]
|
|
38
|
+
webhook = "zu_backends.triggers:WebhookTrigger"
|
|
39
|
+
queue = "zu_backends.triggers:QueueTrigger"
|
|
40
|
+
schedule = "zu_backends.triggers:ScheduleTrigger"
|
|
41
|
+
object-store = "zu_backends.triggers:ObjectStoreTrigger"
|
|
42
|
+
|
|
43
|
+
[project.urls]
|
|
44
|
+
Homepage = "https://github.com/k3-mt/zu"
|
|
45
|
+
Repository = "https://github.com/k3-mt/zu"
|
|
46
|
+
|
|
47
|
+
[build-system]
|
|
48
|
+
requires = ["hatchling"]
|
|
49
|
+
build-backend = "hatchling.build"
|
|
50
|
+
|
|
51
|
+
[tool.hatch.build.targets.wheel]
|
|
52
|
+
packages = ["src/zu_backends"]
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""Zu infrastructure adapters: sandbox backends and event sinks.
|
|
2
|
+
|
|
3
|
+
The SandboxBackend interface is the load-bearing proof of the backend-agnostic
|
|
4
|
+
positioning — kept clean even with the single local-docker adapter, so Modal,
|
|
5
|
+
E2B, and microVMs are later adapters, not a rewrite. The EventSink is the
|
|
6
|
+
storage seam: SQLite by default, Postgres and the hosted central log later,
|
|
7
|
+
all behind one contract.
|
|
8
|
+
"""
|
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
"""LocalEgressProxy — the real egress proxy for the red-team container form (P1).
|
|
2
|
+
|
|
3
|
+
This is the *enforcement* the in-process gate can only observe: a forward proxy
|
|
4
|
+
that is the target container's **sole route off-box**, so the verdict rests on a
|
|
5
|
+
record the target cannot author (RED_TEAM_CONTAINER.md §3.1). It does two jobs:
|
|
6
|
+
|
|
7
|
+
* **Logs** every connection the target makes — host, port, scheme, bytes — as a
|
|
8
|
+
structured record. This, not the tool's self-reported observation, is what the
|
|
9
|
+
``EgressBreach``/``ExfilBreach`` observers read in the container form.
|
|
10
|
+
* **Enforces** the declared allowlist: a CONNECT/request to a host outside the
|
|
11
|
+
allowlist (or to an internal/metadata host, ever) is **refused** and logged with
|
|
12
|
+
``allowed: false``. A benign plugin has no business reaching off-allowlist, so
|
|
13
|
+
the refused *attempt* is itself the breach.
|
|
14
|
+
|
|
15
|
+
It implements the ``EgressProxy`` port (``launch``/``connections``/``close``), so
|
|
16
|
+
``ContainerGate`` drives it exactly like the scripted stand-in — the P0 pipeline
|
|
17
|
+
becomes the P1 pipeline by swapping this in. Pure stdlib asyncio: no Docker, no
|
|
18
|
+
optional dependency, and unit-testable over loopback.
|
|
19
|
+
|
|
20
|
+
Scope note: the proxy is the only egress *path*, but the hard guarantee that a
|
|
21
|
+
tool cannot bypass it (open a raw socket directly) is the container's network
|
|
22
|
+
policy (default-DROP), configured by the ``SandboxBackend`` — not this process.
|
|
23
|
+
The proxy is where egress is *seen and allowed*; the network policy is where
|
|
24
|
+
bypass is *prevented*. Both are needed; this is the former.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import asyncio
|
|
30
|
+
import ipaddress
|
|
31
|
+
from dataclasses import dataclass
|
|
32
|
+
from typing import Any
|
|
33
|
+
from urllib.parse import urlsplit
|
|
34
|
+
|
|
35
|
+
# Allowlist entry meaning "any host" (mirrors zu_core.ports.EGRESS_OPEN). It is
# repeated here as a literal so this stdlib-only module does not need an import
# for a single constant.
_EGRESS_OPEN = "*"

# Canned HTTP responses the proxy writes back to the client: "refused" when the
# target is not permitted, "upstream" when handling the connection failed.
_PROXY_ERROR_CODES = dict(
    refused=b"HTTP/1.1 403 Forbidden\r\n\r\n",
    upstream=b"HTTP/1.1 502 Bad Gateway\r\n\r\n",
)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _is_internal_host(host: str) -> bool:
    """Return True for a host no plugin may ever reach.

    Covers loopback / private / link-local (cloud metadata 169.254.169.254) /
    reserved / multicast / unspecified addresses and the well-known internal
    names. A literal IP is decided structurally; a name only by the known
    internal spellings (names are not resolved here — that is the DNS-pin's job
    in the backend).

    Equivalent spellings of the same destination are normalised first, so they
    cannot slip past the guard: surrounding whitespace, letter case, a trailing
    root dot (``localhost.``), a bracketed IPv6 literal (``[::1]``), any
    ``*.localhost`` name, and the legacy ``inet_aton`` IPv4 forms (``127.1``,
    ``2130706433``) that the resolver accepts but ``ipaddress`` rejects.

    Args:
        host: Host name or IP literal as taken from the request target.

    Returns:
        True when the host is internal and must be refused, False otherwise
        (including for an empty or missing host).
    """
    import socket  # function-scope: leaves the module's import block untouched

    name = (host or "").strip().lower()
    if name.startswith("[") and name.endswith("]"):
        name = name[1:-1]  # "[::1]" -> "::1"
    name = name.rstrip(".")  # "localhost." names the same host as "localhost"
    if not name:
        return False
    if name in {"localhost", "metadata.google.internal"} or name.endswith(".localhost"):
        return True
    try:
        ip = ipaddress.ip_address(name)
    except ValueError:
        # Not a canonical literal. The platform resolver still understands the
        # legacy inet_aton spellings (short, decimal, hex), so decode those the
        # same way rather than treating them as an ordinary name.
        try:
            ip = ipaddress.ip_address(socket.inet_aton(name))
        except (OSError, ValueError):
            return False
    return (
        ip.is_loopback
        or ip.is_private
        or ip.is_link_local
        or ip.is_reserved
        or ip.is_multicast
        or ip.is_unspecified
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class _ProxyHandle:
    """Everything needed to refer back to one running proxy.

    Groups the address the proxy is reachable on, the asyncio server object
    that is listening, the list of connection records gathered during the run,
    and the allowlist the run was started with.
    """

    host: str  # address of the listening socket
    port: int  # port of the listening socket
    server: asyncio.AbstractServer  # the asyncio server accepting clients
    log: list[dict]  # connection records accumulated this run
    allow: set[str]  # allowlist in force for this run
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass
class LocalEgressProxy:
    """A CONNECT + absolute-form HTTP forward proxy that logs and allowlist-gates
    egress.

    Each parsed request appends one record to the run's connection log
    (``client``, ``host``, ``port``, ``scheme``, ``bytes_out``, ``allowed`` and,
    under MITM, ``url``/``body``) *before* the allow decision, so a refused
    attempt is recorded too.

    ``block_internal`` is the SSRF guard (refuse loopback/private/metadata even
    if somehow allowlisted); disable it only in loopback tests.
    """

    name = "local-egress-proxy"
    bind_host: str = "127.0.0.1"
    bind_port: int = 0  # 0 -> an ephemeral port the OS assigns
    block_internal: bool = True
    # Bound on each individual read and on the whole copy phase of a tunnel, so
    # a wedged upstream can't hang the run forever.
    io_timeout_s: float = 30.0
    # P2 TLS MITM: a MitmCA enables decrypting HTTPS to record the request URL/body
    # (so ExfilBreach can see a secret in an HTTPS query). None -> blind CONNECT
    # tunnel (P1): the host is logged, the payload is not. ``upstream_ssl`` overrides
    # the context used to re-originate TLS upstream (tests inject an unverified one).
    mitm: Any = None
    upstream_ssl: Any = None
    # Cap on the request body captured for the exfil log (bytes).
    body_cap: int = 65536
    # Optional callback(entry: dict) invoked once per finished connection — the
    # sidecar CLI uses it to stream the connection log as JSONL on stdout.
    on_connection: Any = None
    # Cap on the total request-line + header bytes accepted from a client.
    header_cap: int = 65536

    async def launch(self, spec: dict) -> _ProxyHandle:
        """Start the proxy for one run against the union allowlist in
        ``spec['allowlist']`` (``['*']`` permits any host).

        Returns:
            A handle carrying the bound ``{host, port}`` the container routes
            through, plus the server, the (initially empty) connection log and
            the allowlist.
        """
        allow = set(spec.get("allowlist") or [])
        log: list[dict] = []

        async def on_client(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
            await self._serve(reader, writer, allow, log)

        server = await asyncio.start_server(on_client, self.bind_host, self.bind_port)
        sock = server.sockets[0].getsockname()
        return _ProxyHandle(host=sock[0], port=sock[1], server=server, log=log, allow=allow)

    def connections(self, handle: _ProxyHandle) -> list[dict]:
        """Return a snapshot of the connection log (each record shallow-copied)."""
        return [dict(c) for c in handle.log]

    async def close(self, handle: _ProxyHandle) -> None:
        """Stop accepting clients and wait for the server to finish closing."""
        handle.server.close()
        try:
            await handle.server.wait_closed()
        except Exception:  # noqa: BLE001 - teardown must not raise over the result
            pass

    # --- connection handling ---------------------------------------------

    def _allowed(self, host: str, allow: set[str]) -> bool:
        """Decide whether ``host`` may be reached under ``allow``.

        Internal hosts are refused first when ``block_internal`` is set; the
        open sentinel then permits anything else. Otherwise the host must match
        an allowlist entry, compared case-insensitively because host names are
        case-insensitive.
        """
        if self.block_internal and _is_internal_host(host):
            return False
        if _EGRESS_OPEN in allow:
            return True
        wanted = host.lower()
        return any(entry.lower() == wanted for entry in allow)

    @staticmethod
    def _split_connect_target(target: str) -> tuple[str, int]:
        """Split a CONNECT authority (``host[:port]`` or ``[v6]:port``) into a
        lowercased host and a port; a missing or non-numeric port means 443."""
        if target.startswith("["):
            # Bracketed IPv6 literal: the host itself contains colons, so the
            # port separator is the colon *after* the closing bracket.
            host, closed, tail = target[1:].partition("]")
            port_s = tail[1:] if closed and tail.startswith(":") else ""
        else:
            host, _, port_s = target.partition(":")
        port = int(port_s) if port_s.isascii() and port_s.isdigit() else 443
        return host.lower(), port

    async def _serve(
        self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter,
        allow: set[str], log: list[dict],
    ) -> None:
        """Handle one client connection: parse the request, log it, apply the
        allowlist, then forward (or MITM) it. Any failure is answered with a 502
        instead of propagating; the client socket is always closed."""
        peer = writer.get_extra_info("peername")
        client = f"{peer[0]}:{peer[1]}" if peer else "?"
        entry: dict | None = None
        try:
            header_block, request_line = await self._read_headers(reader)
            if not request_line:
                return
            # A request line with fewer than three space-separated tokens fails
            # to unpack here; that ValueError is answered with a 502 below.
            method, target, _, _rest = (request_line.decode("latin1") + " ").split(" ", 3)
            method = method.upper()
            if method == "CONNECT":
                host, port = self._split_connect_target(target)
                scheme = "https"
            else:  # absolute-form: METHOD http://host[:port]/path HTTP/1.1
                parts = urlsplit(target)
                host, port = parts.hostname or "", parts.port or 80
                scheme = parts.scheme or "http"
            entry = {"client": client, "host": host, "port": port, "scheme": scheme,
                     "bytes_out": 0, "allowed": False}
            log.append(entry)  # recorded before the decision: a refusal is logged too

            if not host or not self._allowed(host, allow):
                writer.write(_PROXY_ERROR_CODES["refused"])
                await writer.drain()
                return
            entry["allowed"] = True
            if method == "CONNECT" and self.mitm is not None:
                await self._mitm_forward(reader, writer, host, port, entry)
            else:
                await self._forward(reader, writer, host, port, method, target,
                                    header_block, request_line, entry)
        except Exception:  # noqa: BLE001 - a proxy hiccup is an observation, not a crash
            try:
                writer.write(_PROXY_ERROR_CODES["upstream"])
                await writer.drain()
            except Exception:  # noqa: BLE001
                pass
        finally:
            # Stream the finalised connection record (used by the sidecar CLI to
            # emit one JSONL line per connection on stdout, which the host control
            # plane reads via `docker logs`).
            if entry is not None and self.on_connection is not None:
                try:
                    self.on_connection(entry)
                except Exception:  # noqa: BLE001 - a logging hook must never break the proxy
                    pass
            writer.close()
            try:
                await writer.wait_closed()
            except Exception:  # noqa: BLE001
                pass

    async def _read_headers(self, reader: asyncio.StreamReader) -> tuple[bytes, bytes]:
        """Read the request line + headers up to the blank line.

        Returns:
            ``(block, request_line)``: the raw bytes read (request line, headers
            and terminating blank line) and the request line alone.

        Raises:
            ValueError: the block grew past ``header_cap`` before the blank
                line, so a client cannot stream headers forever.
            TimeoutError: a single line took longer than ``io_timeout_s``.
        """
        request_line = await asyncio.wait_for(reader.readline(), self.io_timeout_s)
        block = bytearray(request_line)
        while True:
            line = await asyncio.wait_for(reader.readline(), self.io_timeout_s)
            block += line
            if line in (b"\r\n", b"\n", b""):
                break
            if len(block) > self.header_cap:
                raise ValueError("request header block exceeds header_cap")
        return bytes(block), request_line

    async def _forward(
        self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter,
        host: str, port: int, method: str, target: str,
        header_block: bytes, request_line: bytes, entry: dict,
    ) -> None:
        """Open the upstream connection and relay traffic.

        CONNECT gets a ``200 Connection Established`` and a blind tunnel; any
        other method has its request line rewritten to origin-form and is sent
        upstream with the original headers. The upstream socket is always
        closed on the way out.
        """
        up_reader, up_writer = await asyncio.wait_for(
            asyncio.open_connection(host, port), self.io_timeout_s)
        try:
            if method == "CONNECT":
                writer.write(b"HTTP/1.1 200 Connection Established\r\n\r\n")
                await writer.drain()
            else:
                # Rewrite absolute-form to origin-form and forward the request
                # (headers + any body the client sends next) to the upstream.
                parts = urlsplit(target)
                path = parts.path or "/"
                if parts.query:
                    path += "?" + parts.query
                rest = header_block[len(request_line):]
                new_line = f"{method} {path} HTTP/1.1\r\n".encode("latin1")
                up_writer.write(new_line + rest)
                await up_writer.drain()
                entry["bytes_out"] += len(new_line) + len(rest)
            await self._pump(reader, up_reader, writer, up_writer, entry)
        finally:
            up_writer.close()
            try:
                await up_writer.wait_closed()
            except Exception:  # noqa: BLE001
                pass

    def _upstream_ssl(self) -> Any:
        """Return the TLS context for the upstream hop: the injected
        ``upstream_ssl`` when set, else a default verifying context."""
        import ssl

        return self.upstream_ssl if self.upstream_ssl is not None else ssl.create_default_context()

    async def _read_up_to(self, reader: asyncio.StreamReader, want: int) -> bytes:
        """Read at most ``want`` bytes, returning whatever arrived before EOF or
        before ``io_timeout_s`` elapsed (possibly fewer bytes, possibly none).

        Bytes are only taken from the reader once they are returned here, so
        nothing is left behind in its buffer to be relayed later without having
        been seen.
        """
        got = bytearray()
        loop = asyncio.get_running_loop()
        deadline = loop.time() + self.io_timeout_s
        while len(got) < want:
            remaining = deadline - loop.time()
            if remaining <= 0:
                break
            try:
                chunk = await asyncio.wait_for(reader.read(want - len(got)), remaining)
            except (TimeoutError, asyncio.TimeoutError):
                break
            if not chunk:  # EOF
                break
            got += chunk
        return bytes(got)

    async def _read_bounded_body(
        self, reader: asyncio.StreamReader, header_block: bytes
    ) -> tuple[bytes, bytes]:
        """Read up to ``body_cap`` bytes of the request body so a secret smuggled
        into a POST body — not just a query string — lands in the exfil log.

        Returns ``(raw, decoded)``: ``raw`` is the on-wire bytes to forward upstream
        verbatim; ``decoded`` is the inspectable plaintext for the log (identical to
        ``raw`` for a Content-Length body, dechunked for a chunked one). Both
        Content-Length AND ``Transfer-Encoding: chunked`` are handled — chunked is a
        trivial framing any HTTP client can use to evade a Content-Length-only
        capture, which would otherwise be a gaping exfil bypass.

        Best-effort: a short/absent body is fine. A body that stalls or ends
        early still yields the bytes that did arrive, so data already sent by
        the client is never forwarded without also being captured.
        """
        headers = header_block.lower()
        if b"transfer-encoding:" in headers and b"chunked" in headers:
            return await self._read_chunked_body(reader)
        length = 0
        for line in header_block.split(b"\r\n"):
            if line.lower().startswith(b"content-length:"):
                try:
                    length = int(line.split(b":", 1)[1].strip())
                except ValueError:
                    length = 0
        if length <= 0:
            return b"", b""
        raw = await self._read_up_to(reader, min(length, self.body_cap))
        return raw, raw

    async def _read_chunked_body(self, reader: asyncio.StreamReader) -> tuple[bytes, bytes]:
        """Read a ``Transfer-Encoding: chunked`` body up to ``body_cap``, returning
        the raw on-wire framing (to forward) and the dechunked plaintext (to log).

        The declared chunk size comes from the client, so it is never used
        as-is for a read: at most the remaining capture budget is read per
        chunk. Whatever is not consumed here stays in ``reader`` and is relayed
        by the pump afterwards, so the forwarded framing stays intact.
        """
        raw = bytearray()
        decoded = bytearray()
        while len(raw) < self.body_cap:
            try:
                size_line = await asyncio.wait_for(reader.readline(), self.io_timeout_s)
            except (TimeoutError, asyncio.TimeoutError):
                break
            if not size_line:
                break
            raw += size_line
            token = size_line.split(b";", 1)[0].strip()  # size, ignoring any ;ext
            try:
                size = int(token, 16)
            except ValueError:
                break
            if size < 0:
                break  # malformed size; leave the rest to the pump
            if size == 0:
                # last chunk (and any trailing CRLF/trailers)
                try:
                    raw += await asyncio.wait_for(reader.readline(), self.io_timeout_s)
                except (TimeoutError, asyncio.TimeoutError):
                    pass
                break
            budget = self.body_cap - len(decoded)
            data = await self._read_up_to(reader, min(size, budget))
            raw += data
            decoded += data
            if len(data) < size:
                break  # capped, stalled or closed mid-chunk
            raw += await self._read_up_to(reader, 2)  # the chunk's trailing CRLF
        return bytes(raw), bytes(decoded)

    async def _mitm_forward(
        self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter,
        host: str, port: int, entry: dict,
    ) -> None:
        """TLS MITM (P2): become the client's TLS server with a minted leaf, read
        the decrypted request (recording its URL/body into the connection log for
        ``ExfilBreach``), then re-originate TLS to the real upstream and pump the
        response back. The exfil record is written BEFORE the upstream hop, so even
        an unreachable upstream cannot hide a secret the client tried to send."""
        writer.write(b"HTTP/1.1 200 Connection Established\r\n\r\n")
        await writer.drain()
        # Impersonate the upstream to the in-container client.
        await writer.start_tls(self.mitm.leaf_context(host))
        header_block, request_line = await self._read_headers(reader)
        try:
            _method, path, _ = (request_line.decode("latin1") + " ").split(" ", 2)
        except ValueError:
            path = "/"  # no parseable request line: record the bare origin
        entry["url"] = f"https://{host}{path.strip()}"
        raw_body, decoded_body = await self._read_bounded_body(reader, header_block)
        if decoded_body:
            entry["body"] = decoded_body.decode("latin1", "replace")[: self.body_cap]
        entry["bytes_out"] += len(header_block) + len(raw_body)
        # Re-originate TLS upstream and pump the response (re-encrypted to client).
        up_reader, up_writer = await asyncio.wait_for(
            asyncio.open_connection(host, port, ssl=self._upstream_ssl(), server_hostname=host),
            self.io_timeout_s)
        try:
            up_writer.write(header_block + raw_body)
            await up_writer.drain()
            await self._pump(reader, up_reader, writer, up_writer, entry)
        finally:
            up_writer.close()
            try:
                await up_writer.wait_closed()
            except Exception:  # noqa: BLE001
                pass

    async def _pump(
        self, c_reader: asyncio.StreamReader, u_reader: asyncio.StreamReader,
        c_writer: asyncio.StreamWriter, u_writer: asyncio.StreamWriter, entry: dict,
    ) -> None:
        """Relay bytes in both directions until both sides finish or
        ``io_timeout_s`` elapses, adding client->upstream bytes to
        ``entry['bytes_out']``. Copy tasks still running when the wait ends are
        cancelled and awaited, so none outlives the connection."""

        async def copy(src: asyncio.StreamReader, dst: asyncio.StreamWriter, count: bool) -> None:
            try:
                while True:
                    chunk = await src.read(65536)
                    if not chunk:
                        break
                    if count:
                        entry["bytes_out"] += len(chunk)
                    dst.write(chunk)
                    await dst.drain()
            except Exception:  # noqa: BLE001 - either side closing ends the copy
                pass
            finally:
                try:
                    dst.write_eof()
                except Exception:  # noqa: BLE001
                    pass

        tasks = {
            asyncio.create_task(copy(c_reader, u_writer, True)),
            asyncio.create_task(copy(u_reader, c_writer, False)),
        }
        try:
            await asyncio.wait(tasks, timeout=self.io_timeout_s)
        finally:
            # asyncio.wait does not cancel on timeout; do it explicitly.
            leftovers = [task for task in tasks if not task.done()]
            for task in leftovers:
                task.cancel()
            if leftovers:
                await asyncio.gather(*leftovers, return_exceptions=True)
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _parse_allowlist(raw: str) -> list[str]:
    """Split a comma-separated allowlist, trimming whitespace around each entry
    and dropping empty ones (so ``"a.com, b.com"`` yields both hosts)."""
    return [host for host in (part.strip() for part in raw.split(",")) if host]


def main(argv: list[str] | None = None) -> int:
    """``zu-egress-proxy`` — run the proxy as a sidecar container, the target's sole
    route off-box (RED_TEAM_CONTAINER.md §3.1). Each finished connection is printed
    as one JSONL line on stdout, which the host control plane reads via
    ``docker logs``. Config via env:

      ZU_EGRESS_ALLOWLIST  comma-separated hosts (``*`` = open)   [default ``*``]
      ZU_EGRESS_BIND       bind address                            [default 0.0.0.0]
      ZU_EGRESS_PORT       bind port                               [default 8080]
      ZU_EGRESS_MITM       ``1`` -> TLS MITM (decrypt HTTPS to log URL/body) [off]
      ZU_EGRESS_CA_OUT     path to write the per-run CA cert PEM (so the target
                           can trust it); only used when MITM is on

    Args:
        argv: Accepted for the console-script signature; not read — all
            configuration comes from the environment.

    Returns:
        0 once the serving loop ends.

    Raises:
        ValueError: ``ZU_EGRESS_PORT`` is not an integer.
    """
    import json
    import os

    allow = _parse_allowlist(os.environ.get("ZU_EGRESS_ALLOWLIST", "*"))
    bind = os.environ.get("ZU_EGRESS_BIND", "0.0.0.0")
    port = int(os.environ.get("ZU_EGRESS_PORT", "8080"))

    mitm = None
    if os.environ.get("ZU_EGRESS_MITM") == "1":
        from .mitm import MitmCA

        mitm = MitmCA()
        ca_out = os.environ.get("ZU_EGRESS_CA_OUT")
        if ca_out:
            with open(ca_out, "wb") as fh:
                fh.write(mitm.ca_cert_pem())

    def emit(entry: dict) -> None:
        # Flushed per record so each connection shows up in `docker logs` at once.
        print(json.dumps(entry), flush=True)

    proxy = LocalEgressProxy(bind_host=bind, bind_port=port, on_connection=emit, mitm=mitm)

    async def serve() -> None:
        handle = await proxy.launch({"allowlist": allow})
        # Report the port actually bound: with ZU_EGRESS_PORT=0 the OS picks one.
        print(json.dumps({"event": "proxy.ready", "bind": bind, "port": handle.port,
                          "allowlist": allow, "mitm": mitm is not None}), flush=True)
        await asyncio.Event().wait()  # run until the container is stopped

    try:
        asyncio.run(serve())
    except KeyboardInterrupt:  # pragma: no cover - container stop
        pass
    return 0
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
# Script entry: run the CLI and use its return value as the process exit status.
if __name__ == "__main__":  # pragma: no cover - module CLI entry
    exit_status = main()
    raise SystemExit(exit_status)
|