zu-backends 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ """Zu infrastructure adapters: sandbox backends and event sinks.
2
+
3
+ The SandboxBackend interface is the load-bearing proof of the backend-agnostic
4
+ positioning — kept clean even with the single local-docker adapter, so Modal,
5
+ E2B, and microVMs are later adapters, not a rewrite. The EventSink is the
6
+ storage seam: SQLite by default, Postgres and the hosted central log later,
7
+ all behind one contract.
8
+ """
@@ -0,0 +1,406 @@
1
+ """LocalEgressProxy — the real egress proxy for the red-team container form (P1).
2
+
3
+ This is the *enforcement* the in-process gate can only observe: a forward proxy
4
+ that is the target container's **sole route off-box**, so the verdict rests on a
5
+ record the target cannot author (RED_TEAM_CONTAINER.md §3.1). It does two jobs:
6
+
7
+ * **Logs** every connection the target makes — host, port, scheme, bytes — as a
8
+ structured record. This, not the tool's self-reported observation, is what the
9
+ ``EgressBreach``/``ExfilBreach`` observers read in the container form.
10
+ * **Enforces** the declared allowlist: a CONNECT/request to a host outside the
11
+ allowlist (or to an internal/metadata host, ever) is **refused** and logged with
12
+ ``allowed: false``. A benign plugin has no business reaching off-allowlist, so
13
+ the refused *attempt* is itself the breach.
14
+
15
+ It implements the ``EgressProxy`` port (``launch``/``connections``/``close``), so
16
+ ``ContainerGate`` drives it exactly like the scripted stand-in — the P0 pipeline
17
+ becomes the P1 pipeline by swapping this in. Pure stdlib asyncio: no Docker, no
18
+ optional dependency, and unit-testable over loopback.
19
+
20
+ Scope note: the proxy is the only egress *path*, but the hard guarantee that a
21
+ tool cannot bypass it (open a raw socket directly) is the container's network
22
+ policy (default-DROP), configured by the ``SandboxBackend`` — not this process.
23
+ The proxy is where egress is *seen and allowed*; the network policy is where
24
+ bypass is *prevented*. Both are needed; this is the former.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import asyncio
30
+ import ipaddress
31
+ from dataclasses import dataclass
32
+ from typing import Any
33
+ from urllib.parse import urlsplit
34
+
35
+ # The egress allowlist sentinel (mirrors zu_core.ports.EGRESS_OPEN). Kept as a
36
+ # literal so this stdlib-only module needs no import for one constant.
37
+ _EGRESS_OPEN = "*"
38
+ _PROXY_ERROR_CODES = {"refused": b"HTTP/1.1 403 Forbidden\r\n\r\n",
39
+ "upstream": b"HTTP/1.1 502 Bad Gateway\r\n\r\n"}
40
+
41
+
42
+ def _is_internal_host(host: str) -> bool:
43
+ """A host no plugin may ever reach: loopback / private / link-local (cloud
44
+ metadata 169.254.169.254) or the well-known internal names. A literal IP is
45
+ decided structurally; a name only by the known internal spellings (we do not
46
+ resolve names here — that is the DNS-pin's job in the backend)."""
47
+ lowered = (host or "").lower()
48
+ if lowered in {"localhost", "metadata.google.internal"}:
49
+ return True
50
+ try:
51
+ ip = ipaddress.ip_address(host)
52
+ except ValueError:
53
+ return False
54
+ return ip.is_loopback or ip.is_private or ip.is_link_local or ip.is_reserved
55
+
56
+
57
+ @dataclass
58
+ class _ProxyHandle:
59
+ """Live handle to a running proxy: its address, the asyncio server, and the
60
+ connection log accumulated this run."""
61
+
62
+ host: str
63
+ port: int
64
+ server: asyncio.AbstractServer
65
+ log: list[dict]
66
+ allow: set[str]
67
+
68
+
69
@dataclass
class LocalEgressProxy:
    """A CONNECT + absolute-form HTTP forward proxy that logs and allowlist-gates
    egress.

    Each parsed request appends one record to the run's connection log with the
    keys ``client``, ``host``, ``port``, ``scheme``, ``bytes_out`` and
    ``allowed``; MITM runs add ``url`` and, when a body was captured, ``body``.
    A host outside the allowlist (or an internal host while ``block_internal``
    is on) is answered with 403 and stays logged with ``allowed: False``.

    ``block_internal`` is the SSRF guard (refuse loopback/private/metadata even
    if somehow allowlisted); disable it only in loopback tests.
    """

    name = "local-egress-proxy"
    bind_host: str = "127.0.0.1"
    bind_port: int = 0  # 0 -> an ephemeral port the OS assigns
    block_internal: bool = True
    # Bound (seconds) on each blocking read/connect and on one tunnel's copy
    # phase, so a wedged upstream can't hang the run forever.
    io_timeout_s: float = 30.0
    # P2 TLS MITM: a MitmCA enables decrypting HTTPS to record the request URL/body
    # (so ExfilBreach can see a secret in an HTTPS query). None -> blind CONNECT
    # tunnel (P1): the host is logged, the payload is not. ``upstream_ssl`` overrides
    # the context used to re-originate TLS upstream (tests inject an unverified one).
    mitm: Any = None
    upstream_ssl: Any = None
    # Cap on the request body captured for the exfil log (bytes).
    body_cap: int = 65536
    # Optional callback(entry: dict) invoked once per finished connection — the
    # sidecar CLI uses it to stream the connection log as JSONL on stdout.
    on_connection: Any = None
    # Cap on the request line + headers accepted from a client (bytes). Declared
    # last so existing positional construction keeps its meaning.
    header_cap: int = 65536

    async def launch(self, spec: dict) -> _ProxyHandle:
        """Start the proxy for one run against the union allowlist in
        ``spec['allowlist']`` (``['*']`` permits any host).

        Allowlist entries are stripped and lower-cased so they compare equal to
        the lower-cased hosts parsed from requests; blank entries are dropped.
        Returns a handle carrying the bound ``{host, port}`` the container
        routes through.
        """
        allow = {str(h).strip().lower() for h in (spec.get("allowlist") or [])}
        allow.discard("")
        log: list[dict] = []

        async def on_client(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
            await self._serve(reader, writer, allow, log)

        server = await asyncio.start_server(on_client, self.bind_host, self.bind_port)
        sock = server.sockets[0].getsockname()
        return _ProxyHandle(host=sock[0], port=sock[1], server=server, log=log, allow=allow)

    def connections(self, handle: _ProxyHandle) -> list[dict]:
        """Return a snapshot of the run's connection log (each record copied)."""
        return [dict(c) for c in handle.log]

    async def close(self, handle: _ProxyHandle) -> None:
        """Stop accepting connections and wait for the server to close."""
        handle.server.close()
        try:
            await handle.server.wait_closed()
        except Exception:  # noqa: BLE001 - teardown must not raise over the result
            pass

    # --- connection handling ---------------------------------------------

    def _allowed(self, host: str, allow: set[str]) -> bool:
        """Policy decision for one host: internal hosts are refused first (when
        ``block_internal``), then the wildcard, then exact membership."""
        if self.block_internal and _is_internal_host(host):
            return False
        if _EGRESS_OPEN in allow:
            return True
        return host in allow

    @staticmethod
    def _split_connect_target(target: str) -> tuple[str, int]:
        """Split a CONNECT authority (``host[:port]`` or ``[v6][:port]``) into a
        lower-cased host without brackets and a port (443 when absent or not
        numeric)."""
        if target.startswith("["):
            host, _, tail = target[1:].partition("]")
            port_s = tail[1:] if tail.startswith(":") else ""
        else:
            host, _, port_s = target.partition(":")
        port = int(port_s) if port_s.isascii() and port_s.isdigit() else 443
        return host.lower(), port

    async def _serve(
        self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter,
        allow: set[str], log: list[dict],
    ) -> None:
        """Handle one client connection: parse the request line, log it, refuse
        or relay it, and always close the client writer afterwards."""
        peer = writer.get_extra_info("peername")
        client = f"{peer[0]}:{peer[1]}" if peer else "?"
        entry: dict | None = None
        try:
            header_block, request_line = await self._read_headers(reader)
            if not request_line:
                return
            method, target, _, _rest = (request_line.decode("latin1") + " ").split(" ", 3)
            method = method.upper()
            if method == "CONNECT":
                host, port = self._split_connect_target(target)
                scheme = "https"
            else:  # absolute-form: METHOD http://host[:port]/path HTTP/1.1
                parts = urlsplit(target)
                host, port = parts.hostname or "", parts.port or 80
                scheme = parts.scheme or "http"
            # Logged before the policy check so a refused attempt is on record.
            entry = {"client": client, "host": host, "port": port, "scheme": scheme,
                     "bytes_out": 0, "allowed": False}
            log.append(entry)

            if not host or not self._allowed(host, allow):
                writer.write(_PROXY_ERROR_CODES["refused"])
                await writer.drain()
                return
            entry["allowed"] = True
            if method == "CONNECT" and self.mitm is not None:
                await self._mitm_forward(reader, writer, host, port, entry)
            else:
                await self._forward(reader, writer, host, port, method, target,
                                    header_block, request_line, entry)
        except Exception:  # noqa: BLE001 - a proxy hiccup is an observation, not a crash
            try:
                writer.write(_PROXY_ERROR_CODES["upstream"])
                await writer.drain()
            except Exception:  # noqa: BLE001
                pass
        finally:
            # Stream the finalised connection record (used by the sidecar CLI to
            # emit one JSONL line per connection on stdout, which the host control
            # plane reads via `docker logs`).
            if entry is not None and self.on_connection is not None:
                try:
                    self.on_connection(entry)
                except Exception:  # noqa: BLE001 - a logging hook must never break the proxy
                    pass
            writer.close()
            try:
                await writer.wait_closed()
            except Exception:  # noqa: BLE001
                pass

    async def _read_headers(self, reader: asyncio.StreamReader) -> tuple[bytes, bytes]:
        """Read the request line + headers up to the blank line, returning the raw
        block and the request line.

        Bounded two ways so a client cannot stream headers forever: each line
        read is limited by ``io_timeout_s`` and the whole block by
        ``header_cap`` bytes.

        Raises:
            ValueError: the block grew past ``header_cap`` before the blank line.
            TimeoutError: a line did not arrive within ``io_timeout_s``.
        """
        request_line = await asyncio.wait_for(reader.readline(), self.io_timeout_s)
        lines = [request_line]
        total = len(request_line)
        while True:
            line = await asyncio.wait_for(reader.readline(), self.io_timeout_s)
            lines.append(line)
            total += len(line)
            if line in (b"\r\n", b"\n", b""):
                break
            if total > self.header_cap:
                raise ValueError("request header block exceeds header_cap")
        return b"".join(lines), request_line

    async def _forward(
        self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter,
        host: str, port: int, method: str, target: str,
        header_block: bytes, request_line: bytes, entry: dict,
    ) -> None:
        """Relay without decryption: a blind tunnel for CONNECT, or a plain HTTP
        request rewritten from absolute-form to origin-form."""
        up_reader, up_writer = await asyncio.wait_for(
            asyncio.open_connection(host, port), self.io_timeout_s)
        try:
            if method == "CONNECT":
                writer.write(b"HTTP/1.1 200 Connection Established\r\n\r\n")
                await writer.drain()
            else:
                # Rewrite absolute-form to origin-form and forward the request
                # (headers + any body the client sends next) to the upstream.
                parts = urlsplit(target)
                path = parts.path or "/"
                if parts.query:
                    path += "?" + parts.query
                rest = header_block[len(request_line):]
                new_line = f"{method} {path} HTTP/1.1\r\n".encode("latin1")
                up_writer.write(new_line + rest)
                await up_writer.drain()
                entry["bytes_out"] += len(new_line) + len(rest)
            await self._pump(reader, up_reader, writer, up_writer, entry)
        finally:
            up_writer.close()
            try:
                await up_writer.wait_closed()
            except Exception:  # noqa: BLE001
                pass

    def _upstream_ssl(self) -> Any:
        """TLS context for the upstream hop: the injected ``upstream_ssl`` if set,
        otherwise a default verifying context."""
        import ssl

        return self.upstream_ssl if self.upstream_ssl is not None else ssl.create_default_context()

    async def _read_up_to(self, reader: asyncio.StreamReader, count: int) -> bytes:
        """Read at most ``count`` bytes within ``io_timeout_s`` and return whatever
        arrived. A stall or early close yields the partial data instead of
        discarding it, so bytes the client did send still reach the log."""
        loop = asyncio.get_running_loop()
        deadline = loop.time() + self.io_timeout_s
        buf = bytearray()
        while len(buf) < count:
            remaining = deadline - loop.time()
            if remaining <= 0:
                break
            try:
                chunk = await asyncio.wait_for(reader.read(count - len(buf)), remaining)
            except (TimeoutError, asyncio.TimeoutError):
                break
            if not chunk:  # EOF
                break
            buf += chunk
        return bytes(buf)

    async def _read_bounded_body(
        self, reader: asyncio.StreamReader, header_block: bytes
    ) -> tuple[bytes, bytes]:
        """Read up to ``body_cap`` bytes of the request body so a secret smuggled
        into a POST body — not just a query string — lands in the exfil log.

        Returns ``(raw, decoded)``: ``raw`` is the on-wire bytes to forward upstream
        verbatim; ``decoded`` is the inspectable plaintext for the log (identical to
        ``raw`` for a Content-Length body, dechunked for a chunked one). Both
        Content-Length AND ``Transfer-Encoding: chunked`` are handled. Header
        lines are recognised whether they end in CRLF or a bare LF, matching
        what ``_read_headers`` accepts. Best-effort: a short/absent body is
        fine — whatever part arrived is returned.
        """
        lowered = header_block.lower()
        if b"transfer-encoding:" in lowered and b"chunked" in lowered:
            return await self._read_chunked_body(reader)
        length = 0
        for line in lowered.splitlines():
            if line.startswith(b"content-length:"):
                try:
                    length = int(line.split(b":", 1)[1].strip())
                except ValueError:
                    length = 0
        if length <= 0:
            return b"", b""
        raw = await self._read_up_to(reader, min(length, self.body_cap))
        return raw, raw

    async def _read_chunked_body(self, reader: asyncio.StreamReader) -> tuple[bytes, bytes]:
        """Read a ``Transfer-Encoding: chunked`` body up to ``body_cap``, returning
        the raw on-wire framing (to forward) and the dechunked plaintext (to log).

        ``raw`` is always an exact prefix of what the client sent, so capture may
        stop at any byte: the remainder is relayed by ``_pump``. Chunk data is
        read only up to the remaining ``body_cap`` budget, so a huge declared
        chunk size cannot force a huge read.
        """
        raw = bytearray()
        decoded = bytearray()
        try:
            while len(raw) < self.body_cap:
                size_line = await asyncio.wait_for(reader.readline(), self.io_timeout_s)
                if not size_line:
                    break
                raw.extend(size_line)
                token = size_line.split(b";", 1)[0].strip()  # size, ignoring any ;ext
                try:
                    size = int(token, 16)
                except ValueError:
                    break
                if size < 0:
                    break  # malformed size: stop capturing
                if size == 0:
                    raw.extend(await asyncio.wait_for(reader.readline(), self.io_timeout_s))
                    break  # last chunk (and any trailing CRLF/trailers)
                budget = max(0, self.body_cap - len(decoded))
                data = await self._read_up_to(reader, min(size, budget))
                raw.extend(data)
                decoded.extend(data)
                if len(data) < size:
                    break  # capped, stalled, or closed mid-chunk
                terminator = await self._read_up_to(reader, 2)  # the chunk's CRLF
                raw.extend(terminator)
                if len(terminator) < 2:
                    break
        except (TimeoutError, asyncio.TimeoutError, asyncio.IncompleteReadError):
            pass
        return bytes(raw), bytes(decoded)

    async def _mitm_forward(
        self, reader: asyncio.StreamReader, writer: asyncio.StreamWriter,
        host: str, port: int, entry: dict,
    ) -> None:
        """TLS MITM (P2): become the client's TLS server with a minted leaf, read
        the decrypted request (recording its URL/body into the connection log for
        ``ExfilBreach``), then re-originate TLS to the real upstream and pump the
        response back. The exfil record is written BEFORE the upstream hop, so even
        an unreachable upstream cannot hide a secret the client tried to send."""
        writer.write(b"HTTP/1.1 200 Connection Established\r\n\r\n")
        await writer.drain()
        # Impersonate the upstream to the in-container client.
        await writer.start_tls(self.mitm.leaf_context(host))
        header_block, request_line = await self._read_headers(reader)
        try:
            _method, path, _ = (request_line.decode("latin1") + " ").split(" ", 2)
        except ValueError:
            path = "/"
        entry["url"] = f"https://{host}{path.strip()}"
        raw_body, decoded_body = await self._read_bounded_body(reader, header_block)
        if decoded_body:
            entry["body"] = decoded_body.decode("latin1", "replace")[: self.body_cap]
        entry["bytes_out"] += len(header_block) + len(raw_body)
        # Re-originate TLS upstream and pump the response (re-encrypted to client).
        up_reader, up_writer = await asyncio.wait_for(
            asyncio.open_connection(host, port, ssl=self._upstream_ssl(), server_hostname=host),
            self.io_timeout_s)
        try:
            up_writer.write(header_block + raw_body)
            await up_writer.drain()
            await self._pump(reader, up_reader, writer, up_writer, entry)
        finally:
            up_writer.close()
            try:
                await up_writer.wait_closed()
            except Exception:  # noqa: BLE001
                pass

    async def _pump(
        self, c_reader: asyncio.StreamReader, u_reader: asyncio.StreamReader,
        c_writer: asyncio.StreamWriter, u_writer: asyncio.StreamWriter, entry: dict,
    ) -> None:
        """Copy bytes in both directions until both sides finish or
        ``io_timeout_s`` elapses. Only client->upstream bytes are added to
        ``entry['bytes_out']``. Copy tasks still running at the deadline (or if
        this coroutine is cancelled) are cancelled and awaited so none outlives
        the tunnel."""

        async def copy(src: asyncio.StreamReader, dst: asyncio.StreamWriter, count: bool) -> None:
            try:
                while True:
                    chunk = await src.read(65536)
                    if not chunk:
                        break
                    if count:
                        entry["bytes_out"] += len(chunk)
                    dst.write(chunk)
                    await dst.drain()
            except Exception:  # noqa: BLE001 - either side closing ends the copy
                pass
            finally:
                try:
                    dst.write_eof()
                except Exception:  # noqa: BLE001
                    pass

        tasks = [
            asyncio.create_task(copy(c_reader, u_writer, True)),
            asyncio.create_task(copy(u_reader, c_writer, False)),
        ]
        try:
            await asyncio.wait(tasks, timeout=self.io_timeout_s)
        finally:
            for task in tasks:
                task.cancel()  # no-op for a task that already finished
            await asyncio.gather(*tasks, return_exceptions=True)
355
+
356
+
357
def main(argv: list[str] | None = None) -> int:
    """``zu-egress-proxy`` — run the proxy as a sidecar container, the target's sole
    route off-box (RED_TEAM_CONTAINER.md §3.1). Each finished connection is printed
    as one JSONL line on stdout, which the host control plane reads via
    ``docker logs``. Config via env:

      ZU_EGRESS_ALLOWLIST  comma-separated hosts (``*`` = open)        [default ``*``]
      ZU_EGRESS_BIND       bind address                                [default 0.0.0.0]
      ZU_EGRESS_PORT       bind port                                   [default 8080]
      ZU_EGRESS_MITM       ``1`` -> TLS MITM (decrypt HTTPS to log URL/body) [off]
      ZU_EGRESS_CA_OUT     path to write the per-run CA cert PEM (so the target
                           can trust it); only used when MITM is on

    Whitespace around allowlist entries is ignored, so ``"a.com, b.com"`` means
    the two hosts ``a.com`` and ``b.com``. ``argv`` is accepted for the
    entry-point signature but not read; all configuration comes from the
    environment. Returns the process exit code (always 0).
    """
    import json
    import os

    raw_allow = os.environ.get("ZU_EGRESS_ALLOWLIST", "*")
    allow = [h.strip() for h in raw_allow.split(",") if h.strip()]
    bind = os.environ.get("ZU_EGRESS_BIND", "0.0.0.0")
    port = int(os.environ.get("ZU_EGRESS_PORT", "8080"))

    mitm = None
    if os.environ.get("ZU_EGRESS_MITM") == "1":
        from .mitm import MitmCA

        mitm = MitmCA()
        ca_out = os.environ.get("ZU_EGRESS_CA_OUT")
        if ca_out:
            with open(ca_out, "wb") as fh:
                fh.write(mitm.ca_cert_pem())

    def emit(entry: dict) -> None:
        print(json.dumps(entry), flush=True)

    proxy = LocalEgressProxy(bind_host=bind, bind_port=port, on_connection=emit, mitm=mitm)

    async def serve() -> None:
        handle = await proxy.launch({"allowlist": allow})
        # Report the port actually bound: with ZU_EGRESS_PORT=0 the OS picks it.
        print(json.dumps({"event": "proxy.ready", "bind": bind, "port": handle.port,
                          "allowlist": allow, "mitm": mitm is not None}), flush=True)
        await asyncio.Event().wait()  # run until the container is stopped

    try:
        asyncio.run(serve())
    except KeyboardInterrupt:  # pragma: no cover - container stop
        pass
    return 0


if __name__ == "__main__":  # pragma: no cover - module CLI entry
    raise SystemExit(main())
@@ -0,0 +1,170 @@
1
+ """AES-256-GCM payload codecs — encryption-at-rest behind the optional extra.
2
+
3
+ Install with ``zu-backends[encryption]`` (pulls in ``cryptography``). Pass a
4
+ codec to a durable sink to encrypt event payloads at rest. Two codecs ship:
5
+
6
+ * ``AesGcmCodec`` (version 1) — a single 32-byte key. The simplest form.
7
+ * ``ManagedAesGcmCodec`` (version 2) — keys come from a :class:`KeyProvider`, so
8
+ keys can **rotate** and be sourced from a KMS of the deployment's choice. Each
9
+ blob records the **key id** it was written under, so old rows keep decrypting
10
+ after a rotation. This is the recommended form for a regulated deployment::
11
+
12
+ from zu_backends.sqlite_sink import SqliteSink
13
+ from zu_backends.encryption import ManagedAesGcmCodec
14
+ sink = SqliteSink("zu.db", codec=ManagedAesGcmCodec.from_env())
15
+
16
+ Blob layout — v1: ``[ver=1][nonce][ct+tag]``; v2: ``[ver=2][kid_len][kid][nonce]
17
+ [ct+tag]``. The associated data (AAD) binds the row's indexed columns (event_id,
18
+ trace_id, task_id, type, source), so tampering with any plaintext index column
19
+ makes the row fail to decrypt — it cannot be silently edited to hide a record
20
+ from a filter. Only the payload is encrypted; the index columns stay plaintext
21
+ so the log remains queryable.
22
+
23
+ Key management: ``AesGcmCodec`` takes raw key bytes; ``ManagedAesGcmCodec`` takes
24
+ a ``KeyProvider`` (default: :class:`EnvKeyProvider`). The **KMS choice is the
25
+ deployment's** — implement ``KeyProvider`` against AWS KMS / GCP KMS / Vault and
26
+ pass it in; nothing here is baked to a vendor.
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ import os
32
+
33
+ from cryptography.hazmat.primitives.ciphers.aead import AESGCM
34
+
35
+ from zu_core.codec import KeyProvider
36
+
37
+ _NONCE_LEN = 12
38
+ _KEY_LEN = 32 # AES-256
39
+
40
+
41
class AesGcmCodec:
    """Version-1 payload codec: AES-GCM under one fixed 32-byte key.

    The encoded body is the random nonce followed by the ciphertext and tag as
    returned by ``AESGCM.encrypt``.
    """

    version = 1

    def __init__(self, key: bytes) -> None:
        """Wrap ``key``; raises ``ValueError`` unless it is exactly 32 bytes."""
        if len(key) != _KEY_LEN:
            raise ValueError(f"AES-256-GCM needs a {_KEY_LEN}-byte key, got {len(key)}")
        self._aes = AESGCM(key)

    @classmethod
    def from_env(cls, var: str = "ZU_EVENT_KEY") -> AesGcmCodec:
        """Build from a base64/hex 32-byte key in the environment variable
        ``var``; raises ``RuntimeError`` when it is unset or empty."""
        encoded = os.environ.get(var)
        if encoded:
            return cls(_decode_key(encoded))
        raise RuntimeError(
            f"{var} is not set; provide a 32-byte key (hex or base64) to "
            "enable encryption-at-rest, or use the default plaintext codec."
        )

    def encode_body(self, plaintext: str, aad: bytes) -> bytes:
        """Encrypt ``plaintext`` (as UTF-8) with a fresh random nonce,
        authenticating ``aad``; returns ``nonce + ciphertext``."""
        nonce = os.urandom(_NONCE_LEN)
        sealed = self._aes.encrypt(nonce, plaintext.encode("utf-8"), aad)
        return nonce + sealed

    def decode_body(self, body: bytes, aad: bytes) -> str:
        """Split ``body`` into nonce and ciphertext, decrypt against ``aad`` and
        return the UTF-8 text."""
        nonce = body[:_NONCE_LEN]
        sealed = body[_NONCE_LEN:]
        opened = self._aes.decrypt(nonce, sealed, aad)
        return opened.decode("utf-8")
69
+
70
+
71
class EnvKeyProvider:
    """A :class:`KeyProvider` that reads keys from the environment — the default,
    zero-infrastructure managed-key source, and a model to copy for a real KMS.

    Keys live in ``ZU_EVENT_KEY_<id>`` env vars (hex or base64, 32 bytes), and
    ``ZU_EVENT_KEY_ID`` names the current one for new writes. To rotate: add a new
    key under a new id, point ``ZU_EVENT_KEY_ID`` at it, and keep the old vars so
    existing rows still decrypt. For back-compat, the bare ``ZU_EVENT_KEY`` is the
    key for id ``"default"`` (the id used when ``ZU_EVENT_KEY_ID`` is unset).

    An env var that is set but empty is treated the same as an unset one, both
    for ``ZU_EVENT_KEY_ID`` and for the ``"default"`` key's legacy fallback.
    """

    _PREFIX = "ZU_EVENT_KEY_"
    _LEGACY = "ZU_EVENT_KEY"
    _DEFAULT_ID = "default"

    def __init__(self, current_key_id: str | None = None) -> None:
        """Use ``current_key_id`` for new writes; when it is empty or omitted,
        fall back to ``ZU_EVENT_KEY_ID`` and then to ``"default"``."""
        # `or` rather than a get() default: an exported-but-empty ZU_EVENT_KEY_ID
        # must not become the key id "" (which every later write would reject).
        self._current = (
            current_key_id or os.environ.get("ZU_EVENT_KEY_ID") or self._DEFAULT_ID
        )

    @classmethod
    def from_env(cls) -> EnvKeyProvider:
        """Build a provider whose current key id comes from the environment."""
        return cls()

    @property
    def current_key_id(self) -> str:
        """The key id new blobs are written under."""
        return self._current

    def key(self, key_id: str) -> bytes:
        """Return the decoded key stored under ``key_id``.

        Reads ``ZU_EVENT_KEY_<key_id>``; for the ``"default"`` id it falls back
        to the bare ``ZU_EVENT_KEY``.

        Raises:
            RuntimeError: no non-empty key is configured for ``key_id``.
            ValueError: the configured value is neither hex nor base64.
        """
        raw = os.environ.get(self._PREFIX + key_id)
        if not raw and key_id == self._DEFAULT_ID:
            raw = os.environ.get(self._LEGACY)  # back-compat: bare ZU_EVENT_KEY
        if not raw:
            raise RuntimeError(
                f"no key for id {key_id!r}: set {self._PREFIX}{key_id} (a 32-byte "
                "hex/base64 key). After rotating, keep old keys so old rows decrypt."
            )
        return _decode_key(raw)
106
+
107
+
108
class ManagedAesGcmCodec:
    """AES-256-GCM keyed by a :class:`KeyProvider`, with the key id embedded per
    blob so keys rotate without losing readability of older rows.

    Encoded body layout: ``[kid_len (1 byte)][kid (UTF-8)][nonce][ct+tag]``.
    """

    version = 2

    def __init__(self, key_provider: KeyProvider) -> None:
        """Store the provider that supplies the current key id and key bytes."""
        self._kp = key_provider

    @classmethod
    def from_env(cls) -> ManagedAesGcmCodec:
        """Build a codec backed by :class:`EnvKeyProvider`."""
        return cls(EnvKeyProvider.from_env())

    def _aes(self, key_id: str) -> AESGCM:
        """Fetch the key for ``key_id`` from the provider and wrap it in an AEAD;
        raises ``ValueError`` if the provider returns a key that is not 32 bytes."""
        key = self._kp.key(key_id)
        if len(key) != _KEY_LEN:
            raise ValueError(f"AES-256-GCM needs a {_KEY_LEN}-byte key, got {len(key)}")
        return AESGCM(key)

    def encode_body(self, plaintext: str, aad: bytes) -> bytes:
        """Encrypt ``plaintext`` under the provider's current key.

        Raises:
            ValueError: the current key id does not encode to 1..255 UTF-8 bytes
                (it must fit the one-byte length prefix).
        """
        kid = self._kp.current_key_id
        kid_b = kid.encode("utf-8")
        if not 0 < len(kid_b) <= 255:
            raise ValueError(f"key id must be 1..255 UTF-8 bytes, got {len(kid_b)}")
        nonce = os.urandom(_NONCE_LEN)
        ct = self._aes(kid).encrypt(nonce, plaintext.encode("utf-8"), _bind_kid(aad, kid_b))
        return bytes([len(kid_b)]) + kid_b + nonce + ct

    def decode_body(self, body: bytes, aad: bytes) -> str:
        """Decrypt a v2 body using the key id recorded inside it.

        The framing is validated before any key lookup, so a damaged blob is
        reported as such rather than as a missing key for a mangled id.

        Raises:
            ValueError: the body is empty, declares a zero-length key id, or is
                too short to hold the declared key id plus a nonce.
        """
        if not body:
            raise ValueError("empty v2 blob: missing key-id length byte")
        klen = body[0]
        if klen == 0:
            raise ValueError("malformed v2 blob: zero-length key id")
        if len(body) < 1 + klen + _NONCE_LEN:
            raise ValueError("truncated v2 blob: shorter than key id + nonce")
        kid_b = body[1 : 1 + klen]
        kid = kid_b.decode("utf-8")
        rest = body[1 + klen :]
        nonce, ct = rest[:_NONCE_LEN], rest[_NONCE_LEN:]
        # ``kid`` is bound into the AAD so the key id recorded in the blob is
        # authenticated: an at-rest attacker who rewrites it to point at a
        # different (weaker/known) key makes the row fail to decrypt rather than
        # silently re-key it.
        return self._aes(kid).decrypt(nonce, ct, _bind_kid(aad, kid_b)).decode("utf-8")
147
+
148
+
149
+ def _bind_kid(aad: bytes, kid_b: bytes) -> bytes:
150
+ """The effective GCM AAD for a v2 blob: the row's index columns plus a
151
+ length-framed key id, so the embedded ``kid`` is authenticated alongside the
152
+ plaintext columns. Length-framing keeps ``aad``/``kid`` unambiguous."""
153
+ return aad + bytes([len(kid_b)]) + kid_b
154
+
155
+
156
def _decode_key(raw: str) -> bytes:
    """Decode a key given as text: hex when the stripped text has exactly the
    length of a hex-encoded key and parses as hex, otherwise strict base64.

    Raises ``ValueError`` when the text is valid as neither. The decoded length
    is not checked here.
    """
    import base64
    import binascii

    text = raw.strip()
    if len(text) == _KEY_LEN * 2:
        try:
            return bytes.fromhex(text)
        except ValueError:
            pass  # right length but not hex: fall through to base64
    try:
        return base64.b64decode(text, validate=True)
    except (binascii.Error, ValueError) as exc:
        raise ValueError("ZU_EVENT_KEY must be a 32-byte key as hex or base64") from exc