zu-core 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zu_core/__init__.py +142 -0
- zu_core/bus.py +129 -0
- zu_core/codec.py +98 -0
- zu_core/content.py +138 -0
- zu_core/contracts.py +83 -0
- zu_core/cost.py +140 -0
- zu_core/events.py +104 -0
- zu_core/eventstore.py +47 -0
- zu_core/loop.py +1089 -0
- zu_core/pipeline.py +140 -0
- zu_core/ports.py +394 -0
- zu_core/projections.py +103 -0
- zu_core/registry.py +169 -0
- zu_core/security.py +77 -0
- zu_core/sinks.py +77 -0
- zu_core/track.py +168 -0
- zu_core/view.py +110 -0
- zu_core-0.2.0.dist-info/METADATA +51 -0
- zu_core-0.2.0.dist-info/RECORD +20 -0
- zu_core-0.2.0.dist-info/WHEEL +4 -0
zu_core/__init__.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Zu core — the small, stable runtime: contracts, ports, registry, loop, bus.
|
|
2
|
+
|
|
3
|
+
Depends only on the standard library and Pydantic. It contains no model SDK,
|
|
4
|
+
no domain branching, and no knowledge of any specific tool or provider.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from . import events
|
|
10
|
+
from .bus import EventBus, SubscriberFailure
|
|
11
|
+
from .codec import IdentityCodec, KeyProvider, PayloadCodec, decode_payload, encode_payload
|
|
12
|
+
from .content import Action, Audio, Content, ContentPart, Image, Observation, Text
|
|
13
|
+
from .contracts import Budget, Event, Result, Status, TaskSpec
|
|
14
|
+
from .eventstore import ALLOWED_EVENT_FILTERS, event_matches, validate_filter
|
|
15
|
+
from .ports import (
|
|
16
|
+
CAP_FS_READ,
|
|
17
|
+
CAP_FS_WRITE,
|
|
18
|
+
CAP_NET,
|
|
19
|
+
CAP_SANDBOX,
|
|
20
|
+
CAP_SUBPROCESS,
|
|
21
|
+
EGRESS_OPEN,
|
|
22
|
+
INTERFACE_ATTR,
|
|
23
|
+
INTERFACE_VERSION,
|
|
24
|
+
Capabilities,
|
|
25
|
+
Detector,
|
|
26
|
+
EventSink,
|
|
27
|
+
Finish,
|
|
28
|
+
ModelProvider,
|
|
29
|
+
ModelRequest,
|
|
30
|
+
ModelResponse,
|
|
31
|
+
Policy,
|
|
32
|
+
RunContext,
|
|
33
|
+
SandboxBackend,
|
|
34
|
+
Scope,
|
|
35
|
+
Severity,
|
|
36
|
+
Tool,
|
|
37
|
+
ToolCall,
|
|
38
|
+
ToolSpec,
|
|
39
|
+
Trigger,
|
|
40
|
+
TriggerEvent,
|
|
41
|
+
Validator,
|
|
42
|
+
Verdict,
|
|
43
|
+
declared_envelope,
|
|
44
|
+
)
|
|
45
|
+
from .projections import SessionState, SessionStore
|
|
46
|
+
from .registry import (
|
|
47
|
+
REGISTRY,
|
|
48
|
+
IncompatibleInterfaceError,
|
|
49
|
+
LoadFailure,
|
|
50
|
+
Registry,
|
|
51
|
+
backend,
|
|
52
|
+
check_interface,
|
|
53
|
+
detector,
|
|
54
|
+
policy,
|
|
55
|
+
provider,
|
|
56
|
+
sink,
|
|
57
|
+
tool,
|
|
58
|
+
trigger,
|
|
59
|
+
validator,
|
|
60
|
+
)
|
|
61
|
+
from .security import SecurityBlock
|
|
62
|
+
from .sinks import MemoryEventSink
|
|
63
|
+
from .view import RENDER_KEYS, scope_event, scope_payload
|
|
64
|
+
|
|
65
|
+
__all__ = [
|
|
66
|
+
# contracts
|
|
67
|
+
"Budget",
|
|
68
|
+
"Event",
|
|
69
|
+
"Result",
|
|
70
|
+
"Status",
|
|
71
|
+
"TaskSpec",
|
|
72
|
+
# multimodal content (the policy currency)
|
|
73
|
+
"Content",
|
|
74
|
+
"ContentPart",
|
|
75
|
+
"Text",
|
|
76
|
+
"Image",
|
|
77
|
+
"Audio",
|
|
78
|
+
"Observation",
|
|
79
|
+
"Action",
|
|
80
|
+
# event bus + taxonomy + projections + sinks + codec
|
|
81
|
+
"EventBus",
|
|
82
|
+
"SubscriberFailure",
|
|
83
|
+
"SessionStore",
|
|
84
|
+
"SessionState",
|
|
85
|
+
"MemoryEventSink",
|
|
86
|
+
"events",
|
|
87
|
+
"ALLOWED_EVENT_FILTERS",
|
|
88
|
+
"event_matches",
|
|
89
|
+
"validate_filter",
|
|
90
|
+
"IdentityCodec",
|
|
91
|
+
"PayloadCodec",
|
|
92
|
+
"KeyProvider",
|
|
93
|
+
"encode_payload",
|
|
94
|
+
"decode_payload",
|
|
95
|
+
"SecurityBlock",
|
|
96
|
+
"scope_event",
|
|
97
|
+
"scope_payload",
|
|
98
|
+
"RENDER_KEYS",
|
|
99
|
+
# ports
|
|
100
|
+
"CAP_NET",
|
|
101
|
+
"CAP_SANDBOX",
|
|
102
|
+
"CAP_FS_READ",
|
|
103
|
+
"CAP_FS_WRITE",
|
|
104
|
+
"CAP_SUBPROCESS",
|
|
105
|
+
"EGRESS_OPEN",
|
|
106
|
+
"INTERFACE_VERSION",
|
|
107
|
+
"INTERFACE_ATTR",
|
|
108
|
+
"declared_envelope",
|
|
109
|
+
"Capabilities",
|
|
110
|
+
"Detector",
|
|
111
|
+
"EventSink",
|
|
112
|
+
"Finish",
|
|
113
|
+
"ModelProvider",
|
|
114
|
+
"ModelRequest",
|
|
115
|
+
"ModelResponse",
|
|
116
|
+
"Policy",
|
|
117
|
+
"RunContext",
|
|
118
|
+
"SandboxBackend",
|
|
119
|
+
"Scope",
|
|
120
|
+
"Severity",
|
|
121
|
+
"Tool",
|
|
122
|
+
"ToolCall",
|
|
123
|
+
"ToolSpec",
|
|
124
|
+
"Trigger",
|
|
125
|
+
"TriggerEvent",
|
|
126
|
+
"Validator",
|
|
127
|
+
"Verdict",
|
|
128
|
+
# registry
|
|
129
|
+
"REGISTRY",
|
|
130
|
+
"LoadFailure",
|
|
131
|
+
"IncompatibleInterfaceError",
|
|
132
|
+
"check_interface",
|
|
133
|
+
"Registry",
|
|
134
|
+
"backend",
|
|
135
|
+
"detector",
|
|
136
|
+
"policy",
|
|
137
|
+
"provider",
|
|
138
|
+
"sink",
|
|
139
|
+
"tool",
|
|
140
|
+
"trigger",
|
|
141
|
+
"validator",
|
|
142
|
+
]
|
zu_core/bus.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""The event bus — one source of truth, projected to destinations (step 3).
|
|
2
|
+
|
|
3
|
+
There is exactly **one canonical event store** (an ``EventSink``) — the single
|
|
4
|
+
source of truth for a run. The bus, on every publish:
|
|
5
|
+
|
|
6
|
+
1. **appends to the canonical store first** — durability before any side
|
|
7
|
+
effect. If that write fails, the failure propagates: you cannot have a run
|
|
8
|
+
whose source of truth is missing a record.
|
|
9
|
+
2. **then fans out to destinations** — projections (derived read models like
|
|
10
|
+
the session store) and secondary sinks (a shipper to OTel, a central log).
|
|
11
|
+
Each destination is isolated: one that raises does not stop the others,
|
|
12
|
+
and its failure is recorded (bounded) rather than disappearing.
|
|
13
|
+
|
|
14
|
+
The canonical store defaults to an in-memory sink and is swapped for a durable
|
|
15
|
+
one (SQLite, Postgres, the hosted central log) by configuration — same port,
|
|
16
|
+
same semantics. Reads (`query`/`stream`/`count`) delegate to the canonical
|
|
17
|
+
store, so there is never a second, divergent copy of the log in the bus.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import inspect
|
|
23
|
+
import logging
|
|
24
|
+
from collections import deque
|
|
25
|
+
from collections.abc import AsyncIterator, Awaitable, Callable
|
|
26
|
+
from typing import NamedTuple
|
|
27
|
+
|
|
28
|
+
from .contracts import Event
|
|
29
|
+
from .ports import EventSink
|
|
30
|
+
from .sinks import MemoryEventSink
|
|
31
|
+
|
|
32
|
+
log = logging.getLogger("zu.bus")
|
|
33
|
+
|
|
34
|
+
Subscriber = Callable[[Event], "Awaitable[None] | None"]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class SubscriberFailure(NamedTuple):
    """A destination that raised while handling an event — recorded, not lost."""

    # One of these is appended to ``EventBus.subscriber_failures`` each time a
    # subscriber raises inside ``EventBus.publish``.

    # The callable that raised, exactly as it was handed to ``subscribe``.
    subscriber: Subscriber
    # The event that was being delivered when the subscriber raised.
    event: Event
    # The exception instance the subscriber raised.
    error: Exception
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class EventBus:
    """Append to one canonical store, then fan out to isolated destinations.

    ``publish`` writes the event to the canonical ``sink`` first (a failure
    there propagates to the caller), then hands it to every registered
    subscriber. A subscriber that raises is caught, recorded in
    ``subscriber_failures`` and logged, and the remaining subscribers still
    run. ``query``/``stream``/``count`` delegate straight to the canonical sink.
    """

    def __init__(
        self,
        sink: EventSink | None = None,
        *,
        max_recorded_failures: int = 1000,
    ) -> None:
        """Build a bus over ``sink``; with ``sink=None`` a ``MemoryEventSink``
        is created. ``max_recorded_failures`` is the ``maxlen`` of the
        ``subscriber_failures`` deque, so the oldest failure is dropped once
        that many have been recorded."""
        # The single source of truth. Defaults to in-memory; configure a durable
        # sink for production. Never accompanied by a second in-bus copy.
        self.sink: EventSink = sink if sink is not None else MemoryEventSink()
        self._subscribers: list[Subscriber] = []
        # Secondary sinks attached via ``add_destination`` — tracked so ``aclose``
        # can release their resources (e.g. a sqlite connection) too.
        self._destinations: list[EventSink] = []
        # Bounded so a long-lived bus can't leak memory via recorded failures.
        self.subscriber_failures: deque[SubscriberFailure] = deque(
            maxlen=max_recorded_failures
        )

    def subscribe(self, fn: Subscriber) -> None:
        """Register a destination: a projection or any per-event handler.

        ``fn`` may be sync or async; ``publish`` awaits its result only when
        that result is awaitable. A subscriber registered while a ``publish``
        is in flight starts receiving events from the next ``publish``.
        """
        self._subscribers.append(fn)

    def add_destination(self, sink: EventSink) -> None:
        """Project the stream to a secondary sink (e.g. a shipper), isolated.

        The secondary sink is a destination, not the source of truth: its
        failures are isolated like any other subscriber's, never propagated.
        """

        async def _ship(event: Event) -> None:
            await sink.append(event)

        # Remember the sink itself (not just the closure) so ``aclose`` can
        # find and close it.
        self._destinations.append(sink)
        self.subscribe(_ship)

    async def aclose(self) -> None:
        """Release the canonical store and every secondary destination that holds
        a resource (e.g. a sqlite connection). ``close`` is an optional capability
        on a sink — a sink without one (the in-memory default, the per-append
        jsonl sink) is simply skipped. Each close is isolated so one failure does
        not strand the others. Idempotent: safe to call more than once.

        The embed facade assembles a fresh bus per run, so calling this in a
        ``finally`` is what keeps a long-lived ``Zu`` instance from leaking one
        connection per ``run()``."""
        for sink in [self.sink, *self._destinations]:
            closer = getattr(sink, "close", None)
            if closer is None:
                continue
            try:
                # ``close`` may be sync or async; await only when needed.
                result = closer()
                if inspect.isawaitable(result):
                    await result
            except Exception as exc:  # noqa: BLE001 - one close failure must not strand the rest
                log.warning("sink %r failed to close: %s", sink, exc)

    async def publish(self, event: Event) -> None:
        """Append ``event`` to the canonical store, then deliver it to every
        subscriber registered at the moment fan-out starts.

        An exception from the canonical ``append`` propagates. An exception
        from a subscriber is recorded as a ``SubscriberFailure`` and logged,
        and delivery continues with the next subscriber.
        """
        # 1. canonical store first; a failure here propagates (source of truth).
        await self.sink.append(event)

        # 2. fan out to destinations, isolating any crash. Iterate a snapshot:
        # the loop body awaits, so a handler (or another task) can call
        # ``subscribe``/``add_destination`` mid-fan-out. Walking the live list
        # would hand the in-flight event to that late subscriber as well.
        for fn in tuple(self._subscribers):
            try:
                result = fn(event)
                if inspect.isawaitable(result):
                    await result
            except Exception as exc:  # noqa: BLE001 - one crash must not stop the rest
                self.subscriber_failures.append(SubscriberFailure(fn, event, exc))
                log.warning("destination %r failed on %s: %s", fn, event.type, exc)

    # --- reads delegate to the single source of truth ---------------------

    async def query(
        self, flt: dict | None = None, *, limit: int | None = None, after_seq: int = 0
    ) -> list[Event]:
        """Return the canonical sink's ``query`` result for the same arguments."""
        return await self.sink.query(flt, limit=limit, after_seq=after_seq)

    def stream(
        self, flt: dict | None = None, *, batch_size: int = 500
    ) -> AsyncIterator[Event]:
        """Return the canonical sink's ``stream`` iterator for the same arguments."""
        return self.sink.stream(flt, batch_size=batch_size)

    async def count(self, flt: dict | None = None) -> int:
        """Return the canonical sink's ``count`` for ``flt``."""
        return await self.sink.count(flt)
|
zu_core/codec.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Payload codec seam — the encryption-at-rest boundary for durable sinks.
|
|
2
|
+
|
|
3
|
+
Encryption-at-rest is deferred as a *cipher* but not as a *format*: an
|
|
4
|
+
append-only log is the worst place to retrofit encryption (you accumulate
|
|
5
|
+
immutable plaintext), so the on-disk envelope is fixed now and the cipher is
|
|
6
|
+
swappable later. Every stored payload blob begins with a one-byte **version
|
|
7
|
+
tag** identifying the codec that wrote it, so a log can hold rows written by
|
|
8
|
+
different codecs (e.g. plaintext rows from before encryption was enabled) and
|
|
9
|
+
still be read back — the durable sink decodes each row by its own tag.
|
|
10
|
+
|
|
11
|
+
Default is `IdentityCodec` (plaintext, zero dependencies). A real AES-256-GCM
|
|
12
|
+
codec ships behind zu-backends' optional ``[encryption]`` extra. The AES codec
|
|
13
|
+
binds the row's indexed columns as associated data (AAD), so a ciphertext can't
|
|
14
|
+
be moved to — or have its index columns edited on — a different row. The default
|
|
15
|
+
`IdentityCodec` is plaintext and provides *no* integrity: it accepts the ``aad``
|
|
16
|
+
argument for interface parity but cannot bind it (there is no authentication tag
|
|
17
|
+
over plaintext), so the move-resistance guarantee applies only once a cipher is
|
|
18
|
+
configured. Managed keys (KMS / envelope encryption / rotation) are a future
|
|
19
|
+
stage; the codec asks for a key, so swapping an env-var key for a KMS provider
|
|
20
|
+
later is a contained change with no on-disk format impact.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
from collections.abc import Mapping
|
|
26
|
+
from typing import Protocol, runtime_checkable
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Structural interface for a payload codec. ``encode_payload`` prefixes the
# body with ``version`` as a single tag byte, and ``decode_payload`` picks the
# codec to call from that leading byte.
@runtime_checkable
class PayloadCodec(Protocol):
    version: int  # 0-255; the tag byte written as the first byte of each blob

    # Produce the codec-specific body for ``plaintext``; the version tag is
    # added by ``encode_payload``, not here. ``aad`` is the associated data an
    # authenticated codec binds to (``IdentityCodec`` ignores it).
    def encode_body(self, plaintext: str, aad: bytes) -> bytes: ...

    # Inverse of ``encode_body``: receives the blob with the leading version
    # byte already removed by ``decode_payload``.
    def decode_body(self, body: bytes, aad: bytes) -> str: ...
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@runtime_checkable
class KeyProvider(Protocol):
    """Supplies symmetric data keys *by id*, so a codec can rotate keys and a
    deployment can source them from the KMS/secret store of its choice (AWS KMS,
    GCP KMS, Vault, an HSM, …) — the choice belongs to whoever runs Zu, never
    baked in here. The codec never holds a long-lived master key: it asks the
    provider for the *current* key id when writing, and for a specific key id
    (read back off the stored blob) when decrypting an older row.

    Key rotation is the answer to AES-GCM's nonce-scaling bound too: a fresh
    random 96-bit nonce is safe to ~2^32 events under one key, so rotating the
    data key (a new ``current_key_id``) resets that budget while old rows keep
    decrypting under their own key id. Implement this against a KMS to get
    managed keys with no on-disk format change."""

    # Id of the key a codec should use for new writes.
    @property
    def current_key_id(self) -> str: ...

    # Key material for ``key_id`` — either the current id or one read back off
    # an older stored blob.
    def key(self, key_id: str) -> bytes: ...
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class IdentityCodec:
    """The no-op codec: payloads are stored as plain UTF-8 text.

    This is the zero-dependency default, and it leaves rows readable on disk.
    Both methods take ``aad`` only so the signature lines up with
    authenticated codecs; the value is deliberately ignored because plaintext
    has no authentication tag to bind it to. Row-binding is therefore a
    property of the AES codec only, never of this one.
    """

    # Tag byte that ``encode_payload`` writes in front of every body.
    version = 0

    def encode_body(self, plaintext: str, aad: bytes) -> bytes:
        """Return ``plaintext`` as UTF-8 bytes (``aad`` is unused)."""
        raw = plaintext.encode("utf-8")
        return raw

    def decode_body(self, body: bytes, aad: bytes) -> str:
        """Return ``body`` decoded from UTF-8 (``aad`` is unused)."""
        recovered = body.decode("utf-8")
        return recovered
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def encode_payload(codec: PayloadCodec, plaintext: str, aad: bytes = b"") -> bytes:
    """Serialise ``plaintext`` as ``[version byte][codec-specific body]``.

    The codec's ``version`` must fit in one byte; anything outside 0-255 is
    refused with ``ValueError`` before the codec is asked to encode anything.
    """
    if 0 <= codec.version <= 255:
        # Tag first, then body — the same evaluation order as the concatenation.
        tag = bytes([codec.version])
        body = codec.encode_body(plaintext, aad)
        return tag + body
    raise ValueError(f"codec.version must be a byte (0-255), got {codec.version}")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def decode_payload(
    blob: bytes, aad: bytes, registry: Mapping[int, PayloadCodec]
) -> str:
    """Decode a stored blob with the codec named by its leading version byte.

    ``registry`` maps version tags to codecs, which is what lets one log mix
    rows written by different codecs. Raises ``ValueError`` for an empty blob
    or for a tag with no registered codec.
    """
    if not blob:
        raise ValueError("empty payload blob")

    version, remainder = blob[0], blob[1:]
    handler = registry.get(version)
    if handler is not None:
        return handler.decode_body(remainder, aad)

    raise ValueError(
        f"no codec registered for payload version {version}; "
        "cannot decode (was this row written with an encryption codec "
        "that is not installed/configured?)"
    )
|
zu_core/content.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""Typed multimodal content — the modality-agnostic currency of the loop.
|
|
2
|
+
|
|
3
|
+
The policy port (today an LLM, tomorrow a world model or an embodied
|
|
4
|
+
controller) consumes an :class:`Observation` and emits an :class:`Action`. For
|
|
5
|
+
that single seam to serve every modality, the *observation* must carry typed
|
|
6
|
+
content — text, image, audio, sensor — rather than a bare string, and the
|
|
7
|
+
*action* must be typed rather than a guessed-at dict. These models are that
|
|
8
|
+
currency (Engineering Design §8.2, §9).
|
|
9
|
+
|
|
10
|
+
Design notes that are load-bearing:
|
|
11
|
+
|
|
12
|
+
* **Frozen value objects.** A piece of content is a fact about what was
|
|
13
|
+
observed; it is never mutated in place. Like :class:`Event`, the envelope is
|
|
14
|
+
frozen.
|
|
15
|
+
* **Discriminated union.** ``Observation.content`` is a list of a closed set of
|
|
16
|
+
parts, tagged by ``kind`` so Pydantic can round-trip it from JSON on the event
|
|
17
|
+
log without ambiguity. New modalities are added here (a new ``Content``
|
|
18
|
+
subclass + a new ``kind``), never by smuggling an untyped blob through.
|
|
19
|
+
* **Binary is base64 on the wire.** :class:`Image`/:class:`Audio` carry raw
|
|
20
|
+
``bytes`` in memory but serialise as base64 in JSON mode, so an observation is
|
|
21
|
+
safe to journal or hand to the codec without a decode error. Media payloads
|
|
22
|
+
are large; what lands on the event log is the caller's choice (a reference or
|
|
23
|
+
a scoped copy), but the contract itself never crashes a ``model_dump``.
|
|
24
|
+
* **Additive, not a rewrite.** Tools still return plain dicts and the
|
|
25
|
+
interpreter loop still speaks ``ModelRequest``/``ModelResponse``. These types
|
|
26
|
+
are the seam the perception-reduction tools (the Action Surface), the
|
|
27
|
+
HuggingFace task-model adapter, and the generalised Policy port build on; they
|
|
28
|
+
do not disturb the existing contracts.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from __future__ import annotations
|
|
32
|
+
|
|
33
|
+
from typing import Annotated, Literal
|
|
34
|
+
|
|
35
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class Content(BaseModel):
    """Frozen base for one piece of observed content.

    A concrete part declares a ``kind`` discriminator so a heterogeneous
    ``list[Content]`` round-trips from JSON unambiguously.
    """

    # ``frozen=True``: fields cannot be reassigned after construction.
    # ``Text`` inherits this config; ``Image``/``Audio`` restate it because
    # they declare their own ``model_config``.
    model_config = ConfigDict(frozen=True)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class Text(Content):
    # A plain-text content part. (Documented with comments rather than a class
    # docstring so the generated JSON schema description is left unchanged.)

    # Discriminator value that selects this class inside ``ContentPart``.
    kind: Literal["text"] = "text"
    # The text itself; required.
    text: str
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class Image(Content):
    # base64 in/out in JSON mode so a binary payload never breaks model_dump(mode="json").
    # ``frozen=True`` is restated because this assignment replaces the config
    # declared on ``Content``.
    model_config = ConfigDict(frozen=True, ser_json_bytes="base64", val_json_bytes="base64")

    # Discriminator value that selects this class inside ``ContentPart``.
    kind: Literal["image"] = "image"
    # Raw image bytes in memory; required.
    data: bytes
    # Media type of ``data``; defaults to PNG.
    mime: str = "image/png"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class Audio(Content):
    # Same config as ``Image``: frozen, with ``bytes`` fields serialised and
    # validated as base64 in JSON mode.
    model_config = ConfigDict(frozen=True, ser_json_bytes="base64", val_json_bytes="base64")

    # Discriminator value that selects this class inside ``ContentPart``.
    kind: Literal["audio"] = "audio"
    # Raw audio bytes in memory; required.
    data: bytes
    # Media type of ``data``; defaults to WAV.
    mime: str = "audio/wav"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# The closed set of content parts, tagged by ``kind``. Extend it by adding a
# ``Content`` subclass with a new ``kind`` literal and listing it here — the one
# place modality support is declared. ``Field(discriminator="kind")`` makes
# ``kind`` the tag used to pick the concrete class when a part is validated.
ContentPart = Annotated[Text | Image | Audio, Field(discriminator="kind")]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class Observation(BaseModel):
    """The typed input side of the policy — heavy perceptual input, one shape.

    The :class:`Observation` is what a perception-reduction step (the Action
    Surface, a UI-element detector, a lidar reducer) fills compactly, and what
    the policy reads to choose its next :class:`Action`.
    """

    # NOTE: the class docstring above is kept verbatim on purpose — Pydantic
    # surfaces it as the model's JSON-schema description.
    model_config = ConfigDict(frozen=True)

    # Ordered content parts; an observation with no parts is valid.
    content: list[ContentPart] = Field(default_factory=list)

    @classmethod
    def from_text(cls, text: str) -> Observation:
        """Wrap a bare string as a single-part, text-only observation.

        This is the common case, and the bridge from the loop's existing
        string/dict observations.
        """
        sole_part = Text(text=text)
        return cls(content=[sole_part])

    def text(self) -> str:
        """Join the text of every :class:`Text` part with newlines.

        Parts of any other kind are skipped, so a text policy, a grounding
        validator, or a text-classifier detector can read an observation
        without caring which other modalities ride along.
        """
        fragments = [part.text for part in self.content if isinstance(part, Text)]
        return "\n".join(fragments)

    def parts(self, kind: str) -> list[ContentPart]:
        """Return, in order, the parts whose ``kind`` equals ``kind``
        (``"text"`` | ``"image"`` | ``"audio"``)."""
        selected = []
        for part in self.content:
            if part.kind == kind:
                selected.append(part)
        return selected
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class Action(BaseModel):
    """The typed output side of the policy.

    An LLM policy returns a ``tool_call`` (or final ``text``); a world-model or
    embodied controller returns a ``command`` carrying a control action. The
    harness, bus, detectors, validation, and envelope are unchanged across all
    three — which is the whole point of typing the action rather than the policy
    (Engineering Design §9.2).
    """

    # ``frozen=True`` blocks reassigning ``kind``/``payload``; it does not
    # deep-freeze what ``payload`` contains, so the constructors below avoid
    # storing caller-owned mutable objects by reference.
    model_config = ConfigDict(frozen=True)

    # Which of the three action shapes this is.
    kind: Literal["text", "tool_call", "command"]
    # Shape-specific data; see the constructors for the keys each kind uses.
    payload: dict = Field(default_factory=dict)

    @classmethod
    def text(cls, text: str) -> Action:
        """A final-answer action: ``payload == {"text": text}``."""
        return cls(kind="text", payload={"text": text})

    @classmethod
    def tool_call(cls, name: str, args: dict | None = None) -> Action:
        """A request to invoke a tool by name — the LLM-policy shape. The
        payload mirrors :class:`zu_core.ports.ToolCall` (``name`` + ``args``) so
        a Policy adapter can bridge the two without a lossy translation.

        ``args`` is shallow-copied (``None`` or an empty dict becomes ``{}``),
        so mutating the caller's dict afterwards does not change the action —
        the same guarantee ``command`` gives for its payload.
        """
        own_args = dict(args) if args else {}
        return cls(kind="tool_call", payload={"name": name, "args": own_args})

    @classmethod
    def command(cls, **payload: object) -> Action:
        """A low-level control action — the world-model / embodied-controller
        shape (e.g. ``Action.command(actuator="gait", vector=[...])``). The
        keyword arguments become the payload."""
        return cls(kind="command", payload=dict(payload))
|
zu_core/contracts.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""The typed boundaries everything in Zu speaks through.
|
|
2
|
+
|
|
3
|
+
These three validated Pydantic models — TaskSpec, Result, and the frozen Event — are
|
|
4
|
+
the gates every part of the runtime passes through. They are deliberately
|
|
5
|
+
strict: a malformed task or a mis-namespaced event must be refused at the
|
|
6
|
+
boundary, not swallowed.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from datetime import UTC, datetime
|
|
12
|
+
from enum import Enum
|
|
13
|
+
from uuid import UUID, uuid4
|
|
14
|
+
|
|
15
|
+
from pydantic import BaseModel, Field, field_validator
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Status(str, Enum):
    # Outcome of a run, carried on ``Result.status``. Mixing in ``str`` makes
    # each member compare equal to its plain string value.
    SUCCESS = "success"
    ESCALATE = "escalate"
    TERMINAL = "terminal"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Budget(BaseModel):
    # Resource limits for a run; ``TaskSpec.budget`` defaults to ``Budget()``,
    # i.e. the values below. How each limit is enforced is not visible in this
    # module — NOTE(review): confirm against the loop.
    max_steps: int = 20
    max_tokens: int = 200_000
    wall_time_s: int = 120  # the ``_s`` suffix suggests seconds — TODO confirm
    max_tool_calls: int = 32  # per single model response — caps a runaway turn
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class TaskSpec(BaseModel):
    """The typed input to a run."""

    # Generated with ``uuid4`` when the caller does not supply one.
    task_id: UUID = Field(default_factory=uuid4)
    # The only required field.
    query: str
    # Optional; its meaning is not established in this module.
    target: str | None = None
    output_schema: dict = Field(default_factory=dict)  # JSON schema the result must satisfy
    # Defaults to a ``Budget`` with its own default limits.
    budget: Budget = Field(default_factory=Budget)
    # NOTE(review): tier semantics are defined elsewhere — confirm before relying on the default.
    max_tier: int = 2
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class Result(BaseModel):
    """The typed output of a run."""

    # Required: one of ``Status.SUCCESS`` / ``ESCALATE`` / ``TERMINAL``.
    status: Status
    # Optional result data; presumably shaped by ``TaskSpec.output_schema`` — TODO confirm.
    value: dict | None = None
    reason: str | None = None  # detector name, on escalate/terminal
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class Event(BaseModel):
    """The append-only record envelope.

    Frozen at the envelope level: no field may be *reassigned* once an event is
    built. The durable record is immutable in the strongest sense — a sink
    serialises the event to JSON at ``append`` time, so what lands in the
    canonical store can never change afterward.

    One boundary to know: ``frozen`` does not deep-freeze the ``payload`` dict's
    *contents* (``event.payload[k] = ...`` is not blocked). Deep-freezing every
    payload was rejected deliberately — payloads carry large fetched HTML on the
    hot path and copying/freezing them per event is too costly. The invariant is
    therefore: **treat a published event's payload as read-only.** Do not mutate
    it in place; the canonical on-disk copy is already immutable regardless.
    """

    # NOTE: the class docstring above is kept verbatim on purpose — Pydantic
    # surfaces it as the model's JSON-schema description.
    model_config = {"frozen": True}

    # Identity and lineage. ``event_id`` is generated when not supplied;
    # ``trace_id`` and ``task_id`` are required; ``parent_id`` is optional.
    event_id: UUID = Field(default_factory=uuid4)
    trace_id: UUID
    task_id: UUID
    parent_id: UUID | None = None
    # Namespaced event type — checked by ``_namespace`` below.
    type: str
    # Timezone-aware UTC timestamp taken at construction unless supplied.
    ts: datetime = Field(default_factory=lambda: datetime.now(UTC))
    source: str
    payload: dict = Field(default_factory=dict)
    schema_version: int = 1

    @field_validator("type")
    @classmethod
    def _namespace(cls, v: str) -> str:
        """Accept ``v`` only when it sits in the ``harness.`` or ``data.``
        namespace; otherwise raise ``ValueError``."""
        if v.startswith(("harness.", "data.")):
            return v
        raise ValueError("event type must start with 'harness.' or 'data.'")
|