zu-core 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zu_core/__init__.py ADDED
@@ -0,0 +1,142 @@
1
+ """Zu core — the small, stable runtime: contracts, ports, registry, loop, bus.
2
+
3
+ Depends only on the standard library and Pydantic. It contains no model SDK,
4
+ no domain branching, and no knowledge of any specific tool or provider.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from . import events
10
+ from .bus import EventBus, SubscriberFailure
11
+ from .codec import IdentityCodec, KeyProvider, PayloadCodec, decode_payload, encode_payload
12
+ from .content import Action, Audio, Content, ContentPart, Image, Observation, Text
13
+ from .contracts import Budget, Event, Result, Status, TaskSpec
14
+ from .eventstore import ALLOWED_EVENT_FILTERS, event_matches, validate_filter
15
+ from .ports import (
16
+ CAP_FS_READ,
17
+ CAP_FS_WRITE,
18
+ CAP_NET,
19
+ CAP_SANDBOX,
20
+ CAP_SUBPROCESS,
21
+ EGRESS_OPEN,
22
+ INTERFACE_ATTR,
23
+ INTERFACE_VERSION,
24
+ Capabilities,
25
+ Detector,
26
+ EventSink,
27
+ Finish,
28
+ ModelProvider,
29
+ ModelRequest,
30
+ ModelResponse,
31
+ Policy,
32
+ RunContext,
33
+ SandboxBackend,
34
+ Scope,
35
+ Severity,
36
+ Tool,
37
+ ToolCall,
38
+ ToolSpec,
39
+ Trigger,
40
+ TriggerEvent,
41
+ Validator,
42
+ Verdict,
43
+ declared_envelope,
44
+ )
45
+ from .projections import SessionState, SessionStore
46
+ from .registry import (
47
+ REGISTRY,
48
+ IncompatibleInterfaceError,
49
+ LoadFailure,
50
+ Registry,
51
+ backend,
52
+ check_interface,
53
+ detector,
54
+ policy,
55
+ provider,
56
+ sink,
57
+ tool,
58
+ trigger,
59
+ validator,
60
+ )
61
+ from .security import SecurityBlock
62
+ from .sinks import MemoryEventSink
63
+ from .view import RENDER_KEYS, scope_event, scope_payload
64
+
65
+ __all__ = [
66
+ # contracts
67
+ "Budget",
68
+ "Event",
69
+ "Result",
70
+ "Status",
71
+ "TaskSpec",
72
+ # multimodal content (the policy currency)
73
+ "Content",
74
+ "ContentPart",
75
+ "Text",
76
+ "Image",
77
+ "Audio",
78
+ "Observation",
79
+ "Action",
80
+ # event bus + taxonomy + projections + sinks + codec
81
+ "EventBus",
82
+ "SubscriberFailure",
83
+ "SessionStore",
84
+ "SessionState",
85
+ "MemoryEventSink",
86
+ "events",
87
+ "ALLOWED_EVENT_FILTERS",
88
+ "event_matches",
89
+ "validate_filter",
90
+ "IdentityCodec",
91
+ "PayloadCodec",
92
+ "KeyProvider",
93
+ "encode_payload",
94
+ "decode_payload",
95
+ "SecurityBlock",
96
+ "scope_event",
97
+ "scope_payload",
98
+ "RENDER_KEYS",
99
+ # ports
100
+ "CAP_NET",
101
+ "CAP_SANDBOX",
102
+ "CAP_FS_READ",
103
+ "CAP_FS_WRITE",
104
+ "CAP_SUBPROCESS",
105
+ "EGRESS_OPEN",
106
+ "INTERFACE_VERSION",
107
+ "INTERFACE_ATTR",
108
+ "declared_envelope",
109
+ "Capabilities",
110
+ "Detector",
111
+ "EventSink",
112
+ "Finish",
113
+ "ModelProvider",
114
+ "ModelRequest",
115
+ "ModelResponse",
116
+ "Policy",
117
+ "RunContext",
118
+ "SandboxBackend",
119
+ "Scope",
120
+ "Severity",
121
+ "Tool",
122
+ "ToolCall",
123
+ "ToolSpec",
124
+ "Trigger",
125
+ "TriggerEvent",
126
+ "Validator",
127
+ "Verdict",
128
+ # registry
129
+ "REGISTRY",
130
+ "LoadFailure",
131
+ "IncompatibleInterfaceError",
132
+ "check_interface",
133
+ "Registry",
134
+ "backend",
135
+ "detector",
136
+ "policy",
137
+ "provider",
138
+ "sink",
139
+ "tool",
140
+ "trigger",
141
+ "validator",
142
+ ]
zu_core/bus.py ADDED
@@ -0,0 +1,129 @@
1
+ """The event bus — one source of truth, projected to destinations (step 3).
2
+
3
+ There is exactly **one canonical event store** (an ``EventSink``) — the single
4
+ source of truth for a run. The bus, on every publish:
5
+
6
+ 1. **appends to the canonical store first** — durability before any side
7
+ effect. If that write fails, the failure propagates: you cannot have a run
8
+ whose source of truth is missing a record.
9
+ 2. **then fans out to destinations** — projections (derived read models like
10
+ the session store) and secondary sinks (a shipper to OTel, a central log).
11
+ Each destination is isolated: one that raises does not stop the others,
12
+ and its failure is recorded (bounded) rather than disappearing.
13
+
14
+ The canonical store defaults to an in-memory sink and is swapped for a durable
15
+ one (SQLite, Postgres, the hosted central log) by configuration — same port,
16
+ same semantics. Reads (`query`/`stream`/`count`) delegate to the canonical
17
+ store, so there is never a second, divergent copy of the log in the bus.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import inspect
23
+ import logging
24
+ from collections import deque
25
+ from collections.abc import AsyncIterator, Awaitable, Callable
26
+ from typing import NamedTuple
27
+
28
+ from .contracts import Event
29
+ from .ports import EventSink
30
+ from .sinks import MemoryEventSink
31
+
32
+ log = logging.getLogger("zu.bus")
33
+
34
+ Subscriber = Callable[[Event], "Awaitable[None] | None"]
35
+
36
+
37
class SubscriberFailure(NamedTuple):
    """Record of one destination raising while it was handed one event.

    The bus stores one of these per failed delivery instead of letting the
    exception escape, so the failure stays inspectable afterwards.
    """

    # The callable that raised.
    subscriber: Subscriber
    # The event that was being delivered when it raised.
    event: Event
    # The exception instance that was caught.
    error: Exception
43
+
44
+
45
class EventBus:
    """Append every event to one canonical sink, then fan it out to destinations.

    ``publish`` awaits ``sink.append`` first and lets any exception from it
    propagate. Only afterwards are subscribers invoked; an exception raised by
    a subscriber is caught, recorded in ``subscriber_failures`` and logged, and
    the remaining subscribers still run. ``query`` / ``stream`` / ``count``
    delegate straight to the canonical sink, so the bus keeps no second copy
    of the log.
    """

    def __init__(
        self,
        sink: EventSink | None = None,
        *,
        max_recorded_failures: int = 1000,
    ) -> None:
        """Create a bus around a canonical sink.

        Args:
            sink: The canonical store. When ``None`` a fresh ``MemoryEventSink``
                is used.
            max_recorded_failures: ``maxlen`` of the ``subscriber_failures``
                deque; once full, the oldest recorded failure is dropped.
        """
        # The single source of truth. Defaults to in-memory; configure a durable
        # sink for production. Never accompanied by a second in-bus copy.
        self.sink: EventSink = sink if sink is not None else MemoryEventSink()
        self._subscribers: list[Subscriber] = []
        # Secondary sinks attached via ``add_destination`` — tracked so ``aclose``
        # can release their resources (e.g. a sqlite connection) too.
        self._destinations: list[EventSink] = []
        # Bounded so a long-lived bus can't leak memory via recorded failures.
        self.subscriber_failures: deque[SubscriberFailure] = deque(
            maxlen=max_recorded_failures
        )

    def subscribe(self, fn: Subscriber) -> None:
        """Register a destination: a projection or any per-event handler.

        ``fn`` may be a plain callable or return an awaitable; ``publish``
        awaits the result when it is awaitable. A handler registered while a
        publish is fanning out first receives the *next* published event.
        """
        self._subscribers.append(fn)

    def add_destination(self, sink: EventSink) -> None:
        """Project the stream to a secondary sink (e.g. a shipper), isolated.

        The secondary sink is a destination, not the source of truth: it is
        fed through an ordinary subscriber that awaits ``sink.append``, so its
        failures are recorded like any other subscriber's, never propagated.
        The sink is also remembered so ``aclose`` can close it.
        """

        async def _ship(event: Event) -> None:
            await sink.append(event)

        self._destinations.append(sink)
        self.subscribe(_ship)

    async def aclose(self) -> None:
        """Close the canonical store and every secondary destination.

        ``close`` is an optional capability on a sink: a sink without a
        ``close`` attribute is skipped. The result of ``close()`` is awaited
        when it is awaitable. Each close is isolated: an exception is logged
        as a warning and the remaining sinks are still closed.

        Each distinct sink object is closed at most once per call, even when
        the same object was registered several times or serves as both the
        canonical store and a destination. A repeated ``aclose`` call invokes
        ``close`` again, so it is only as idempotent as the sinks' own
        ``close`` methods.

        The embed facade assembles a fresh bus per run, so calling this in a
        ``finally`` is what keeps a long-lived ``Zu`` instance from leaking one
        connection per ``run()``.
        """
        already_closed: set[int] = set()
        for sink in [self.sink, *self._destinations]:
            # Identity, not equality: the goal is to avoid closing the very
            # same object twice, not to merge sinks that merely compare equal.
            if id(sink) in already_closed:
                continue
            already_closed.add(id(sink))
            closer = getattr(sink, "close", None)
            if closer is None:
                continue
            try:
                result = closer()
                if inspect.isawaitable(result):
                    await result
            except Exception as exc:  # noqa: BLE001 - one close failure must not strand the rest
                log.warning("sink %r failed to close: %s", sink, exc)

    async def publish(self, event: Event) -> None:
        """Append ``event`` to the canonical store, then deliver it to subscribers.

        Raises:
            Whatever ``self.sink.append`` raises: a failed canonical write
            propagates and no subscriber is invoked. Subscriber exceptions are
            never raised from here; they are appended to
            ``subscriber_failures`` and logged.
        """
        # 1. canonical store first; a failure here propagates (source of truth).
        await self.sink.append(event)

        # 2. fan out to destinations, isolating any crash. Iterate a snapshot:
        # a handler may call ``subscribe`` while we are fanning out, and
        # walking the live list would deliver the in-flight event to the new
        # handler (and never terminate if handlers keep registering handlers).
        for fn in tuple(self._subscribers):
            try:
                result = fn(event)
                if inspect.isawaitable(result):
                    await result
            except Exception as exc:  # noqa: BLE001 - one crash must not stop the rest
                self.subscriber_failures.append(SubscriberFailure(fn, event, exc))
                log.warning("destination %r failed on %s: %s", fn, event.type, exc)

    # --- reads delegate to the single source of truth ---------------------

    async def query(
        self, flt: dict | None = None, *, limit: int | None = None, after_seq: int = 0
    ) -> list[Event]:
        """Return the canonical sink's ``query`` result for the same arguments."""
        return await self.sink.query(flt, limit=limit, after_seq=after_seq)

    def stream(
        self, flt: dict | None = None, *, batch_size: int = 500
    ) -> AsyncIterator[Event]:
        """Return the canonical sink's ``stream`` iterator for the same arguments."""
        return self.sink.stream(flt, batch_size=batch_size)

    async def count(self, flt: dict | None = None) -> int:
        """Return the canonical sink's ``count`` result for ``flt``."""
        return await self.sink.count(flt)
zu_core/codec.py ADDED
@@ -0,0 +1,98 @@
1
+ """Payload codec seam — the encryption-at-rest boundary for durable sinks.
2
+
3
+ Encryption-at-rest is deferred as a *cipher* but not as a *format*: an
4
+ append-only log is the worst place to retrofit encryption (you accumulate
5
+ immutable plaintext), so the on-disk envelope is fixed now and the cipher is
6
+ swappable later. Every stored payload blob begins with a one-byte **version
7
+ tag** identifying the codec that wrote it, so a log can hold rows written by
8
+ different codecs (e.g. plaintext rows from before encryption was enabled) and
9
+ still be read back — the durable sink decodes each row by its own tag.
10
+
11
+ Default is `IdentityCodec` (plaintext, zero dependencies). A real AES-256-GCM
12
+ codec ships behind zu-backends' optional ``[encryption]`` extra. The AES codec
13
+ binds the row's indexed columns as associated data (AAD), so a ciphertext can't
14
+ be moved to — or have its index columns edited on — a different row. The default
15
+ `IdentityCodec` is plaintext and provides *no* integrity: it accepts the ``aad``
16
+ argument for interface parity but cannot bind it (there is no authentication tag
17
+ over plaintext), so the move-resistance guarantee applies only once a cipher is
18
+ configured. Managed keys (KMS / envelope encryption / rotation) are a future
19
+ stage; the codec asks for a key, so swapping an env-var key for a KMS provider
20
+ later is a contained change with no on-disk format impact.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ from collections.abc import Mapping
26
+ from typing import Protocol, runtime_checkable
27
+
28
+
29
@runtime_checkable
class PayloadCodec(Protocol):
    """Structural interface for turning payload text into a stored body and back.

    ``encode_payload`` writes ``version`` as the single leading tag byte of
    each blob and ``decode_payload`` uses that byte to pick the codec, so an
    implementation only deals with the body that follows the tag.
    """

    # 0-255; the tag byte written as the first byte of each blob
    version: int

    def encode_body(self, plaintext: str, aad: bytes) -> bytes:
        """Return the stored body for ``plaintext``, given associated data ``aad``."""
        ...

    def decode_body(self, body: bytes, aad: bytes) -> str:
        """Return the payload text recovered from ``body``, given ``aad``."""
        ...
36
+
37
+
38
@runtime_checkable
class KeyProvider(Protocol):
    """Source of symmetric data keys, looked up *by id*.

    Keeping keys behind an id lets a codec rotate them and lets each
    deployment back this interface with the KMS/secret store of its choice
    (AWS KMS, GCP KMS, Vault, an HSM, …) — that choice belongs to whoever runs
    Zu and is never baked in here. A codec holds no long-lived master key: when
    writing it asks for the *current* key id, and when decrypting an older row
    it asks for the specific key id read back off the stored blob.

    Rotation also answers AES-GCM's nonce-scaling bound: a fresh random 96-bit
    nonce is safe to ~2^32 events under one key, so moving to a new
    ``current_key_id`` resets that budget while old rows keep decrypting under
    their own key id. Implementing this against a KMS gives managed keys with
    no on-disk format change.
    """

    @property
    def current_key_id(self) -> str:
        """Id of the key that new writes should use."""
        ...

    def key(self, key_id: str) -> bytes:
        """Return the key bytes registered under ``key_id``."""
        ...
57
+
58
+
59
class IdentityCodec:
    """Pass-through codec: the stored body is the payload's UTF-8 bytes.

    This is the dependency-free default and leaves data fully queryable on
    disk. Both methods take ``aad`` only so the signatures line up with
    authenticated codecs; the value is never read, because plaintext carries
    no authentication tag to bind it to. This codec therefore provides no
    integrity or row-binding protection.
    """

    # Tag byte identifying blobs written by this codec.
    version = 0

    def encode_body(self, plaintext: str, aad: bytes) -> bytes:
        """Return ``plaintext`` encoded as UTF-8 (``aad`` is ignored)."""
        encoded = plaintext.encode("utf-8")
        return encoded

    def decode_body(self, body: bytes, aad: bytes) -> str:
        """Return ``body`` decoded as UTF-8 text (``aad`` is ignored)."""
        decoded = body.decode("utf-8")
        return decoded
75
+
76
+
77
def encode_payload(codec: PayloadCodec, plaintext: str, aad: bytes = b"") -> bytes:
    """Build a stored blob laid out as ``[version byte][codec-specific body]``.

    Args:
        codec: Supplies the tag (``codec.version``) and encodes the body.
        plaintext: The payload text handed to ``codec.encode_body``.
        aad: Associated data passed through to ``codec.encode_body``.

    Raises:
        ValueError: If ``codec.version`` is outside 0-255.
    """
    tag = codec.version
    if not (0 <= tag <= 255):
        raise ValueError(f"codec.version must be a byte (0-255), got {tag}")
    # Build the tag byte before encoding, so a bad tag fails before the codec runs.
    header = bytes((tag,))
    return header + codec.encode_body(plaintext, aad)
82
+
83
+
84
def decode_payload(
    blob: bytes, aad: bytes, registry: Mapping[int, PayloadCodec]
) -> str:
    """Decode a stored blob by dispatching on its leading version byte.

    Because each blob names its own codec, a log holding rows written by
    different codecs can still be read back.

    Args:
        blob: The stored bytes: one tag byte followed by the codec body.
        aad: Associated data passed through to the codec's ``decode_body``.
        registry: Maps a version tag to the codec that can decode it.

    Raises:
        ValueError: If ``blob`` is empty, or ``registry`` has no codec for
            the blob's tag.
    """
    if blob:
        tag, body = blob[0], blob[1:]
    else:
        raise ValueError("empty payload blob")

    handler = registry.get(tag)
    if handler is not None:
        return handler.decode_body(body, aad)

    raise ValueError(
        f"no codec registered for payload version {tag}; "
        "cannot decode (was this row written with an encryption codec "
        "that is not installed/configured?)"
    )
zu_core/content.py ADDED
@@ -0,0 +1,138 @@
1
+ """Typed multimodal content — the modality-agnostic currency of the loop.
2
+
3
+ The policy port (today an LLM, tomorrow a world model or an embodied
4
+ controller) consumes an :class:`Observation` and emits an :class:`Action`. For
5
+ that single seam to serve every modality, the *observation* must carry typed
6
+ content — text, image, audio, sensor — rather than a bare string, and the
7
+ *action* must be typed rather than a guessed-at dict. These models are that
8
+ currency (Engineering Design §8.2, §9).
9
+
10
+ Design notes that are load-bearing:
11
+
12
+ * **Frozen value objects.** A piece of content is a fact about what was
13
+ observed; it is never mutated in place. Like :class:`Event`, the envelope is
14
+ frozen.
15
+ * **Discriminated union.** ``Observation.content`` is a list of a closed set of
16
+ parts, tagged by ``kind`` so Pydantic can round-trip it from JSON on the event
17
+ log without ambiguity. New modalities are added here (a new ``Content``
18
+ subclass + a new ``kind``), never by smuggling an untyped blob through.
19
+ * **Binary is base64 on the wire.** :class:`Image`/:class:`Audio` carry raw
20
+ ``bytes`` in memory but serialise as base64 in JSON mode, so an observation is
21
+ safe to journal or hand to the codec without a decode error. Media payloads
22
+ are large; what lands on the event log is the caller's choice (a reference or
23
+ a scoped copy), but the contract itself never crashes a ``model_dump``.
24
+ * **Additive, not a rewrite.** Tools still return plain dicts and the
25
+ interpreter loop still speaks ``ModelRequest``/``ModelResponse``. These types
26
+ are the seam the perception-reduction tools (the Action Surface), the
27
+ HuggingFace task-model adapter, and the generalised Policy port build on; they
28
+ do not disturb the existing contracts.
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ from typing import Annotated, Literal
34
+
35
+ from pydantic import BaseModel, ConfigDict, Field
36
+
37
+
38
class Content(BaseModel):
    """Common frozen base class for a single piece of observed content.

    Each concrete subclass adds a ``kind`` field that acts as a discriminator,
    which is what lets a mixed list of parts be rebuilt from JSON without
    ambiguity.
    """

    # Instances are immutable: assigning to a field after construction fails.
    model_config = ConfigDict(frozen=True)
46
+
47
+
48
class Text(Content):
    """A plain-text content part, tagged ``kind="text"``."""

    # Discriminator value identifying this part type.
    kind: Literal["text"] = "text"
    # The text carried by this part.
    text: str
51
+
52
+
53
class Image(Content):
    """An image content part, tagged ``kind="image"``, holding raw bytes."""

    # Bytes are written and read as base64 in JSON mode, so a binary payload
    # never breaks model_dump(mode="json").
    model_config = ConfigDict(frozen=True, ser_json_bytes="base64", val_json_bytes="base64")

    # Discriminator value identifying this part type.
    kind: Literal["image"] = "image"
    # The image bytes.
    data: bytes
    # MIME type of ``data``; PNG unless stated otherwise.
    mime: str = "image/png"
60
+
61
+
62
class Audio(Content):
    """An audio content part, tagged ``kind="audio"``, holding raw bytes."""

    # Bytes are written and read as base64 in JSON mode, mirroring ``Image``.
    model_config = ConfigDict(frozen=True, ser_json_bytes="base64", val_json_bytes="base64")

    # Discriminator value identifying this part type.
    kind: Literal["audio"] = "audio"
    # The audio bytes.
    data: bytes
    # MIME type of ``data``; WAV unless stated otherwise.
    mime: str = "audio/wav"
68
+
69
+
70
+ # The closed set of content parts, tagged by ``kind``. Extend it by adding a
71
+ # ``Content`` subclass with a new ``kind`` literal and listing it here — the one
72
+ # place modality support is declared.
73
+ ContentPart = Annotated[Text | Image | Audio, Field(discriminator="kind")]
74
+
75
+
76
class Observation(BaseModel):
    """Typed input to the policy: an ordered list of content parts.

    A perception-reduction step (the Action Surface, a UI-element detector, a
    lidar reducer) fills one of these compactly, and the policy reads it to
    choose its next :class:`Action`.
    """

    model_config = ConfigDict(frozen=True)

    # The parts making up this observation; empty by default.
    content: list[ContentPart] = Field(default_factory=list)

    @classmethod
    def from_text(cls, text: str) -> Observation:
        """Return an observation holding a single :class:`Text` part.

        This is the common case and the bridge from the loop's existing
        string/dict observations.
        """
        only_part = Text(text=text)
        return cls(content=[only_part])

    def text(self) -> str:
        """Return the text of every :class:`Text` part, joined with newlines.

        Parts of other kinds are skipped, so a text policy, a grounding
        validator, or a text-classifier detector can read an observation
        without caring which other modalities ride along.
        """
        lines = [part.text for part in self.content if isinstance(part, Text)]
        return "\n".join(lines)

    def parts(self, kind: str) -> list[ContentPart]:
        """Return the parts whose ``kind`` equals ``kind``, in original order.

        ``kind`` is one of ``"text"`` | ``"image"`` | ``"audio"``.
        """
        return list(filter(lambda part: part.kind == kind, self.content))
105
+
106
+
107
class Action(BaseModel):
    """Typed output of the policy.

    The ``kind`` says which shape ``payload`` has: an LLM policy produces a
    ``tool_call`` (or a final ``text``), while a world-model or embodied
    controller produces a ``command`` carrying a control action. Typing the
    action rather than the policy is what leaves the harness, bus, detectors,
    validation, and envelope unchanged across all three (Engineering Design
    §9.2).
    """

    model_config = ConfigDict(frozen=True)

    # Which of the three action shapes this is.
    kind: Literal["text", "tool_call", "command"]
    # Shape-specific data; empty by default.
    payload: dict = Field(default_factory=dict)

    @classmethod
    def text(cls, text: str) -> Action:
        """Return a final-answer action with payload ``{"text": text}``."""
        body = {"text": text}
        return cls(kind="text", payload=body)

    @classmethod
    def tool_call(cls, name: str, args: dict | None = None) -> Action:
        """Return an action asking for tool ``name`` to be invoked.

        The payload has the keys ``name`` and ``args``, mirroring
        :class:`zu_core.ports.ToolCall`, so a Policy adapter can bridge the
        two without a lossy translation. A missing or empty ``args`` is stored
        as an empty dict.
        """
        call_args = args if args else {}
        return cls(kind="tool_call", payload={"name": name, "args": call_args})

    @classmethod
    def command(cls, **payload: object) -> Action:
        """Return a low-level control action built from the keyword arguments.

        This is the world-model / embodied-controller shape, e.g.
        ``Action.command(actuator="gait", vector=[...])``.
        """
        return cls(kind="command", payload={**payload})
zu_core/contracts.py ADDED
@@ -0,0 +1,83 @@
1
+ """The typed boundaries everything in Zu speaks through.
2
+
3
+ These three frozen/validated Pydantic models — TaskSpec, Result, Event — are
4
+ the gates every part of the runtime passes through. They are deliberately
5
+ strict: a malformed task or a mis-namespaced event must be refused at the
6
+ boundary, not swallowed.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from datetime import UTC, datetime
12
+ from enum import Enum
13
+ from uuid import UUID, uuid4
14
+
15
+ from pydantic import BaseModel, Field, field_validator
16
+
17
+
18
class Status(str, Enum):
    """Outcome of a run; each member is also its lowercase string value."""

    SUCCESS = "success"
    ESCALATE = "escalate"
    TERMINAL = "terminal"
22
+
23
+
24
class Budget(BaseModel):
    """Resource limits attached to a task, each with a default value."""

    max_steps: int = 20
    max_tokens: int = 200_000
    wall_time_s: int = 120
    # per single model response — caps a runaway turn
    max_tool_calls: int = 32
29
+
30
+
31
class TaskSpec(BaseModel):
    """Typed description of what a run is asked to do.

    Only ``query`` is required; every other field has a default.
    """

    # Fresh random UUID unless the caller supplies one.
    task_id: UUID = Field(default_factory=uuid4)
    query: str
    target: str | None = None
    # JSON schema the result must satisfy
    output_schema: dict = Field(default_factory=dict)
    # A default-constructed Budget unless the caller supplies one.
    budget: Budget = Field(default_factory=Budget)
    max_tier: int = 2
40
+
41
+
42
class Result(BaseModel):
    """Typed outcome of a run: a status plus an optional value and reason."""

    status: Status
    value: dict | None = None
    # detector name, on escalate/terminal
    reason: str | None = None
48
+
49
+
50
class Event(BaseModel):
    """Envelope for one record in the append-only log.

    The model is frozen, so no field can be *reassigned* after construction.
    The durable record is immutable in the strongest sense: a sink serialises
    the event to JSON at ``append`` time, so what lands in the canonical store
    can never change afterward.

    Freezing is shallow, though. The *contents* of the ``payload`` dict are
    not protected (``event.payload[k] = ...`` is not blocked). Deep-freezing
    was rejected deliberately — payloads carry large fetched HTML on the hot
    path, and copying/freezing them per event is too costly. The rule is
    therefore: **treat a published event's payload as read-only.** Never
    mutate it in place; the canonical on-disk copy is already immutable
    regardless.
    """

    model_config = {"frozen": True}

    # Fresh random UUID unless the caller supplies one.
    event_id: UUID = Field(default_factory=uuid4)
    trace_id: UUID
    task_id: UUID
    parent_id: UUID | None = None
    # Must begin with "harness." or "data." (enforced by ``_namespace``).
    type: str
    # Timezone-aware UTC timestamp taken at construction unless supplied.
    ts: datetime = Field(default_factory=lambda: datetime.now(UTC))
    source: str
    payload: dict = Field(default_factory=dict)
    schema_version: int = 1

    @field_validator("type")
    @classmethod
    def _namespace(cls, v: str) -> str:
        """Accept ``v`` only when it sits in the ``harness.`` or ``data.`` namespace.

        Raises:
            ValueError: If ``v`` starts with neither prefix.
        """
        if v.startswith(("harness.", "data.")):
            return v
        raise ValueError("event type must start with 'harness.' or 'data.'")