techrevati-runtime 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,243 @@
1
+ """
2
+ Circuit Breaker — Fault-tolerant execution with state machine.
3
+
4
+ Implements the Circuit Breaker pattern to prevent cascading failures
5
+ when calling unreliable services or operations. Transitions between
6
+ CLOSED (normal), OPEN (failing), and HALF_OPEN (testing) states.
7
+
8
+ Thread-safe with configurable failure threshold, recovery timeout,
9
+ and number of in-flight probes permitted in HALF_OPEN. Uses
10
+ ``time.monotonic`` for duration checks so clock jumps don't affect
11
+ behavior; the clock function is injectable for deterministic tests.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ import threading
18
+ import time
19
+ from collections.abc import Awaitable, Callable
20
+ from dataclasses import dataclass, field
21
+ from enum import Enum
22
+ from typing import Any, TypeVar
23
+
24
# Result type of the callable (or awaitable) a breaker's ``call`` wraps;
# ``call`` returns the same type so the breaker is transparent to type checkers.
T = TypeVar("T")
25
+
26
+
27
class CircuitState(str, Enum):
    """The three states a circuit breaker moves between.

    Members are also ``str`` instances, so they compare equal to their
    plain string values (``CircuitState.OPEN == "open"``).
    """

    # Normal operation; requests pass through.
    CLOSED = "closed"
    # Failed; requests are blocked immediately.
    OPEN = "open"
    # Testing recovery; only a limited number of probes is allowed.
    HALF_OPEN = "half_open"
33
+
34
+
35
class CircuitOpenError(Exception):
    """Signals that a call was rejected because the breaker is not admitting it.

    Attributes
    ----------
    name:
        Identifier of the breaker that rejected the call.
    """

    def __init__(self, name: str) -> None:
        message = f"Circuit breaker '{name}' is OPEN"
        super().__init__(message)
        # Kept as an attribute so handlers can tell breakers apart
        # without parsing the message.
        self.name = name
41
+
42
+
43
@dataclass
class CircuitBreaker:
    """Stateful circuit breaker with thread-safe transitions.

    Parameters
    ----------
    name:
        Human-readable identifier (included in ``CircuitOpenError``).
    failure_threshold:
        Consecutive failures before the circuit opens.
    recovery_timeout_seconds:
        Duration the circuit stays open before allowing probes.
    half_open_max_probes:
        Concurrent probe calls allowed in HALF_OPEN. Default 1 (Polly
        convention); raising to N spreads recovery risk over multiple
        in-flight calls (Resilience4j defaults to 10).
    clock:
        Monotonic time source. Defaults to ``time.monotonic``. Override
        in tests to make timing-dependent behavior deterministic.

    Notes
    -----
    Only ``Exception`` subclasses raised by the wrapped callable count as
    failures. Other ``BaseException`` subclasses (``KeyboardInterrupt``,
    ``SystemExit``, ...) propagate without being recorded as a success or
    a failure; a HALF_OPEN probe interrupted that way hands its permit
    back so the breaker cannot get stuck rejecting every later call.
    """

    name: str
    failure_threshold: int = 5
    recovery_timeout_seconds: float = 60.0
    half_open_max_probes: int = 1
    clock: Callable[[], float] = field(default=time.monotonic)

    _state: CircuitState = field(default=CircuitState.CLOSED, init=False, repr=False)
    _failure_count: int = field(default=0, init=False, repr=False)
    _last_failure_time: float | None = field(default=None, init=False, repr=False)
    _probe_in_flight: int = field(default=0, init=False, repr=False)
    _lock: threading.Lock = field(
        default_factory=threading.Lock, init=False, repr=False
    )

    def call(self, fn: Callable[..., T], *args: Any, **kwargs: Any) -> T:
        """Execute fn with breaker protection. Raises CircuitOpenError if open.

        In HALF_OPEN state, at most ``half_open_max_probes`` concurrent
        calls are admitted; excess callers receive ``CircuitOpenError``
        until in-flight probes complete.

        Returns whatever ``fn(*args, **kwargs)`` returns. Any ``Exception``
        raised by ``fn`` is recorded as a failure and re-raised unchanged.
        """
        with self._lock:
            admitted_as_probe = self._admit()

        # The lock is not held while fn runs, so slow calls do not block
        # other callers from being admitted or rejected.
        try:
            result = fn(*args, **kwargs)
        except Exception:
            self.record_failure()
            raise
        except BaseException:
            # Interrupts say nothing about downstream health, so they are
            # neither a success nor a failure. The probe permit must still
            # be returned, otherwise the breaker stays HALF_OPEN at its
            # probe limit and rejects every later call.
            if admitted_as_probe:
                self._release_probe()
            raise
        self.record_success()
        return result

    def record_success(self) -> None:
        """Record a successful execution. Closes the circuit if HALF_OPEN."""
        with self._lock:
            self._failure_count = 0
            if self._state == CircuitState.HALF_OPEN:
                self._state = CircuitState.CLOSED
                self._probe_in_flight = 0

    def record_failure(self) -> None:
        """Record a failed execution. Opens the circuit at threshold."""
        with self._lock:
            self._failure_count += 1
            self._last_failure_time = self.clock()
            if self._state == CircuitState.HALF_OPEN:
                # Failed probe → back to OPEN, drop all in-flight permits.
                self._state = CircuitState.OPEN
                self._probe_in_flight = 0
            elif self._failure_count >= self.failure_threshold:
                self._state = CircuitState.OPEN

    def state(self) -> CircuitState:
        """Get current circuit state.

        This is a read-only peek: an OPEN circuit whose recovery timeout
        has elapsed is reported as HALF_OPEN, but the stored state only
        changes when ``call`` admits the first probe.
        """
        with self._lock:
            if self._state == CircuitState.OPEN and self._should_attempt_reset():
                return CircuitState.HALF_OPEN
            return self._state

    def is_open(self) -> bool:
        """Return True if circuit is open (blocking requests)."""
        return self.state() == CircuitState.OPEN

    def reset(self) -> None:
        """Manually reset the circuit to CLOSED state."""
        with self._lock:
            self._state = CircuitState.CLOSED
            self._failure_count = 0
            self._last_failure_time = None
            self._probe_in_flight = 0

    def _should_attempt_reset(self) -> bool:
        """Check if recovery timeout has elapsed since last failure."""
        if self._last_failure_time is None:
            return False
        return (self.clock() - self._last_failure_time) >= self.recovery_timeout_seconds

    def _admit(self) -> bool:
        """Decide whether a call may proceed; caller must hold ``_lock``.

        Returns True when the call was admitted as a HALF_OPEN probe (and
        therefore holds a permit), False when it passes through a CLOSED
        circuit. Raises ``CircuitOpenError`` when the call is rejected.
        """
        if self._state == CircuitState.OPEN:
            if not self._should_attempt_reset():
                raise CircuitOpenError(self.name)
            # Recovery timeout elapsed: this caller becomes the first probe.
            self._state = CircuitState.HALF_OPEN
            self._probe_in_flight = 1
            return True
        if self._state == CircuitState.HALF_OPEN:
            if self._probe_in_flight >= self.half_open_max_probes:
                raise CircuitOpenError(self.name)
            self._probe_in_flight += 1
            return True
        # CLOSED: pass through without tracking probes.
        return False

    def _release_probe(self) -> None:
        """Return the permit of a probe that ended without a verdict."""
        with self._lock:
            # If another probe already resolved the HALF_OPEN episode, the
            # counter has been zeroed and there is nothing left to return.
            if self._state == CircuitState.HALF_OPEN and self._probe_in_flight > 0:
                self._probe_in_flight -= 1
150
+
151
+
152
@dataclass
class AsyncCircuitBreaker:
    """Async sibling of CircuitBreaker — same state semantics, asyncio.Lock.

    Independent from the sync variant: state is not shared. Choose one
    per downstream. The probe-serialization, monotonic clock, and
    clock-injection contracts match the sync class exactly so behavior
    is portable between sync and async code paths.

    Parameters
    ----------
    name:
        Human-readable identifier (included in ``CircuitOpenError``).
    failure_threshold:
        Consecutive failures before the circuit opens.
    recovery_timeout_seconds:
        Duration the circuit stays open before allowing probes.
    half_open_max_probes:
        Concurrent probe calls allowed in HALF_OPEN.
    clock:
        Monotonic time source. Defaults to ``time.monotonic``.

    Notes
    -----
    Only ``Exception`` subclasses count as failures. Task cancellation
    (``asyncio.CancelledError`` is a ``BaseException``) and other
    non-``Exception`` exits propagate without being recorded as a success
    or a failure; a HALF_OPEN probe that ends this way hands its permit
    back so a cancelled or timed-out probe cannot leave the breaker
    rejecting every later call.
    """

    name: str
    failure_threshold: int = 5
    recovery_timeout_seconds: float = 60.0
    half_open_max_probes: int = 1
    clock: Callable[[], float] = field(default=time.monotonic)

    _state: CircuitState = field(default=CircuitState.CLOSED, init=False, repr=False)
    _failure_count: int = field(default=0, init=False, repr=False)
    _last_failure_time: float | None = field(default=None, init=False, repr=False)
    _probe_in_flight: int = field(default=0, init=False, repr=False)
    _lock: asyncio.Lock = field(default_factory=asyncio.Lock, init=False, repr=False)

    async def call(
        self,
        coro_factory: Callable[..., Awaitable[T]],
        *args: Any,
        **kwargs: Any,
    ) -> T:
        """Execute coro with breaker protection. Raises CircuitOpenError if open.

        ``coro_factory(*args, **kwargs)`` is called and its awaitable is
        awaited outside the lock. The awaited result is returned; any
        ``Exception`` is recorded as a failure and re-raised unchanged.
        """
        async with self._lock:
            admitted_as_probe = self._admit()

        try:
            result = await coro_factory(*args, **kwargs)
        except Exception:
            await self.record_failure()
            raise
        except BaseException:
            # Cancellation (e.g. an outer timeout) is not a verdict on the
            # downstream, but the probe permit must still be returned or
            # the breaker stays HALF_OPEN at its probe limit indefinitely.
            if admitted_as_probe:
                self._release_probe()
            raise
        await self.record_success()
        return result

    async def record_success(self) -> None:
        """Record a successful execution. Closes the circuit if HALF_OPEN."""
        async with self._lock:
            self._failure_count = 0
            if self._state == CircuitState.HALF_OPEN:
                self._state = CircuitState.CLOSED
                self._probe_in_flight = 0

    async def record_failure(self) -> None:
        """Record a failed execution. Opens the circuit at threshold."""
        async with self._lock:
            self._failure_count += 1
            self._last_failure_time = self.clock()
            if self._state == CircuitState.HALF_OPEN:
                # Failed probe → back to OPEN, drop all in-flight permits.
                self._state = CircuitState.OPEN
                self._probe_in_flight = 0
            elif self._failure_count >= self.failure_threshold:
                self._state = CircuitState.OPEN

    async def state(self) -> CircuitState:
        """Get current circuit state.

        This is a read-only peek: an OPEN circuit whose recovery timeout
        has elapsed is reported as HALF_OPEN, but the stored state only
        changes when ``call`` admits the first probe.
        """
        async with self._lock:
            if self._state == CircuitState.OPEN and self._should_attempt_reset():
                return CircuitState.HALF_OPEN
            return self._state

    async def is_open(self) -> bool:
        """Return True if circuit is open (blocking requests)."""
        return (await self.state()) == CircuitState.OPEN

    async def reset(self) -> None:
        """Manually reset the circuit to CLOSED state."""
        async with self._lock:
            self._state = CircuitState.CLOSED
            self._failure_count = 0
            self._last_failure_time = None
            self._probe_in_flight = 0

    def _should_attempt_reset(self) -> bool:
        """Check if recovery timeout has elapsed since last failure."""
        if self._last_failure_time is None:
            return False
        return (self.clock() - self._last_failure_time) >= self.recovery_timeout_seconds

    def _admit(self) -> bool:
        """Decide whether a call may proceed; caller must hold ``_lock``.

        Returns True when the call was admitted as a HALF_OPEN probe (and
        therefore holds a permit), False when it passes through a CLOSED
        circuit. Raises ``CircuitOpenError`` when the call is rejected.
        """
        if self._state == CircuitState.OPEN:
            if not self._should_attempt_reset():
                raise CircuitOpenError(self.name)
            # Recovery timeout elapsed: this caller becomes the first probe.
            self._state = CircuitState.HALF_OPEN
            self._probe_in_flight = 1
            return True
        if self._state == CircuitState.HALF_OPEN:
            if self._probe_in_flight >= self.half_open_max_probes:
                raise CircuitOpenError(self.name)
            self._probe_in_flight += 1
            return True
        # CLOSED: pass through without tracking probes.
        return False

    def _release_probe(self) -> None:
        """Return the permit of a probe that ended without a verdict.

        Deliberately synchronous: it runs while the task is being
        cancelled, so it must not contain an await that a further
        cancellation could interrupt. Skipping the lock is safe because
        no locked section in this class awaits while holding it, so no
        other task can be in the middle of a state update here.
        """
        # If another probe already resolved the HALF_OPEN episode, the
        # counter has been zeroed and there is nothing left to return.
        if self._state == CircuitState.HALF_OPEN and self._probe_in_flight > 0:
            self._probe_in_flight -= 1
@@ -0,0 +1,10 @@
1
+ {
2
+ "_comment": "Pricing data is intentionally empty. Override at runtime via techrevati.runtime.usage_tracking.register_pricing(model, ModelPricing(...)) or load_pricing_from_file(path).",
3
+ "_schema": {
4
+ "input_per_million": "float",
5
+ "output_per_million": "float",
6
+ "cache_write_per_million": "float (optional, default 0.0)",
7
+ "cache_read_per_million": "float (optional, default 0.0)"
8
+ },
9
+ "models": {}
10
+ }
@@ -0,0 +1,138 @@
1
+ """
2
+ Guardrails — content-level checks around tool execution.
3
+
4
+ A ``Guardrail`` is a small object that inspects either the call site
5
+ (role + tool name, before invocation) or the result (after invocation)
6
+ and reports an outcome. The orchestrator runs all registered guardrails
7
+ automatically around ``run_tool`` / ``arun_tool`` and raises
8
+ ``GuardrailViolatedError`` on the first violation.
9
+
10
+ This is content gating — orthogonal to ``PermissionEnforcer`` which
11
+ answers "is this role allowed to use this tool at all?". Permissions
12
+ are role × tool; guardrails are value × context.
13
+
14
+ Inspired by the OpenAI Agents SDK guardrail model. Output checks are
15
+ mandatory; input/pre-call checks are optional and default to
16
+ ``GuardrailOutcome(allowed=True)`` if a guardrail does not implement
17
+ them, matching the structural Protocol pattern.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from dataclasses import dataclass
23
+ from typing import Any, Literal, Protocol, runtime_checkable
24
+
25
# Which side of the tool invocation a guardrail check ran on: "pre" for the
# call-site check before invocation, "post" for the check on the result.
GuardrailStage = Literal["pre", "post"]
26
+
27
+
28
@dataclass(frozen=True)
class GuardrailOutcome:
    """Immutable verdict returned by a single guardrail check.

    Attributes
    ----------
    allowed:
        ``False`` blocks the operation; ``True`` lets it proceed.
    reason:
        Optional explanation. Supply it when blocking so the resulting
        ``GuardrailViolatedError`` message is actionable.
    """

    allowed: bool
    reason: str | None = None
38
+
39
+
40
@runtime_checkable
class Guardrail(Protocol):
    """Structural interface every tool-level guardrail satisfies.

    Keep implementations small, deterministic, and free of side effects.
    Expensive checks (for example a call to a moderation model) belong in
    a separate service that the guardrail consults.

    ``name`` labels the guardrail in events and errors. The check runners
    in this module fall back to the implementing class's name when the
    attribute is missing.
    """

    name: str

    def check_pre(self, *, role: str, tool: str) -> GuardrailOutcome:
        """Inspect the call site (role and tool name) before invocation."""
        ...

    def check_post(self, value: Any, *, role: str, tool: str) -> GuardrailOutcome:
        """Inspect ``value``, the result of the tool invocation."""
        ...
57
+
58
+
59
class GuardrailViolatedError(Exception):
    """Error raised when a guardrail rejects a tool call or its result.

    Attributes
    ----------
    outcome:
        The blocking ``GuardrailOutcome``.
    guardrail:
        Label of the guardrail that blocked.
    role, tool:
        The role and tool the check was run for.
    stage:
        ``"pre"`` or ``"post"``, i.e. which check produced the outcome.
    """

    def __init__(
        self,
        outcome: GuardrailOutcome,
        *,
        guardrail: str,
        role: str,
        tool: str,
        stage: GuardrailStage,
    ) -> None:
        # A missing or empty reason is replaced by a fixed placeholder so
        # the message never ends in a dangling colon.
        explanation = outcome.reason if outcome.reason else "no reason provided"
        headline = f"{stage} guardrail '{guardrail}' blocked tool '{tool}' "
        detail = f"for role '{role}': {explanation}"
        super().__init__(headline + detail)
        self.outcome = outcome
        self.guardrail = guardrail
        self.role = role
        self.tool = tool
        self.stage = stage
81
+
82
+
83
def run_pre_checks(guardrails: list[Guardrail], *, role: str, tool: str) -> None:
    """Run every pre-call guardrail; raise on first violation.

    Pre-call checks are optional: a guardrail that has no ``check_pre``
    attribute is treated as allowing the call and is skipped, instead of
    failing with ``AttributeError``.

    Parameters
    ----------
    guardrails:
        Guardrails to consult, in order.
    role, tool:
        The role making the call and the tool being invoked; forwarded
        to each ``check_pre`` as keyword arguments.

    Raises
    ------
    GuardrailViolatedError
        For the first guardrail whose outcome has ``allowed`` false.
    """
    for g in guardrails:
        check_pre = getattr(g, "check_pre", None)
        if check_pre is None:
            # Post-only guardrail: nothing to verify before the call.
            continue
        outcome = check_pre(role=role, tool=tool)
        if not outcome.allowed:
            raise GuardrailViolatedError(
                outcome,
                # Guardrails without a ``name`` are labelled by class name.
                guardrail=getattr(g, "name", type(g).__name__),
                role=role,
                tool=tool,
                stage="pre",
            )
95
+
96
+
97
def run_post_checks(
    guardrails: list[Guardrail],
    value: Any,
    *,
    role: str,
    tool: str,
) -> None:
    """Apply each guardrail's post-call check to ``value`` in order.

    Stops at the first guardrail whose outcome is not allowed and raises
    ``GuardrailViolatedError`` with ``stage="post"``; returns ``None``
    when every guardrail allows the value.
    """
    for guard in guardrails:
        verdict = guard.check_post(value, role=role, tool=tool)
        if verdict.allowed:
            continue
        # Guardrails without a ``name`` are labelled by their class name.
        label = getattr(guard, "name", type(guard).__name__)
        raise GuardrailViolatedError(
            verdict,
            guardrail=label,
            role=role,
            tool=tool,
            stage="post",
        )
115
+
116
+
117
@dataclass(frozen=True)
class AllowAllGuardrail:
    """No-op reference guardrail that approves everything.

    Handy as a baseline in tests: both checks always return an allowed
    outcome regardless of role, tool, or value.
    """

    name: str = "allow_all"

    def check_pre(self, *, role: str, tool: str) -> GuardrailOutcome:
        """Approve the call site unconditionally."""
        verdict = GuardrailOutcome(allowed=True)
        return verdict

    def check_post(self, value: Any, *, role: str, tool: str) -> GuardrailOutcome:
        """Approve the tool result unconditionally."""
        verdict = GuardrailOutcome(allowed=True)
        return verdict
128
+
129
+
130
# Public API of the guardrails module; controls ``from ... import *``.
__all__ = [
    "AllowAllGuardrail",
    "Guardrail",
    "GuardrailOutcome",
    "GuardrailStage",
    "GuardrailViolatedError",
    "run_post_checks",
    "run_pre_checks",
]
@@ -0,0 +1,57 @@
1
+ """
2
+ Handoffs — delegation between agents.
3
+
4
+ A ``Handoff`` records the intent of one agent (the *source*) to pass
5
+ control to another (the *target*), with a structured reason and
6
+ caller-provided context payload. The source session finalizes its
7
+ worker as ``COMPLETED``; a new worker is created in the registry under
8
+ the target role so downstream code can pick it up by ``worker_id``.
9
+
10
+ The caller resolves the handoff by opening a new session for the
11
+ target role. This module does not run the target agent — it just
12
+ records and routes.
13
+
14
+ The pattern mirrors OpenAI Agents SDK handoffs and Anthropic's
15
+ orchestrator-workers workflow.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from dataclasses import dataclass, field
21
+ from datetime import UTC, datetime
22
+ from typing import Any
23
+
24
+
25
+ @dataclass(frozen=True)
26
+ class Handoff:
27
+ """Immutable record of an agent-to-agent delegation.
28
+
29
+ Created by ``OrchestrationSession.handoff_to`` /
30
+ ``AsyncOrchestrationSession.handoff_to``. Use ``target_worker_id`` to
31
+ look up the freshly-registered worker from the same ``AgentRegistry``
32
+ the source session was using.
33
+ """
34
+
35
+ source_role: str
36
+ target_role: str
37
+ phase: str
38
+ reason: str
39
+ context: dict[str, Any] = field(default_factory=dict)
40
+ project_id: int | None = None
41
+ target_worker_id: str = ""
42
+ created_at: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
43
+
44
+ def to_dict(self) -> dict[str, Any]:
45
+ return {
46
+ "source_role": self.source_role,
47
+ "target_role": self.target_role,
48
+ "phase": self.phase,
49
+ "reason": self.reason,
50
+ "context": dict(self.context),
51
+ "project_id": self.project_id,
52
+ "target_worker_id": self.target_worker_id,
53
+ "created_at": self.created_at,
54
+ }
55
+
56
+
57
# Public API of the handoffs module; controls ``from ... import *``.
__all__ = ["Handoff"]