tightloop 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loop/__init__.py +40 -0
- loop/approval/__init__.py +87 -0
- loop/blueprints/__init__.py +3 -0
- loop/blueprints/testfix.py +117 -0
- loop/context/__init__.py +144 -0
- loop/core/__init__.py +0 -0
- loop/core/engine.py +515 -0
- loop/core/result.py +64 -0
- loop/core/state.py +143 -0
- loop/exit/__init__.py +60 -0
- loop/llm/__init__.py +70 -0
- loop/llm/anthropic.py +45 -0
- loop/llm/openai.py +55 -0
- loop/policy/__init__.py +96 -0
- loop/pricing.py +47 -0
- loop/progress/__init__.py +72 -0
- loop/tools/__init__.py +220 -0
- loop/trace/__init__.py +81 -0
- tightloop-0.1.0.dist-info/METADATA +439 -0
- tightloop-0.1.0.dist-info/RECORD +21 -0
- tightloop-0.1.0.dist-info/WHEEL +4 -0
loop/core/engine.py
ADDED
|
@@ -0,0 +1,515 @@
|
|
|
1
|
+
"""The Loop engine.
|
|
2
|
+
|
|
3
|
+
Per iteration: policies → observe → plan (validated, retried) → per action:
|
|
4
|
+
hard-ceiling check → approval gate → enforced execution → record → progress →
|
|
5
|
+
exits. Hard ceilings are checked before every action and before granting any
|
|
6
|
+
approval; provider max_tokens is clamped to the remaining budget so no single
|
|
7
|
+
call can overshoot. Nested Loop.run() inside a tool raises.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import contextvars
|
|
12
|
+
import json
|
|
13
|
+
import time
|
|
14
|
+
from typing import Any, Callable
|
|
15
|
+
|
|
16
|
+
from ..approval import (
|
|
17
|
+
ApprovalDecision,
|
|
18
|
+
ApprovalRequest,
|
|
19
|
+
ApprovalRunner,
|
|
20
|
+
CLIApprovalRunner,
|
|
21
|
+
HeadlessApprovalRunner,
|
|
22
|
+
new_token,
|
|
23
|
+
)
|
|
24
|
+
from ..context import ContextManager
|
|
25
|
+
from ..exit import ExitCondition
|
|
26
|
+
from ..llm import LLMClient, LLMResponse, ToolCallReq, complete_with_retry
|
|
27
|
+
from ..policy import DecisionKind, NoProgress, Policy
|
|
28
|
+
from ..pricing import DEFAULT_PRICING, check_staleness, estimate_cost
|
|
29
|
+
from ..progress import GoalMetric, ProgressEngine
|
|
30
|
+
from ..tools import Tool, ToolRegistry, ToolValidationError
|
|
31
|
+
from ..trace import ExplainReport, TraceSink, explain
|
|
32
|
+
from .result import LoopResult, LoopStatus
|
|
33
|
+
from .state import (
|
|
34
|
+
ActionRecord,
|
|
35
|
+
ArtifactDriftError,
|
|
36
|
+
IterationRecord,
|
|
37
|
+
MetricSnapshot,
|
|
38
|
+
PendingApproval,
|
|
39
|
+
SchemaChangedError,
|
|
40
|
+
State,
|
|
41
|
+
digest,
|
|
42
|
+
excerpt,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
_ACTIVE: contextvars.ContextVar[bool] = contextvars.ContextVar("loop_active", default=False)
|
|
46
|
+
|
|
47
|
+
_CONFIG_KEYS = (
|
|
48
|
+
"max_iterations",
|
|
49
|
+
"token_limit",
|
|
50
|
+
"wall_clock_s",
|
|
51
|
+
"cost_limit_usd",
|
|
52
|
+
"verbatim_window",
|
|
53
|
+
"max_tokens_per_call",
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class NestedLoopError(RuntimeError):
    """Raised when a loop is started while another loop is already active.

    Loops may not be invoked from inside a tool. Delegate sub-tasks via a tool
    that returns a result instead.
    """
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class LoopConfigError(ValueError):
    """Raised when a loop is run or resumed with an unusable configuration."""
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class Loop:
|
|
67
|
+
def __init__(
    self,
    goal: str,
    tools: list[Tool | Callable],
    llm: LLMClient,
    *,
    observe: Callable[[State], str] | None = None,
    goal_metric: GoalMetric | None = None,
    policies: list[Policy] | None = None,
    exits: list[ExitCondition] | None = None,
    max_iterations: int = 20,
    token_limit: int = 500_000,
    wall_clock_s: float = 1800.0,
    cost_limit_usd: float | None = None,
    pricing: dict | None = None,
    pricing_staleness: str = "warn",
    approval_runner: ApprovalRunner | None = None,
    approval_ttl_s: float = 3600.0,
    summarizer: LLMClient | None = None,
    verbatim_window: int = 3,
    max_tokens_per_call: int = 4096,
    state: State | None = None,
    state_path: str | None = None,
    trace_path: str | None = None,
    on_event: Callable[[dict], None] | None = None,
    allow_schema_change: bool = False,
    allow_artifact_drift: bool = False,
    quiet: bool = False,
):
    """Configure the loop and create (or adopt) its ``State``.

    Args:
        goal: Stored on a freshly created ``State``; unused when ``state`` is given.
        tools: A ``ToolRegistry`` (used as-is) or a list wrapped in one.
        llm: Client used for planning calls.
        observe: Optional callable producing the observation for an iteration.
        goal_metric: Optional metric measured on every observation.
        policies: ``None`` selects ``[NoProgress(3)]``; an explicit list
            (even an empty one) is copied and used unchanged.
        exits: Exit conditions evaluated after each recorded iteration.
        max_iterations, token_limit, wall_clock_s, cost_limit_usd: Hard ceilings.
        pricing: Pricing table; falls back to ``DEFAULT_PRICING`` when falsy.
        pricing_staleness: Forwarded to ``check_staleness`` (only consulted
            when ``cost_limit_usd`` is set); its result decides whether USD
            accounting is enabled.
        approval_runner: Defaults to ``CLIApprovalRunner()``.
        approval_ttl_s: TTL used when the runner has no ``ttl_s`` attribute.
        summarizer, verbatim_window: Forwarded to ``ContextManager``.
        max_tokens_per_call: Upper bound for a single planning call.
        state: Previously saved state to continue from.
        state_path: Where the state is saved after each recorded iteration.
        trace_path, on_event: Forwarded to ``TraceSink``.
        allow_schema_change: Accept a saved state whose tool schema hash
            differs from the current registry.
        allow_artifact_drift: Accept context artifacts reported as drifted.
        quiet: Suppress the start-up banner printed to stdout.

    Raises:
        SchemaChangedError: Saved state has a different tool schema hash and
            ``allow_schema_change`` is false.
        ArtifactDriftError: ``check_artifact_drift`` reported drift and
            ``allow_artifact_drift`` is false.
    """
    # --- collaborators and plain settings --------------------------------
    self.llm = llm
    if isinstance(tools, ToolRegistry):
        self.registry = tools
    else:
        self.registry = ToolRegistry(tools)
    self.observe_fn = observe
    self.goal_metric = goal_metric
    self.policies = [NoProgress(3)] if policies is None else list(policies)
    self.exits = list(exits) if exits else []

    # --- ceilings and budgets --------------------------------------------
    self.max_iterations = max_iterations
    self.token_limit = token_limit
    self.wall_clock_s = wall_clock_s
    self.cost_limit_usd = cost_limit_usd
    self.pricing = pricing if pricing else DEFAULT_PRICING

    # --- approvals, context, tracing -------------------------------------
    self.approval_runner = approval_runner if approval_runner else CLIApprovalRunner()
    self.approval_ttl_s = approval_ttl_s
    self.max_tokens_per_call = max_tokens_per_call
    self.verbatim_window = verbatim_window
    self.state_path = state_path
    self.trace = TraceSink(trace_path, on_event)
    self.context = ContextManager(verbatim_window=verbatim_window, summarizer=summarizer)
    self.progress = ProgressEngine()
    self.quiet = quiet
    self._last_result: LoopResult | None = None
    self._session_start = time.monotonic()

    # USD accounting stays on unless a cost limit is set and the pricing
    # staleness check turns it off.
    if cost_limit_usd is None:
        self._usd_enabled = True
    else:
        self._usd_enabled = check_staleness(self.pricing, pricing_staleness)

    # --- state: fresh, or adopted after compatibility checks -------------
    if state is None:
        self.state = State(goal=goal, tool_schema_hash=self.registry.schema_hash)
    else:
        if state.tool_schema_hash != self.registry.schema_hash:
            if not allow_schema_change:
                raise SchemaChangedError(
                    "tool schemas changed since this state was saved; pass "
                    "allow_schema_change=True to accept the new schemas"
                )
            state.tool_schema_hash = self.registry.schema_hash
        drift = self.context.check_artifact_drift(state)
        if drift and not allow_artifact_drift:
            raise ArtifactDriftError(
                "stored context artifacts were produced under a different configuration: "
                + "; ".join(drift)
                + " — pass allow_artifact_drift=True to reuse them anyway"
            )
        self.state = state

    # Snapshot the effective configuration so resume() can restore it.
    self.state.config = {key: getattr(self, key) for key in _CONFIG_KEYS}
|
|
142
|
+
|
|
143
|
+
# ---------------------------------------------------------------- run
|
|
144
|
+
|
|
145
|
+
def run(self) -> LoopResult:
    """Run the loop until it terminates and return its result.

    Returns:
        The ``LoopResult`` produced by the main loop.

    Raises:
        NestedLoopError: A loop is already active in the current context.
        LoopConfigError: The state carries a pending approval; such a state
            must be continued through ``Loop.resume``.
    """
    if _ACTIVE.get():
        raise NestedLoopError(
            "Loop.run() called inside an active loop's tool execution. Delegate "
            "sub-tasks via a tool that returns a result; nested loops are unsupported in v1."
        )
    if self.state.pending_approval:
        raise LoopConfigError(
            "this state has a pending approval; use Loop.resume(path, approval={...})"
        )
    token = _ACTIVE.set(True)
    try:
        # Everything after set() lives inside the try so the flag is always
        # reset, even when announcing (print / trace sink) raises; otherwise
        # later run() calls in this context would fail as "nested".
        self._session_start = time.monotonic()
        self._announce()
        return self._loop()
    finally:
        _ACTIVE.reset(token)
|
|
162
|
+
|
|
163
|
+
def _announce(self) -> None:
|
|
164
|
+
limits = (
|
|
165
|
+
f"{self.max_iterations} iterations, {self.token_limit:,} tokens, "
|
|
166
|
+
f"{self.wall_clock_s:.0f}s wall-clock"
|
|
167
|
+
+ (f", ${self.cost_limit_usd:.2f} cost" if self.cost_limit_usd is not None else "")
|
|
168
|
+
)
|
|
169
|
+
if not self.quiet:
|
|
170
|
+
print(f"[loop] goal={self.state.goal!r} | limits: {limits}")
|
|
171
|
+
self.trace.emit("loop.start", goal=self.state.goal, limits=limits)
|
|
172
|
+
|
|
173
|
+
# ------------------------------------------------------------- ceilings
|
|
174
|
+
|
|
175
|
+
def _elapsed(self) -> float:
|
|
176
|
+
return self.state.metrics.elapsed_s + (time.monotonic() - self._session_start)
|
|
177
|
+
|
|
178
|
+
def _remaining_tokens(self) -> int:
|
|
179
|
+
return self.token_limit - self.state.metrics.total_tokens
|
|
180
|
+
|
|
181
|
+
def _ceiling(self) -> tuple[LoopStatus, str] | None:
    """Check the hard ceilings in order: iterations, tokens, wall clock, cost.

    Returns:
        ``(LoopStatus.BUDGET_EXHAUSTED, reason)`` for the first ceiling that
        has been reached, or ``None`` when all budgets still have room.
    """
    metrics = self.state.metrics
    exhausted = LoopStatus.BUDGET_EXHAUSTED

    if len(self.state.iterations) >= self.max_iterations:
        return exhausted, f"max_iterations ({self.max_iterations}) reached"

    if metrics.total_tokens >= self.token_limit:
        return exhausted, f"token_limit ({self.token_limit:,}) reached"

    if self._elapsed() >= self.wall_clock_s:
        return exhausted, f"wall_clock limit ({self.wall_clock_s:.0f}s) reached"

    # The cost ceiling applies only when a limit is configured, USD
    # accounting is enabled, and an estimate has actually been computed.
    cost_tracked = (
        self.cost_limit_usd is not None
        and self._usd_enabled
        and metrics.cost_usd is not None
    )
    if cost_tracked and metrics.cost_usd >= self.cost_limit_usd:
        return exhausted, (
            f"cost_limit (${self.cost_limit_usd:.2f}) reached — estimate; tokens authoritative"
        )
    return None
|
|
195
|
+
|
|
196
|
+
# ----------------------------------------------------------- main loop
|
|
197
|
+
|
|
198
|
+
def _loop(self) -> LoopResult:
    """Drive iterations until a ceiling, policy, goal, approval or exit ends the loop.

    Per iteration: ceiling check, ``before_iteration`` policies, observe and
    measure, early success check, plan, then for every planned call a
    ceiling check, the approval gate and execution. The finished iteration
    is recorded and the exit conditions are evaluated.
    """
    while True:
        limit_hit = self._ceiling()
        if limit_hit:
            return self._finish(*limit_hit)

        # Any policy may stop the loop before the iteration starts.
        for policy in self.policies:
            verdict = policy.before_iteration(self.state)
            if verdict.kind == DecisionKind.STOP:
                self.trace.emit("policy.stop", policy=type(policy).__name__,
                                reason=verdict.reason)
                return self._finish(verdict.status or LoopStatus.ERROR, verdict.reason)

        index = len(self.state.iterations)
        obs = self._observe()
        metric = self._measure(obs)
        metric_dump = metric.model_dump() if metric else None
        self.trace.emit("iteration.start", index=index, metric=metric_dump)

        # Goal already met: record the observation-only iteration and stop.
        if metric and self.goal_metric:
            if self.goal_metric.is_success(metric):
                self._record(IterationRecord(index=index, observation=obs, metric=metric))
                return self._finish(LoopStatus.SUCCESS, "goal metric reports success")

        planned = self._plan(obs)
        if planned is None:  # 3 validation failures this iteration
            failed = IterationRecord(index=index, observation=obs,
                                     plan_invalid=True, metric=metric)
            self._record(failed)
            if self.state.plan_invalid_streak >= 2:
                return self._finish(
                    LoopStatus.PLAN_FAILED,
                    "two consecutive iterations failed tool-argument validation "
                    "(check tool schemas and prompt)",
                )
            continue

        reply, tool_calls = planned
        done: list[ActionRecord] = []
        for tool_call in tool_calls:
            # Ceilings are re-checked before every single action.
            limit_hit = self._ceiling()
            if limit_hit:
                self.trace.emit("budget.preempt", dropped_action=tool_call.name)
                partial = IterationRecord(index=index, observation=obs,
                                          plan_text=reply.text, actions=done,
                                          metric=metric)
                self._record(partial)
                return self._finish(*limit_hit)

            gate_reason = self._gate(tool_call)
            if gate_reason is not None:
                outcome = self._handle_approval(
                    tool_call, gate_reason, metric, reply.text, index, obs, done,
                )
                # A LoopResult means denied or pending; None means approved.
                if isinstance(outcome, LoopResult):
                    return outcome
            done.append(self._execute(tool_call))

        completed = IterationRecord(
            index=index,
            observation=obs,
            plan_text=reply.text,
            actions=done,
            metric=metric,
            input_tokens=reply.input_tokens,
            output_tokens=reply.output_tokens,
        )
        self._record(completed)

        for condition in self.exits:
            hit = condition.evaluate(self.state)
            if hit:
                return self._finish(*hit)
|
|
260
|
+
|
|
261
|
+
# ------------------------------------------------------------- helpers
|
|
262
|
+
|
|
263
|
+
def _observe(self) -> str:
    """Return the excerpted observation; empty input when no observer is set."""
    if self.observe_fn:
        raw = self.observe_fn(self.state)
    else:
        raw = ""
    return excerpt(str(raw))
|
|
266
|
+
|
|
267
|
+
def _measure(self, obs: str) -> MetricSnapshot | None:
|
|
268
|
+
return self.goal_metric.measure(obs, self.state) if self.goal_metric else None
|
|
269
|
+
|
|
270
|
+
def _plan(self, obs: str) -> tuple[LLMResponse, list[ToolCallReq]] | None:
    """Ask the LLM for a plan and validate its tool calls, with up to 3 attempts.

    Every attempt is charged to the state's metrics and traced. When any
    tool call is unknown or fails validation, the errors are appended to the
    conversation and the call is retried.

    Returns:
        ``(response, validated_calls)`` for the first fully valid attempt,
        or ``None`` after three failed validations.
    """
    messages = self.context.build(self.state, obs)
    for attempt in range(3):
        # Clamp the completion size to the remaining budget, with a floor of 16.
        budget = min(self.max_tokens_per_call, self._remaining_tokens())
        if budget < 16:
            budget = 16
        resp = complete_with_retry(self.llm, messages, self.registry.schemas, budget)

        usage = self.state.metrics
        usage.input_tokens += resp.input_tokens
        usage.output_tokens += resp.output_tokens
        usage.llm_calls += 1
        if self._usd_enabled:
            usage.cost_usd = estimate_cost(
                usage.input_tokens, usage.output_tokens, self.llm.model_id, self.pricing,
            )
        self.trace.emit(
            "llm.call",
            model=resp.model_id or self.llm.model_id,
            input_tokens=resp.input_tokens,
            output_tokens=resp.output_tokens,
            attempt=attempt,
        )

        problems: list[str] = []
        accepted: list[ToolCallReq] = []
        for requested in resp.tool_calls:
            tool = self.registry.get(requested.name)
            if tool is None:
                problems.append(f"unknown tool {requested.name!r}")
                continue
            try:
                clean_args = tool.validate(requested.args)
                accepted.append(
                    ToolCallReq(id=requested.id, name=requested.name, args=clean_args)
                )
            except ToolValidationError as exc:
                problems.append(str(exc))

        if not problems:
            return resp, accepted

        self.trace.emit("plan.invalid", attempt=attempt, errors=problems)
        # Echo the model's own output back (its text, else its raw tool calls)
        # followed by the structured rejection, then retry.
        echoed = resp.text or json.dumps([c.model_dump() for c in resp.tool_calls])
        feedback = (
            "Tool call rejected:\n" + "\n".join(problems)
            + "\nRetry with valid arguments matching the tool schemas."
        )
        messages = messages + [
            {"role": "assistant", "content": echoed},
            {"role": "user", "content": feedback},
        ]
    return None
|
|
309
|
+
|
|
310
|
+
def _gate(self, call: ToolCallReq) -> str | None:
    """Ask each policy whether ``call`` needs approval before it runs.

    Returns:
        The reason of the first policy answering PAUSE or STOP (STOP is
        treated as an approval-style gate; the engine pauses), or ``None``
        when no policy objects.
    """
    for policy in self.policies:
        decision = policy.before_action(self.state, call.name, call.args)
        if decision.kind == DecisionKind.PAUSE or decision.kind == DecisionKind.STOP:
            return decision.reason
    return None
|
|
318
|
+
|
|
319
|
+
def _execute(self, call: ToolCallReq) -> ActionRecord:
    """Execute ``call`` through the registry and return its ``ActionRecord``.

    Raises:
        LoopConfigError: ``call.name`` is not registered. Planned calls are
            validated against the registry beforehand, so this is expected
            only when a saved pending approval names a tool that is no
            longer registered at resume time.
    """
    tool = self.registry.get(call.name)
    if tool is None:
        # Fail before executing anything; without this guard the fingerprint
        # lookup below would raise an opaque AttributeError.
        raise LoopConfigError(
            f"cannot execute unknown tool {call.name!r}; it is not in the tool registry"
        )
    result = self.registry.execute(call.name, call.args)
    record = ActionRecord(
        tool=call.name,
        args_excerpt=excerpt(json.dumps(call.args, default=str), 1024),
        status=result.status,
        result_excerpt=result.output,
        duration_s=result.duration_s,
        fingerprint=tool.fingerprint(call.args),
    )
    self.trace.emit("action.executed", tool=call.name, status=result.status,
                    duration_s=round(result.duration_s, 3))
    return record
|
|
333
|
+
|
|
334
|
+
def _action_hash(self, call: ToolCallReq) -> str:
    """Digest the tool name, its key-sorted JSON args and the state version."""
    serialized_args = json.dumps(call.args, sort_keys=True, default=str)
    version = str(self.state.state_version)
    return digest(call.name + serialized_args + version)
|
|
337
|
+
|
|
338
|
+
def _handle_approval(
    self,
    call: ToolCallReq,
    reason: str,
    metric: MetricSnapshot | None,
    plan_text: str,
    index: int,
    obs: str,
    actions_so_far: list[ActionRecord],
) -> LoopResult | None:
    """Request approval for a gated action.

    Returns:
        ``None`` when approved (the caller executes the action); otherwise
        the ``LoopResult`` to return: APPROVAL_DENIED, or AWAITING_APPROVAL
        with a resume token.

    Raises:
        LoopConfigError: The decision is pending but no ``state_path`` is
            configured, so the loop cannot pause.
    """

    def partial_iteration() -> IterationRecord:
        # The iteration as it stands before the gated action runs.
        return IterationRecord(index=index, observation=obs, plan_text=plan_text,
                               actions=actions_so_far, metric=metric)

    # A runner-specific TTL takes precedence over the loop default.
    ttl = getattr(self.approval_runner, "ttl_s", self.approval_ttl_s)
    req = ApprovalRequest(
        token=new_token(),
        tool=call.name,
        args=call.args,
        reason=reason,
        action_hash=self._action_hash(call),
        state_version=self.state.state_version,
        created_at=time.time(),
        ttl_s=ttl,
    )
    self.trace.emit("approval.requested", tool=call.name, token=req.token, reason=reason)
    decision, note = self.approval_runner.request(req)
    self.trace.emit("approval.decision", token=req.token, decision=decision.value, note=note)

    if decision == ApprovalDecision.APPROVED:
        return None

    if decision == ApprovalDecision.DENIED:
        self._record(partial_iteration())
        return self._finish(LoopStatus.APPROVAL_DENIED, note)

    # PENDING: serialize and hand back a resume token
    if not self.state_path:
        raise LoopConfigError("headless approval requires state_path= so the loop can pause")
    if actions_so_far or plan_text:
        self._record(partial_iteration())
    self.state.pending_approval = PendingApproval(
        token=req.token,
        tool=call.name,
        args=call.args,
        reason=reason,
        action_hash=req.action_hash,
        state_version=req.state_version,
        created_at=req.created_at,
        ttl_s=req.ttl_s,
        plan_text=plan_text,
        precondition_metric=metric,
    )
    return self._finish(
        LoopStatus.AWAITING_APPROVAL,
        f"action {call.name!r} awaits approval (token {req.token})",
        approval_token=req.token,
    )
|
|
383
|
+
|
|
384
|
+
def _record(self, it: IterationRecord) -> None:
|
|
385
|
+
report = self.progress.evaluate(self.state, it)
|
|
386
|
+
if report.trend == "regressing" and it.plan_text:
|
|
387
|
+
self.state.failed_approaches.append(
|
|
388
|
+
f"iteration {it.index}: {it.plan_text[:160]} -> metric regressed"
|
|
389
|
+
)
|
|
390
|
+
self.state.iterations.append(it)
|
|
391
|
+
self.state.state_version += 1
|
|
392
|
+
created = self.context.ensure_summaries(self.state)
|
|
393
|
+
self.trace.emit(
|
|
394
|
+
"iteration.end", index=it.index, trend=report.trend,
|
|
395
|
+
repetition=report.repetition, no_progress_streak=report.no_progress_streak,
|
|
396
|
+
summarized_iterations=created,
|
|
397
|
+
accounting=self.context.budget_report(self.state),
|
|
398
|
+
)
|
|
399
|
+
if self.state_path:
|
|
400
|
+
self._sync_elapsed()
|
|
401
|
+
self.state.save(self.state_path)
|
|
402
|
+
|
|
403
|
+
def _sync_elapsed(self) -> None:
|
|
404
|
+
now = time.monotonic()
|
|
405
|
+
self.state.metrics.elapsed_s += now - self._session_start
|
|
406
|
+
self._session_start = now
|
|
407
|
+
|
|
408
|
+
def _finish(self, status: LoopStatus, reason: str, approval_token: str | None = None) -> LoopResult:
    """Finalize the run: sync time, settle approvals, save, and build the result.

    A pending approval survives only when the loop ends as AWAITING_APPROVAL;
    for any other status it is cancelled (and traced) before the state is
    saved. The result is also remembered as ``_last_result``.
    """
    self._sync_elapsed()

    pending = self.state.pending_approval
    if status != LoopStatus.AWAITING_APPROVAL and pending:
        self.trace.emit(
            "approval.cancelled",
            token=pending.token,
            reason=f"loop ended: {reason}",
        )
        self.state.pending_approval = None

    if self.state_path:
        self.state.save(self.state_path)

    outcome = LoopResult.make(
        status,
        reason,
        len(self.state.iterations),
        self.state.metrics,
        state_path=self.state_path,
        approval_token=approval_token,
    )
    self.trace.emit(
        "loop.end",
        status=status.value,
        reason=reason,
        iterations=outcome.iterations,
        total_tokens=self.state.metrics.total_tokens,
    )
    self._last_result = outcome
    return outcome
|
|
424
|
+
|
|
425
|
+
# ------------------------------------------------------------- public
|
|
426
|
+
|
|
427
|
+
def explain(self, result: LoopResult | None = None) -> ExplainReport:
    """Build an ``ExplainReport`` for ``result``, defaulting to the last result."""
    chosen = result if result else self._last_result
    return explain(self.state, chosen)
|
|
429
|
+
|
|
430
|
+
def budget_report(self) -> dict[str, Any]:
    """Return the context manager's budget report for the current state."""
    report = self.context.budget_report(self.state)
    return report
|
|
432
|
+
|
|
433
|
+
# ------------------------------------------------------------- resume
|
|
434
|
+
|
|
435
|
+
@classmethod
def resume(
    cls,
    state_path: str,
    *,
    tools: list[Tool | Callable],
    llm: LLMClient,
    approval: dict[str, Any] | None = None,
    extend: dict[str, Any] | None = None,
    **kwargs: Any,
) -> LoopResult:
    """Load a saved state and continue the loop from it.

    Args:
        state_path: File the state is loaded from and saved back to.
        tools: Tools for the resumed loop.
        llm: Client for the resumed loop.
        approval: Answer to a pending approval, a dict with ``token`` and
            ``approved`` keys; needed when the state has a pending approval.
        extend: Overrides for saved configuration values; keys must be in
            ``_CONFIG_KEYS``.
        **kwargs: Extra ``Loop`` constructor arguments. Explicit values win
            over the saved configuration.

    Returns:
        The ``LoopResult`` of the pending-approval handling or of the
        continued loop.

    Raises:
        NestedLoopError: A loop is already active in the current context.
        LoopConfigError: ``extend`` contains unknown keys, or the pending
            approval cannot be resolved with the given ``approval``.
    """
    # Same guard as run(): resuming from inside a tool is a nested loop too.
    if _ACTIVE.get():
        raise NestedLoopError(
            "Loop.resume() called inside an active loop's tool execution. Delegate "
            "sub-tasks via a tool that returns a result; nested loops are unsupported in v1."
        )

    state = State.load(state_path)
    if extend:
        unknown = set(extend) - set(_CONFIG_KEYS)
        if unknown:
            raise LoopConfigError(f"extend has unknown keys: {sorted(unknown)}")
        state.config.update(extend)
    # Saved configuration fills in whatever the caller did not pass explicitly.
    for key in _CONFIG_KEYS:
        if key in state.config:
            kwargs.setdefault(key, state.config[key])
    loop = cls(goal=state.goal, tools=tools, llm=llm, state=state,
               state_path=state_path, **kwargs)

    token = _ACTIVE.set(True)
    try:
        # The approved action runs inside the active flag so that a tool
        # starting a nested loop during resume is detected as well.
        if state.pending_approval:
            outcome = loop._resume_pending(approval)
            if outcome is not None:
                return outcome

        loop._session_start = time.monotonic()
        loop._announce()
        return loop._loop()
    finally:
        _ACTIVE.reset(token)
|
|
470
|
+
|
|
471
|
+
def _resume_pending(self, approval: dict[str, Any] | None) -> LoopResult | None:
    """Resolve the pending approval stored in the state when resuming.

    Returns:
        A ``LoopResult`` to stop with (expired, denied, or re-requested
        because the goal metric changed), or ``None`` once the approved
        action has been executed and recorded and looping should continue.

    Raises:
        LoopConfigError: No ``approval`` was supplied, or its token does not
            match the pending request.
    """
    pending = self.state.pending_approval

    # 1. Expired requests are dropped before the answer is even looked at.
    age = time.time() - pending.created_at
    if age > pending.ttl_s:
        self.trace.emit("approval.expired", token=pending.token)
        self.state.pending_approval = None
        return self._finish(
            LoopStatus.PENDING_EXPIRED,
            f"approval token {pending.token} expired after {pending.ttl_s:.0f}s",
        )

    # 2. The answer must be present and refer to this exact request.
    if approval is None:
        raise LoopConfigError(
            "state has a pending approval; pass approval={'token': ..., 'approved': bool}"
        )
    if approval.get("token") != pending.token:
        raise LoopConfigError("approval token does not match the pending request")

    # 3. Explicit denial ends the loop.
    if not approval.get("approved"):
        self.state.pending_approval = None
        return self._finish(LoopStatus.APPROVAL_DENIED, "denied via resume token")

    # 4. Re-observe: stale precondition invalidates the approval
    obs = self._observe()
    metric = self._measure(obs)
    baseline = pending.precondition_metric
    if baseline is not None and metric is not None:
        if metric.model_dump() != baseline.model_dump():
            fresh = new_token()
            self.trace.emit(
                "approval.invalidated",
                old_token=pending.token,
                new_token=fresh,
                reason="precondition changed since approval was requested",
            )
            self.state.pending_approval = pending.model_copy(
                update={"token": fresh, "created_at": time.time(),
                        "precondition_metric": metric}
            )
            return self._finish(
                LoopStatus.AWAITING_APPROVAL,
                "the situation changed since this approval was requested (goal metric "
                f"differs); approval re-requested with new token {fresh}",
                approval_token=fresh,
            )

    # 5. Approved and still valid: run the action and record it as its own iteration.
    approved_call = ToolCallReq(name=pending.tool, args=pending.args)
    executed = self._execute(approved_call)
    record = IterationRecord(
        index=len(self.state.iterations),
        observation=obs,
        plan_text=pending.plan_text or f"approved action {pending.tool}",
        actions=[executed],
        metric=metric,
    )
    self.state.pending_approval = None
    self._record(record)
    return None
|
loop/core/result.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""LoopResult status matrix — every status is explicit and actionable."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from enum import Enum
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
|
|
8
|
+
from .state import Metrics
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class LoopStatus(str, Enum):
    """Status a loop run can end with.

    Members are ``str``-valued and each value equals the member's name.
    """

    SUCCESS = "SUCCESS"
    BUDGET_EXHAUSTED = "BUDGET_EXHAUSTED"
    NO_PROGRESS = "NO_PROGRESS"
    PLAN_FAILED = "PLAN_FAILED"
    APPROVAL_DENIED = "APPROVAL_DENIED"
    AWAITING_APPROVAL = "AWAITING_APPROVAL"
    PENDING_EXPIRED = "PENDING_EXPIRED"
    ERROR = "ERROR"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Per-status metadata: (resumable, recommended_action). LoopResult.make()
# looks these two values up for the status it is given.
STATUS_INFO: dict[LoopStatus, tuple[bool, str]] = {
    LoopStatus.SUCCESS: (False, "done"),
    LoopStatus.BUDGET_EXHAUSTED: (
        True,
        "inspect snapshot; Loop.resume(..., extend={...}) with a larger budget",
    ),
    LoopStatus.NO_PROGRESS: (
        True,
        "change tools, goal, or limits, then Loop.resume(...)",
    ),
    LoopStatus.PLAN_FAILED: (
        True,
        "fix tool schemas or prompt, then Loop.resume(...)",
    ),
    LoopStatus.APPROVAL_DENIED: (
        True,
        "adjust plan or policy, then Loop.resume(...)",
    ),
    LoopStatus.AWAITING_APPROVAL: (
        True,
        "approve via token: Loop.resume(path, approval={'token': ..., 'approved': True})",
    ),
    LoopStatus.PENDING_EXPIRED: (True, "resume to re-request approval"),
    LoopStatus.ERROR: (False, "inspect trace via loop.explain()"),
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class LoopResult(BaseModel):
    """Outcome of a loop run: status, reason, and what the caller can do next."""

    status: LoopStatus
    reason: str = ""
    # Both of the following are looked up from STATUS_INFO by make().
    resumable: bool
    recommended_action: str
    iterations: int
    metrics: Metrics
    state_path: str | None = None
    approval_token: str | None = None

    @classmethod
    def make(
        cls,
        status: LoopStatus,
        reason: str,
        iterations: int,
        metrics: Metrics,
        state_path: str | None = None,
        approval_token: str | None = None,
    ) -> "LoopResult":
        """Build a result, filling ``resumable`` and ``recommended_action`` from ``STATUS_INFO``.

        Raises:
            KeyError: ``status`` has no entry in ``STATUS_INFO``.
        """
        can_resume, next_step = STATUS_INFO[status]
        fields = {
            "status": status,
            "reason": reason,
            "resumable": can_resume,
            "recommended_action": next_step,
            "iterations": iterations,
            "metrics": metrics,
            "state_path": state_path,
            "approval_token": approval_token,
        }
        return cls(**fields)
|