tightloop 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loop/__init__.py +40 -0
- loop/approval/__init__.py +87 -0
- loop/blueprints/__init__.py +3 -0
- loop/blueprints/testfix.py +117 -0
- loop/context/__init__.py +144 -0
- loop/core/__init__.py +0 -0
- loop/core/engine.py +515 -0
- loop/core/result.py +64 -0
- loop/core/state.py +143 -0
- loop/exit/__init__.py +60 -0
- loop/llm/__init__.py +70 -0
- loop/llm/anthropic.py +45 -0
- loop/llm/openai.py +55 -0
- loop/policy/__init__.py +96 -0
- loop/pricing.py +47 -0
- loop/progress/__init__.py +72 -0
- loop/tools/__init__.py +220 -0
- loop/trace/__init__.py +81 -0
- tightloop-0.1.0.dist-info/METADATA +439 -0
- tightloop-0.1.0.dist-info/RECORD +21 -0
- tightloop-0.1.0.dist-info/WHEEL +4 -0
loop/__init__.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Loop — production-grade loops for AI agents.
|
|
2
|
+
|
|
3
|
+
A structured runtime for reliable, observable, governable agent loops.
|
|
4
|
+
"""
|
|
5
|
+
from .approval import (
|
|
6
|
+
ApprovalDecision,
|
|
7
|
+
ApprovalRequest,
|
|
8
|
+
CallbackApprovalRunner,
|
|
9
|
+
CLIApprovalRunner,
|
|
10
|
+
HeadlessApprovalRunner,
|
|
11
|
+
)
|
|
12
|
+
from .blueprints import PytestFailureMetric, TestFixLoop
|
|
13
|
+
from .core.engine import Loop, LoopConfigError, NestedLoopError
|
|
14
|
+
from .core.result import LoopResult, LoopStatus
|
|
15
|
+
from .core.state import (
|
|
16
|
+
ArtifactDriftError,
|
|
17
|
+
MetricSnapshot,
|
|
18
|
+
SchemaChangedError,
|
|
19
|
+
State,
|
|
20
|
+
)
|
|
21
|
+
from .exit import Exit, ExitCondition
|
|
22
|
+
from .llm import CallableLLM, LLMClient, LLMResponse, ToolCallReq
|
|
23
|
+
from .policy import CostLimit, NoProgress, Policy, RequireApproval
|
|
24
|
+
from .progress import GoalMetric
|
|
25
|
+
from .tools import Tool, ToolRegistry, UnsupportedTypeError, run_command, tool
|
|
26
|
+
from .trace import explain
|
|
27
|
+
|
|
28
|
+
__version__ = "0.1.0"
|
|
29
|
+
|
|
30
|
+
__all__ = [
|
|
31
|
+
"Loop", "LoopResult", "LoopStatus", "State", "MetricSnapshot",
|
|
32
|
+
"Exit", "ExitCondition", "Policy", "NoProgress", "CostLimit", "RequireApproval",
|
|
33
|
+
"GoalMetric", "Tool", "tool", "ToolRegistry", "run_command",
|
|
34
|
+
"LLMClient", "LLMResponse", "ToolCallReq", "CallableLLM",
|
|
35
|
+
"ApprovalRequest", "ApprovalDecision", "CLIApprovalRunner",
|
|
36
|
+
"CallbackApprovalRunner", "HeadlessApprovalRunner",
|
|
37
|
+
"TestFixLoop", "PytestFailureMetric", "explain",
|
|
38
|
+
"NestedLoopError", "LoopConfigError", "SchemaChangedError",
|
|
39
|
+
"ArtifactDriftError", "UnsupportedTypeError",
|
|
40
|
+
]
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Human approval checkpoints.
|
|
2
|
+
|
|
3
|
+
ApprovalRequest is frozen at type level: callbacks get a
|
|
4
|
+
read-only payload — action, args, reason, digests — never the full context.
|
|
5
|
+
Callback runner: 60s timeout, deny-on-exception, every invocation traced by the engine.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import secrets
|
|
10
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
11
|
+
from concurrent.futures import TimeoutError as FutureTimeout
|
|
12
|
+
from enum import Enum
|
|
13
|
+
from typing import Any, Callable
|
|
14
|
+
|
|
15
|
+
from pydantic import BaseModel, ConfigDict
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ApprovalDecision(str, Enum):
    """Outcome reported by an approval runner.

    Every member is also a ``str`` whose value equals its name, so a decision
    can be compared against (or looked up from) its plain-text form.
    """

    APPROVED = "APPROVED"
    DENIED = "DENIED"
    # headless: serialize state, resume by token
    PENDING = "PENDING"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ApprovalRequest(BaseModel):
    """Immutable description of one action awaiting approval.

    The model is configured as frozen, so fields cannot be reassigned after
    construction; runners receive this payload rather than the loop state.
    """

    # frozen=True: instances are read-only once built
    model_config = ConfigDict(frozen=True)

    token: str  # identifies this request; echoed in the headless runner's PENDING reason
    tool: str  # name of the tool awaiting approval (printed by the CLI prompt)
    args: dict[str, Any]  # arguments of the pending call (printed by the CLI prompt)
    reason: str  # why approval is required (printed by the CLI prompt)
    action_hash: str  # presumably a digest of the pending action -- confirm against the engine
    state_version: int  # presumably the state version when the request was made -- confirm
    created_at: float  # presumably a creation timestamp -- confirm clock/units with the engine
    ttl_s: float  # presumably the request's validity window in seconds -- confirm
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def new_token(nbytes: int = 8) -> str:
    """Return a fresh, URL-safe random token for an approval request.

    Args:
        nbytes: Number of random bytes to draw from the OS CSPRNG. The
            default of 8 (64 bits, an 11-character token) preserves the
            historical token size; pass a larger value such as 16 or 32
            when tokens may be exposed to guessing.

    Returns:
        The random bytes encoded as unpadded base64url text.
    """
    return secrets.token_urlsafe(nbytes)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class ApprovalRunner:
    """Base interface for approval back-ends.

    Subclasses override :meth:`request`; this base implementation always raises.
    """

    def request(self, req: ApprovalRequest) -> tuple[ApprovalDecision, str]:
        """Decide on ``req`` and return a ``(decision, reason)`` pair.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class CLIApprovalRunner(ApprovalRunner):
    """Interactive default: prompts on stdin."""

    def request(self, req: ApprovalRequest) -> tuple[ApprovalDecision, str]:
        """Print the pending action and ask the operator to approve it.

        Only an explicit ``y`` / ``yes`` (case-insensitive, surrounding
        whitespace ignored) approves; any other answer denies.

        Returns:
            ``(decision, reason)`` where the decision is APPROVED or DENIED.
        """
        print(f"\n[loop] approval required: {req.tool}({req.args})\nreason: {req.reason}")
        try:
            answer = input("approve? [y/N] ").strip().lower()
        except EOFError:
            # stdin is closed or non-interactive: nobody can approve, so fail
            # closed instead of crashing the loop with an unhandled EOFError.
            return ApprovalDecision.DENIED, "denied via CLI (no input available on stdin)"
        if answer in ("y", "yes"):
            return ApprovalDecision.APPROVED, "approved via CLI"
        return ApprovalDecision.DENIED, "denied via CLI"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class CallbackApprovalRunner(ApprovalRunner):
    """Delegates the decision to a user callback, failing closed on any problem.

    The callback runs on a worker thread so that a timeout can be enforced.
    A timeout, an exception, or a result that is not literally ``True`` all
    produce DENIED.
    """

    def __init__(self, fn: Callable[[ApprovalRequest], bool], timeout_s: float = 60.0):
        """Store the callback and the maximum time to wait for its answer.

        Args:
            fn: Called with the request; must return ``True`` to approve.
            timeout_s: Seconds to wait for ``fn`` before denying.
        """
        self.fn = fn
        self.timeout_s = timeout_s

    def request(self, req: ApprovalRequest) -> tuple[ApprovalDecision, str]:
        """Run the callback and translate its outcome into a decision.

        Returns:
            ``(decision, reason)``; the reason names the deny cause (timeout,
            exception, invalid result) when the request is not approved.
        """
        executor = ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(self.fn, req)
            try:
                approved = future.result(timeout=self.timeout_s)
            except FutureTimeout:
                return ApprovalDecision.DENIED, f"callback timed out after {self.timeout_s}s (deny-on-timeout)"
            except Exception as e:
                return ApprovalDecision.DENIED, f"callback raised {type(e).__name__}: {e} (deny-on-exception)"
            if approved is True:
                return ApprovalDecision.APPROVED, "approved via callback"
            if approved:
                # Truthy but not the bool True (e.g. the coroutine object returned by an
                # ``async def`` callback, or a non-empty string): the callback never
                # actually said "yes", so fail closed instead of approving.
                return (
                    ApprovalDecision.DENIED,
                    f"callback returned non-bool {type(approved).__name__} (deny-on-invalid-result)",
                )
            return ApprovalDecision.DENIED, "denied via callback"
        finally:
            # Do not block on a callback that is still running after a timeout.
            executor.shutdown(wait=False)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class HeadlessApprovalRunner(ApprovalRunner):
    """Always returns PENDING: the engine serializes state and exits AWAITING_APPROVAL;
    resume with Loop.resume(path, approval={'token': ..., 'approved': True})."""

    def __init__(self, ttl_s: float = 3600.0):
        """Remember ``ttl_s``; this class only stores it (it is not read in ``request``)."""
        self.ttl_s = ttl_s

    def request(self, req: ApprovalRequest) -> tuple[ApprovalDecision, str]:
        """Defer the decision: report PENDING together with the request's token."""
        reason = f"awaiting approval, token={req.token}"
        return ApprovalDecision.PENDING, reason
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""TestFixLoop blueprint: fix failing tests until all pass.
|
|
2
|
+
|
|
3
|
+
Progress tracks test IDENTITY, not counts: value = originally_failing_fixed −
|
|
4
|
+
newly_broken, and newly-broken tests flag `regression` even when totals improve.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from ..core.engine import Loop
|
|
13
|
+
from ..core.state import MetricSnapshot, State
|
|
14
|
+
from ..llm import LLMClient
|
|
15
|
+
from ..progress import GoalMetric
|
|
16
|
+
from ..tools import Tool, run_command
|
|
17
|
+
|
|
18
|
+
# A summary line starts with FAILED/ERROR followed by the node ID and, optionally,
# " - <reason>". The whole remainder of the line is captured (not just the first
# whitespace-free run) because parametrized node IDs may contain spaces.
_FAILED_RE = re.compile(r"^(?:FAILED|ERROR)[ \t]+(.+)$", re.MULTILINE)
# Marker of the form "[exit code: N]"; N may be negative.
_EXIT_RE = re.compile(r"\[exit code: (-?\d+)\]")


def parse_failing(output: str) -> set[str]:
    """Extract the identifiers of failing/erroring tests from test-run output.

    Args:
        output: Raw text containing lines such as
            ``FAILED path::test[param id] - AssertionError: ...``.

    Returns:
        The set of identifiers, each being the text between the
        ``FAILED``/``ERROR`` keyword and the first ``" - "`` separator (or the
        end of the line), stripped of surrounding whitespace. Empty when no
        such line is present.
    """
    failing: set[str] = set()
    for remainder in _FAILED_RE.findall(output):
        # Drop the trailing " - <reason>" part; keep spaces inside the ID itself.
        node_id = remainder.split(" - ", 1)[0].strip()
        if node_id:
            failing.add(node_id)
    return failing
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class PytestFailureMetric(GoalMetric):
    """Progress metric based on which tests fail, not on how many.

    The score is ``len(fixed) - len(newly_broken)`` relative to the baseline
    set of failing tests recorded by the first measured iteration.
    """

    def measure(self, observation: str, state: State) -> MetricSnapshot:
        """Score one test-run observation against the loop's history.

        Args:
            observation: Test output, expected to end with an
                ``[exit code: N]`` marker.
            state: Loop state; earlier iterations' metric details supply the
                baseline and the previous failing set.

        Returns:
            A snapshot whose ``detail`` holds the sorted failing and baseline
            IDs, the fixed / newly-broken counts and the parsed exit code
            (``None`` when no marker is present).
        """
        failing = parse_failing(observation)
        # TestFixLoop appends its exit marker after the captured output, so the LAST
        # match is the authoritative one; an earlier look-alike can only come from
        # the test output itself and must not decide success.
        exit_codes = _EXIT_RE.findall(observation)
        exit_code = int(exit_codes[-1]) if exit_codes else None

        baseline: set[str] | None = None
        prev: set[str] | None = None
        for it in state.iterations:
            if it.metric and "failing" in it.metric.detail:
                if baseline is None:
                    # First measured iteration fixes the baseline; fall back to its
                    # failing set when no (or an empty) baseline was stored.
                    baseline = set(it.metric.detail.get("baseline") or it.metric.detail["failing"])
                prev = set(it.metric.detail["failing"])
        if baseline is None:
            # No history yet: this observation defines the baseline.
            baseline = set(failing)

        fixed = baseline - failing
        newly_broken = failing - baseline
        # Anything failing now that was not failing in the previous iteration.
        regressed_vs_prev = bool(failing - prev) if prev is not None else False
        return MetricSnapshot(
            value=float(len(fixed) - len(newly_broken)),
            regression=bool(newly_broken) or regressed_vs_prev,
            detail={
                "failing": sorted(failing),
                "baseline": sorted(baseline),
                "fixed": len(fixed),
                "newly_broken": len(newly_broken),
                "exit_code": exit_code,
            },
        )

    def is_success(self, snapshot: MetricSnapshot) -> bool:
        """Return True only when nothing is failing and the exit code is 0."""
        return not snapshot.detail.get("failing") and snapshot.detail.get("exit_code") == 0
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class TestFixLoop(Loop):
    """Loop blueprint that runs a test command and edits files until the tests pass.

    It wires three tools (``run_tests``, ``read_file``, ``edit_file``), an
    observer that runs the test command, and :class:`PytestFailureMetric`.
    """

    __test__ = False  # not a pytest test class, despite the name

    def __init__(
        self,
        llm: LLMClient,
        repo: str = ".",
        test_cmd: str = "python -m pytest -q -rf --tb=short",
        test_timeout_s: float = 300.0,
        goal: str | None = None,
        **kwargs: Any,
    ):
        """Build the loop for one repository.

        Args:
            llm: Client forwarded to ``Loop``.
            repo: Repository directory; resolved to an absolute path and used
                as the working directory for tests and the root for file tools.
            test_cmd: Command executed to run the test suite.
            test_timeout_s: Timeout passed to the test command; the
                ``run_tests`` tool itself gets 10 extra seconds.
            goal: Goal text; defaults to a sentence naming the repository.
            **kwargs: Forwarded unchanged to ``Loop.__init__``.
        """
        repo_path = Path(repo).resolve()

        def _resolve_in_repo(path: str) -> Path:
            # Resolve symlinks and ".." first, then refuse anything outside the repo.
            target = (repo_path / path).resolve()
            if not target.is_relative_to(repo_path):
                raise ValueError(f"path {path!r} escapes the repository")
            return target

        def _run_tests() -> str:
            res = run_command(test_cmd, timeout_s=test_timeout_s, cwd=str(repo_path))
            suffix = " [timed out]" if res.timed_out else ""
            # The exit marker goes last so the metric can read it back.
            return f"{res.stdout}\n{res.stderr}\n[exit code: {res.code}]{suffix}"

        def run_tests() -> str:
            """Run the test suite and return its output."""
            return _run_tests()

        def read_file(path: str) -> str:
            """Read a file from the repository."""
            # Explicit UTF-8: Python sources default to UTF-8, the locale may not.
            return _resolve_in_repo(path).read_text(encoding="utf-8")

        def edit_file(path: str, content: str) -> str:
            """Replace the full contents of a file in the repository."""
            target = _resolve_in_repo(path)
            target.parent.mkdir(parents=True, exist_ok=True)
            # Explicit UTF-8 so non-ASCII edits are not mangled by the locale encoding.
            target.write_text(content, encoding="utf-8")
            # drop stale bytecode: pyc validation uses whole-second mtime + size,
            # so a same-size edit within the same second would be masked by the cache
            cache_dir = target.parent / "__pycache__"
            if target.suffix == ".py" and cache_dir.is_dir():
                for pyc in cache_dir.glob(f"{target.stem}.*.pyc"):
                    pyc.unlink(missing_ok=True)
            return f"wrote {len(content)} chars to {path}"

        super().__init__(
            goal=goal or f"Fix failing tests in {repo_path.name} until all pass",
            tools=[
                Tool(run_tests, timeout_s=test_timeout_s + 10),
                Tool(read_file),
                Tool(edit_file),
            ],
            llm=llm,
            observe=lambda state: _run_tests(),
            goal_metric=PytestFailureMetric(),
            **kwargs,
        )
|
loop/context/__init__.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""Context manager: pinned facts never summarized away,
|
|
2
|
+
failed-approaches registry always in context, version-stamped summaries
|
|
3
|
+
computed once and stored (deterministic resume), transparent token accounting.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import hashlib
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from ..core.state import (
|
|
11
|
+
ENGINE_VERSION,
|
|
12
|
+
ArtifactStamp,
|
|
13
|
+
ContextArtifact,
|
|
14
|
+
IterationRecord,
|
|
15
|
+
State,
|
|
16
|
+
)
|
|
17
|
+
from ..llm import LLMClient
|
|
18
|
+
|
|
19
|
+
# Template sent to the summarizer; "{body}" is replaced by the rendered iteration.
SUMMARY_PROMPT = (
    "Summarize this agent-loop iteration in 3 sentences or fewer. Preserve: what was "
    "attempted, the outcome, and any error messages verbatim.\n\n{body}"
)
# Fingerprint of the template: the first 16 hex digits of its SHA-256 digest.
_PROMPT_HASH = hashlib.sha256(SUMMARY_PROMPT.encode("utf-8")).hexdigest()[:16]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _render_iteration(it: IterationRecord, result_cap: int = 2000) -> str:
    """Render one iteration record as a newline-joined text section.

    Args:
        it: The iteration to render.
        result_cap: Maximum characters kept from the observation and from each
            action's result excerpt.

    Returns:
        A heading line followed by the observation, plan, invalid-plan note,
        one line per action and the metric line; parts that are empty or
        falsy on the record are left out.
    """
    sections = [f"### Iteration {it.index}"]
    if it.observation:
        sections.append(f"Observed:\n{it.observation[:result_cap]}")
    if it.plan_text:
        # Plans are capped at a fixed 600 characters regardless of result_cap.
        sections.append(f"Planned: {it.plan_text[:600]}")
    if it.plan_invalid:
        sections.append("(plan failed tool-argument validation)")
    sections.extend(
        f"Action {act.tool}({act.args_excerpt[:300]}) -> [{act.status}] {act.result_excerpt[:result_cap]}"
        for act in it.actions
    )
    if it.metric:
        marker = " (REGRESSION)" if it.metric.regression else ""
        sections.append(f"Metric: {it.metric.value}{marker}")
    return "\n".join(sections)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _est_tokens(text: str) -> int:
    """Estimate the token count of ``text`` as one token per four characters, rounded down."""
    # documented heuristic; itemized, not hidden
    whole_tokens, _ = divmod(len(text), 4)
    return whole_tokens
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ContextManager:
    """Builds the prompt for each loop step and maintains iteration summaries.

    The most recent ``verbatim_window`` iterations are rendered in full; older
    ones are represented by summary artifacts that are created once and stored
    on the state.
    """

    def __init__(self, verbatim_window: int = 3, summarizer: LLMClient | None = None,
                 summary_max_tokens: int = 400):
        """Configure the context manager.

        Args:
            verbatim_window: Number of most recent iterations rendered verbatim;
                zero or a negative value means none are rendered verbatim.
            summarizer: Optional LLM client used to write summaries; without
                it, summaries are a truncated rendering of the iteration.
            summary_max_tokens: Token limit passed to the summarizer.
        """
        self.verbatim_window = verbatim_window
        self.summarizer = summarizer
        self.summary_max_tokens = summary_max_tokens

    @property
    def stamp(self) -> ArtifactStamp:
        """Stamp identifying the engine version, summarizer model and prompt in use."""
        model_id = self.summarizer.model_id if self.summarizer else "deterministic-truncate"
        return ArtifactStamp(engine_version=ENGINE_VERSION, model_id=model_id, prompt_hash=_PROMPT_HASH)

    def _recent_iterations(self, state: State) -> list[IterationRecord]:
        """Return the iterations inside the verbatim window (possibly none)."""
        window = self.verbatim_window
        if window <= 0:
            # ``iterations[-0:]`` would return EVERY iteration, not none.
            return []
        return state.iterations[-window:]

    def check_artifact_drift(self, state: State) -> list[str]:
        """Returns mismatch descriptions for artifacts produced under a different config."""
        current = self.stamp
        problems = []
        for a in state.artifacts:
            if a.kind == "summary" and a.stamp != current:
                problems.append(
                    f"summary for iteration {a.iteration}: produced by "
                    f"{a.stamp.engine_version}/{a.stamp.model_id}, current is "
                    f"{current.engine_version}/{current.model_id}"
                )
        return problems

    def ensure_summaries(self, state: State) -> list[int]:
        """Summarize iterations that just left the verbatim window. Computed once,
        stored, reused on resume — never recomputed.

        Returns:
            The indices of the iterations for which a summary was created by
            this call.
        """
        done = {a.iteration for a in state.artifacts if a.kind == "summary"}
        # Everything before the verbatim window is eligible for summarization.
        cutoff = len(state.iterations) - max(self.verbatim_window, 0)
        created = []
        for it in state.iterations[:cutoff] if cutoff > 0 else []:
            if it.index in done:
                continue
            body = _render_iteration(it, result_cap=600)
            if self.summarizer:
                resp = self.summarizer.complete(
                    [{"role": "user", "content": SUMMARY_PROMPT.format(body=body)}],
                    [], self.summary_max_tokens,
                )
                content = resp.text
                # Summarizer usage is charged to the loop's own metrics.
                state.metrics.input_tokens += resp.input_tokens
                state.metrics.output_tokens += resp.output_tokens
                state.metrics.llm_calls += 1
            else:
                content = body[:800]  # deterministic fallback
            state.artifacts.append(
                ContextArtifact(kind="summary", iteration=it.index, content=content, stamp=self.stamp)
            )
            created.append(it.index)
        return created

    def build(self, state: State, observation: str) -> list[dict[str, str]]:
        """Assemble the system and user messages for the next step.

        Args:
            state: Loop state supplying the goal, pinned facts, failed
                approaches, summary artifacts and iterations.
            observation: Text of the current observation.

        Returns:
            Two messages: a system message (instructions, goal, pinned facts,
            failed approaches) and a user message (summaries, recent
            iterations, current observation).
        """
        system_parts = [
            "You are an agent executing one step of a structured loop. Use the provided "
            "tools to make progress toward the goal. Respond with tool calls.",
            f"Goal: {state.goal}",
        ]
        if state.pinned_facts:
            system_parts.append("Key facts (pinned):\n" + "\n".join(f"- {f}" for f in state.pinned_facts))
        if state.failed_approaches:
            system_parts.append(
                "Approaches already tried that FAILED (do not repeat):\n"
                + "\n".join(f"- {f}" for f in state.failed_approaches)
            )

        user_parts = []
        summaries = [a for a in state.artifacts if a.kind == "summary"]
        if summaries:
            user_parts.append(
                "## Earlier iterations (summarized)\n"
                + "\n".join(f"- iter {a.iteration}: {a.content}" for a in summaries)
            )
        recent = self._recent_iterations(state)
        if recent:
            user_parts.append("## Recent iterations\n" + "\n\n".join(_render_iteration(it) for it in recent))
        user_parts.append(f"## Current observation\n{observation}\n\nDecide the next action(s).")

        return [
            {"role": "system", "content": "\n\n".join(system_parts)},
            {"role": "user", "content": "\n\n".join(user_parts)},
        ]

    def budget_report(self, state: State, observation: str = "") -> dict[str, Any]:
        """Itemized token accounting per section."""
        messages = self.build(state, observation)
        system, user = messages[0]["content"], messages[1]["content"]
        summaries = [a for a in state.artifacts if a.kind == "summary"]
        return {
            "pinned_system_tokens": _est_tokens(system),
            "summary_tokens": _est_tokens("\n".join(a.content for a in summaries)),
            "verbatim_tokens": _est_tokens(
                "\n".join(_render_iteration(it) for it in self._recent_iterations(state))
            ),
            "observation_tokens": _est_tokens(observation),
            "total_context_tokens": _est_tokens(system) + _est_tokens(user),
            "spent_input_tokens": state.metrics.input_tokens,
            "spent_output_tokens": state.metrics.output_tokens,
            "note": "estimates use a len/4 heuristic; provider-reported usage is authoritative",
        }
|
loop/core/__init__.py
ADDED
|
File without changes
|