tightloop 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
loop/__init__.py ADDED
@@ -0,0 +1,40 @@
1
+ """Loop — production-grade loops for AI agents.
2
+
3
+ A structured runtime for reliable, observable, governable agent loops.
4
+ """
5
+ from .approval import (
6
+ ApprovalDecision,
7
+ ApprovalRequest,
8
+ CallbackApprovalRunner,
9
+ CLIApprovalRunner,
10
+ HeadlessApprovalRunner,
11
+ )
12
+ from .blueprints import PytestFailureMetric, TestFixLoop
13
+ from .core.engine import Loop, LoopConfigError, NestedLoopError
14
+ from .core.result import LoopResult, LoopStatus
15
+ from .core.state import (
16
+ ArtifactDriftError,
17
+ MetricSnapshot,
18
+ SchemaChangedError,
19
+ State,
20
+ )
21
+ from .exit import Exit, ExitCondition
22
+ from .llm import CallableLLM, LLMClient, LLMResponse, ToolCallReq
23
+ from .policy import CostLimit, NoProgress, Policy, RequireApproval
24
+ from .progress import GoalMetric
25
+ from .tools import Tool, ToolRegistry, UnsupportedTypeError, run_command, tool
26
+ from .trace import explain
27
+
28
+ __version__ = "0.1.0"
29
+
30
+ __all__ = [
31
+ "Loop", "LoopResult", "LoopStatus", "State", "MetricSnapshot",
32
+ "Exit", "ExitCondition", "Policy", "NoProgress", "CostLimit", "RequireApproval",
33
+ "GoalMetric", "Tool", "tool", "ToolRegistry", "run_command",
34
+ "LLMClient", "LLMResponse", "ToolCallReq", "CallableLLM",
35
+ "ApprovalRequest", "ApprovalDecision", "CLIApprovalRunner",
36
+ "CallbackApprovalRunner", "HeadlessApprovalRunner",
37
+ "TestFixLoop", "PytestFailureMetric", "explain",
38
+ "NestedLoopError", "LoopConfigError", "SchemaChangedError",
39
+ "ArtifactDriftError", "UnsupportedTypeError",
40
+ ]
@@ -0,0 +1,87 @@
1
+ """Human approval checkpoints.
2
+
3
+ ApprovalRequest is frozen at type level: callbacks get a
4
+ read-only payload — action, args, reason, digests — never the full context.
5
+ Callback runner: 60s timeout, deny-on-exception, every invocation traced by the engine.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import secrets
10
+ from concurrent.futures import ThreadPoolExecutor
11
+ from concurrent.futures import TimeoutError as FutureTimeout
12
+ from enum import Enum
13
+ from typing import Any, Callable
14
+
15
+ from pydantic import BaseModel, ConfigDict
16
+
17
+
18
class ApprovalDecision(str, Enum):
    """Outcome reported by an approval runner.

    Members mix in ``str``, so each one compares equal to its own name.
    """

    APPROVED = "APPROVED"
    DENIED = "DENIED"
    # Headless flow: serialize state, resume by token.
    PENDING = "PENDING"
22
+
23
+
24
class ApprovalRequest(BaseModel):
    """Read-only payload handed to an approval runner.

    The model is declared frozen, so attribute assignment on an instance is
    rejected by pydantic. NOTE(review): freezing presumably does not protect
    the *contents* of ``args`` (a plain dict) from in-place mutation — confirm
    before relying on deep immutability.
    """

    model_config = ConfigDict(frozen=True)

    # Identifier for this request; the headless runner echoes it in its
    # PENDING message.
    token: str
    # Tool name, its arguments and the stated reason; the CLI runner prints
    # all three when prompting.
    tool: str
    args: dict[str, Any]
    reason: str
    # The remaining fields are not read anywhere in this module.
    # Presumably: digest of the proposed action, state version at request
    # time, creation timestamp, and validity window in seconds — TODO confirm
    # against the engine.
    action_hash: str
    state_version: int
    created_at: float
    ttl_s: float
35
+
36
+
37
def new_token(nbytes: int = 8) -> str:
    """Return a fresh URL-safe random token drawn from the ``secrets`` CSPRNG.

    Args:
        nbytes: Number of random bytes to encode. The default of 8 (64 bits,
            11 characters) keeps the existing token format; pass a larger
            value where tokens need more resistance to guessing.

    Returns:
        The base64url-encoded token, without padding.
    """
    return secrets.token_urlsafe(nbytes)
39
+
40
+
41
class ApprovalRunner:
    """Base interface for approval backends; subclasses override ``request``."""

    def request(self, req: ApprovalRequest) -> tuple[ApprovalDecision, str]:
        """Decide on ``req`` and return ``(decision, human-readable note)``.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError()
44
+
45
+
46
class CLIApprovalRunner(ApprovalRunner):
    """Interactive default: prompts on stdin."""

    def request(self, req: ApprovalRequest) -> tuple[ApprovalDecision, str]:
        """Print the pending action and ask the operator to approve it.

        Only an explicit ``y`` / ``yes`` (case-insensitive, surrounding
        whitespace ignored) approves; any other answer denies.

        Returns:
            ``(decision, note)`` where ``decision`` is APPROVED or DENIED.
        """
        print(f"\n[loop] approval required: {req.tool}({req.args})\nreason: {req.reason}")
        try:
            answer = input("approve? [y/N] ").strip().lower()
        except EOFError:
            # stdin is closed or exhausted (e.g. a non-interactive run):
            # fail closed rather than crash the caller.
            return ApprovalDecision.DENIED, "denied via CLI (no input available on stdin)"
        if answer in ("y", "yes"):
            return ApprovalDecision.APPROVED, "approved via CLI"
        return ApprovalDecision.DENIED, "denied via CLI"
55
+
56
+
57
class CallbackApprovalRunner(ApprovalRunner):
    """Delegates the decision to a user-supplied callable, failing closed.

    The callback runs on a worker thread. A timeout, an exception, or a
    return value that is not a real ``bool`` all produce DENIED.
    """

    def __init__(self, fn: Callable[[ApprovalRequest], bool], timeout_s: float = 60.0):
        """
        Args:
            fn: Callback that receives the request and returns ``True`` to
                approve or ``False`` to deny.
            timeout_s: Seconds to wait for ``fn`` before denying.
        """
        self.fn = fn
        self.timeout_s = timeout_s

    def request(self, req: ApprovalRequest) -> tuple[ApprovalDecision, str]:
        """Invoke the callback and translate its outcome into a decision.

        Returns:
            ``(decision, note)``; the note records why a request was denied.
        """
        executor = ThreadPoolExecutor(max_workers=1)
        try:
            future = executor.submit(self.fn, req)
            try:
                approved = future.result(timeout=self.timeout_s)
            except FutureTimeout:
                return ApprovalDecision.DENIED, f"callback timed out after {self.timeout_s}s (deny-on-timeout)"
            except Exception as e:
                return ApprovalDecision.DENIED, f"callback raised {type(e).__name__}: {e} (deny-on-exception)"
        finally:
            # wait=False: do not block on a callback that is still running
            # after the timeout; its thread is left to finish on its own.
            executor.shutdown(wait=False)

        # Approve only on a literal True. Plain truthiness would approve on
        # any truthy object -- notably the coroutine object returned by an
        # ``async def`` callback that was never awaited.
        if approved is True:
            return ApprovalDecision.APPROVED, "approved via callback"
        if approved is False:
            return ApprovalDecision.DENIED, "denied via callback"
        return (
            ApprovalDecision.DENIED,
            f"callback returned non-bool {type(approved).__name__} (deny-on-invalid-result)",
        )
77
+
78
+
79
class HeadlessApprovalRunner(ApprovalRunner):
    """Non-interactive runner that never decides on the spot.

    Every request is answered with PENDING. Per the original module notes, the
    engine then serializes state and exits AWAITING_APPROVAL, and the run is
    resumed with Loop.resume(path, approval={'token': ..., 'approved': True}).
    """

    def __init__(self, ttl_s: float = 3600.0):
        # Stored only; nothing in this class reads it back.
        self.ttl_s = ttl_s

    def request(self, req: ApprovalRequest) -> tuple[ApprovalDecision, str]:
        """Return PENDING together with a note carrying the request token."""
        note = f"awaiting approval, token={req.token}"
        return ApprovalDecision.PENDING, note
@@ -0,0 +1,3 @@
1
+ from .testfix import PytestFailureMetric, TestFixLoop
2
+
3
+ __all__ = ["TestFixLoop", "PytestFailureMetric"]
@@ -0,0 +1,117 @@
1
+ """TestFixLoop blueprint: fix failing tests until all pass.
2
+
3
+ Progress tracks test IDENTITY, not counts: value = originally_failing_fixed −
4
+ newly_broken, and newly-broken tests flag `regression` even when totals improve.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import re
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from ..core.engine import Loop
13
+ from ..core.state import MetricSnapshot, State
14
+ from ..llm import LLMClient
15
+ from ..progress import GoalMetric
16
+ from ..tools import Tool, run_command
17
+
18
# Summary lines look like "FAILED <nodeid> - <message>". The rest of the line is
# captured (not just the first whitespace-free run) because parametrized node
# ids may contain spaces, e.g. "test_x[a b]". "[ \t]+" keeps the match on one
# line; "\S" guarantees the captured id is never blank.
_FAILED_RE = re.compile(r"^(?:FAILED|ERROR)[ \t]+(\S.*)$", re.MULTILINE)
_EXIT_RE = re.compile(r"\[exit code: (-?\d+)\]")


def parse_failing(output: str) -> set[str]:
    """Return the ids named on ``FAILED`` / ``ERROR`` lines of ``output``.

    Each id is the text after the keyword up to the first ``" - "`` separator
    (or to the end of the line when there is none), with surrounding
    whitespace removed.

    Args:
        output: Captured test-runner output.

    Returns:
        The set of ids; empty when no such line is present.
    """
    return {tail.split(" - ", 1)[0].strip() for tail in _FAILED_RE.findall(output)}
24
+
25
+
26
class PytestFailureMetric(GoalMetric):
    """Goal metric keyed on which tests fail rather than on how many."""

    def measure(self, observation: str, state: State) -> MetricSnapshot:
        """Score ``observation`` against the failing sets recorded in ``state``.

        The baseline is taken from the first recorded snapshot whose detail
        has a ``"failing"`` entry (or from the current failures when none
        exists). The value is ``fixed - newly_broken`` relative to that
        baseline. ``regression`` is set when a test outside the baseline fails,
        or when a test fails now that did not fail in the latest snapshot.
        """
        now_failing = parse_failing(observation)
        code_match = _EXIT_RE.search(observation)
        exit_code = None if code_match is None else int(code_match.group(1))

        # Replay recorded snapshots: the first fixes the baseline, the last
        # one seen provides the previous failing set.
        baseline: set[str] | None = None
        previous: set[str] | None = None
        for record in state.iterations:
            if not (record.metric and "failing" in record.metric.detail):
                continue
            recorded = record.metric.detail
            if baseline is None:
                baseline = set(recorded["baseline"] or recorded["failing"])
            previous = set(recorded["failing"])
        if baseline is None:
            baseline = set(now_failing)

        fixed = baseline - now_failing
        newly_broken = now_failing - baseline
        if previous is None:
            regressed_vs_prev = False
        else:
            regressed_vs_prev = bool(now_failing - previous)

        summary = {
            "failing": sorted(now_failing),
            "baseline": sorted(baseline),
            "fixed": len(fixed),
            "newly_broken": len(newly_broken),
            "exit_code": exit_code,
        }
        return MetricSnapshot(
            value=float(len(fixed) - len(newly_broken)),
            regression=bool(newly_broken) or regressed_vs_prev,
            detail=summary,
        )

    def is_success(self, snapshot: MetricSnapshot) -> bool:
        """True only when nothing is failing and the recorded exit code is 0."""
        if snapshot.detail.get("failing"):
            return False
        return snapshot.detail.get("exit_code") == 0
59
+
60
+
61
class TestFixLoop(Loop):
    """Loop blueprint that hands the model run/read/edit tools over one repository.

    File tools refuse any path that resolves outside the repository root and
    always read and write UTF-8, independent of the platform's locale
    encoding.
    """

    __test__ = False  # not a pytest test class, despite the name

    def __init__(
        self,
        llm: LLMClient,
        repo: str = ".",
        test_cmd: str = "python -m pytest -q -rf --tb=short",
        test_timeout_s: float = 300.0,
        goal: str | None = None,
        **kwargs: Any,
    ):
        """
        Args:
            llm: Client forwarded to the base ``Loop``.
            repo: Repository root; resolved to an absolute path once, here.
            test_cmd: Command run from the repository root to execute tests.
            test_timeout_s: Timeout passed to ``run_command`` for each run.
            goal: Goal text; defaults to a sentence naming the repository.
            **kwargs: Forwarded unchanged to ``Loop.__init__``.
        """
        repo_path = Path(repo).resolve()

        def _resolve_in_repo(path: str) -> Path:
            # Resolve before checking so ".." segments and symlinks cannot
            # smuggle the target outside the repository.
            target = (repo_path / path).resolve()
            if not target.is_relative_to(repo_path):
                raise ValueError(f"path {path!r} escapes the repository")
            return target

        def _run_tests() -> str:
            res = run_command(test_cmd, timeout_s=test_timeout_s, cwd=str(repo_path))
            suffix = " [timed out]" if res.timed_out else ""
            return f"{res.stdout}\n{res.stderr}\n[exit code: {res.code}]{suffix}"

        def run_tests() -> str:
            """Run the test suite and return its output."""
            return _run_tests()

        def read_file(path: str) -> str:
            """Read a file from the repository."""
            # Explicit UTF-8: the locale default would mis-decode source
            # files on platforms whose default encoding is not UTF-8.
            return _resolve_in_repo(path).read_text(encoding="utf-8")

        def edit_file(path: str, content: str) -> str:
            """Replace the full contents of a file in the repository."""
            target = _resolve_in_repo(path)
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(content, encoding="utf-8")
            # drop stale bytecode: pyc validation uses whole-second mtime + size,
            # so a same-size edit within the same second would be masked by the cache
            cache_dir = target.parent / "__pycache__"
            if target.suffix == ".py" and cache_dir.is_dir():
                for pyc in cache_dir.glob(f"{target.stem}.*.pyc"):
                    pyc.unlink(missing_ok=True)
            return f"wrote {len(content)} chars to {path}"

        super().__init__(
            goal=goal or f"Fix failing tests in {repo_path.name} until all pass",
            tools=[
                # Tool-level timeout is 10s longer than the command timeout.
                Tool(run_tests, timeout_s=test_timeout_s + 10),
                Tool(read_file),
                Tool(edit_file),
            ],
            llm=llm,
            observe=lambda state: _run_tests(),
            goal_metric=PytestFailureMetric(),
            **kwargs,
        )
@@ -0,0 +1,144 @@
1
+ """Context manager: pinned facts never summarized away,
2
+ failed-approaches registry always in context, version-stamped summaries
3
+ computed once and stored (deterministic resume), transparent token accounting.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import hashlib
8
+ from typing import Any
9
+
10
+ from ..core.state import (
11
+ ENGINE_VERSION,
12
+ ArtifactStamp,
13
+ ContextArtifact,
14
+ IterationRecord,
15
+ State,
16
+ )
17
+ from ..llm import LLMClient
18
+
19
# Template sent to the summarizer; "{body}" is filled with the rendered iteration.
SUMMARY_PROMPT = (
    "Summarize this agent-loop iteration in 3 sentences or fewer. "
    "Preserve: what was attempted, the outcome, and any error messages verbatim."
    "\n\n{body}"
)
# First 16 hex characters of the template's SHA-256 digest.
_PROMPT_HASH = hashlib.sha256(SUMMARY_PROMPT.encode()).hexdigest()[:16]
24
+
25
+
26
def _render_iteration(it: IterationRecord, result_cap: int = 2000) -> str:
    """Render one iteration record as a newline-joined text section.

    Args:
        it: Record to render.
        result_cap: Maximum characters kept from the observation and from each
            action's result excerpt.

    Returns:
        A heading line followed by whichever of observation, plan,
        invalid-plan note, actions and metric the record carries.
    """
    parts = [f"### Iteration {it.index}"]

    if it.observation:
        parts.append(f"Observed:\n{it.observation[:result_cap]}")
    if it.plan_text:
        parts.append(f"Planned: {it.plan_text[:600]}")
    if it.plan_invalid:
        parts.append("(plan failed tool-argument validation)")

    parts.extend(
        f"Action {act.tool}({act.args_excerpt[:300]}) -> [{act.status}] {act.result_excerpt[:result_cap]}"
        for act in it.actions
    )

    if it.metric:
        flag = " (REGRESSION)" if it.metric.regression else ""
        parts.append(f"Metric: {it.metric.value}" + flag)

    return "\n".join(parts)
39
+
40
+
41
def _est_tokens(text: str) -> int:
    """Estimate the token count of ``text`` as one token per four characters.

    This is the documented heuristic; it is itemized in reports, not hidden.
    """
    chars_per_token = 4
    return len(text) // chars_per_token
43
+
44
+
45
class ContextManager:
    """Builds the per-step LLM context and maintains stored iteration summaries.

    The newest ``verbatim_window`` iterations are rendered in full; older ones
    are represented by summaries that are computed once and stored on the
    state as artifacts.
    """

    def __init__(self, verbatim_window: int = 3, summarizer: LLMClient | None = None,
                 summary_max_tokens: int = 400):
        """
        Args:
            verbatim_window: Number of newest iterations rendered verbatim.
                Zero or a negative value renders none verbatim.
            summarizer: LLM used to summarize older iterations; when ``None``
                a truncated rendering is stored instead.
            summary_max_tokens: Token limit passed to the summarizer.
        """
        self.verbatim_window = verbatim_window
        self.summarizer = summarizer
        self.summary_max_tokens = summary_max_tokens

    @property
    def stamp(self) -> ArtifactStamp:
        """Stamp describing the configuration that would produce a summary now."""
        model_id = self.summarizer.model_id if self.summarizer else "deterministic-truncate"
        return ArtifactStamp(engine_version=ENGINE_VERSION, model_id=model_id, prompt_hash=_PROMPT_HASH)

    def _recent_iterations(self, state: State) -> list[IterationRecord]:
        """Return the iterations inside the verbatim window, oldest first."""
        # A bare ``iterations[-window:]`` is wrong for window == 0: ``[-0:]``
        # is the whole list, which would render every iteration verbatim.
        if self.verbatim_window <= 0:
            return []
        return state.iterations[-self.verbatim_window:]

    def check_artifact_drift(self, state: State) -> list[str]:
        """Returns mismatch descriptions for artifacts produced under a different config."""
        current = self.stamp
        problems = []
        for a in state.artifacts:
            if a.kind == "summary" and a.stamp != current:
                problems.append(
                    f"summary for iteration {a.iteration}: produced by "
                    f"{a.stamp.engine_version}/{a.stamp.model_id}, current is "
                    f"{current.engine_version}/{current.model_id}"
                )
        return problems

    def ensure_summaries(self, state: State) -> list[int]:
        """Summarize iterations that just left the verbatim window. Computed once,
        stored, reused on resume — never recomputed.

        Returns:
            Indices of the iterations summarized by this call.
        """
        done = {a.iteration for a in state.artifacts if a.kind == "summary"}
        cutoff = len(state.iterations) - self.verbatim_window
        stale = state.iterations[:cutoff] if cutoff > 0 else []
        created = []
        for it in stale:
            if it.index in done:
                continue
            body = _render_iteration(it, result_cap=600)
            if self.summarizer:
                resp = self.summarizer.complete(
                    [{"role": "user", "content": SUMMARY_PROMPT.format(body=body)}],
                    [], self.summary_max_tokens,
                )
                content = resp.text
                # Summarizer usage is added to the run's own metrics.
                state.metrics.input_tokens += resp.input_tokens
                state.metrics.output_tokens += resp.output_tokens
                state.metrics.llm_calls += 1
            else:
                content = body[:800]  # deterministic fallback
            state.artifacts.append(
                ContextArtifact(kind="summary", iteration=it.index, content=content, stamp=self.stamp)
            )
            created.append(it.index)
        return created

    def build(self, state: State, observation: str) -> list[dict[str, str]]:
        """Assemble the two-message (system, user) context for the next step.

        The system message holds the instructions, the goal, pinned facts and
        failed approaches. The user message holds stored summaries, the
        verbatim window and the current observation.
        """
        system_parts = [
            "You are an agent executing one step of a structured loop. Use the provided "
            "tools to make progress toward the goal. Respond with tool calls.",
            f"Goal: {state.goal}",
        ]
        if state.pinned_facts:
            system_parts.append("Key facts (pinned):\n" + "\n".join(f"- {f}" for f in state.pinned_facts))
        if state.failed_approaches:
            system_parts.append(
                "Approaches already tried that FAILED (do not repeat):\n"
                + "\n".join(f"- {f}" for f in state.failed_approaches)
            )

        user_parts = []
        summaries = [a for a in state.artifacts if a.kind == "summary"]
        if summaries:
            user_parts.append(
                "## Earlier iterations (summarized)\n"
                + "\n".join(f"- iter {a.iteration}: {a.content}" for a in summaries)
            )
        recent = self._recent_iterations(state)
        if recent:
            user_parts.append("## Recent iterations\n" + "\n\n".join(_render_iteration(it) for it in recent))
        user_parts.append(f"## Current observation\n{observation}\n\nDecide the next action(s).")

        return [
            {"role": "system", "content": "\n\n".join(system_parts)},
            {"role": "user", "content": "\n\n".join(user_parts)},
        ]

    def budget_report(self, state: State, observation: str = "") -> dict[str, Any]:
        """Itemized token accounting per section."""
        messages = self.build(state, observation)
        system, user = messages[0]["content"], messages[1]["content"]
        summaries = [a for a in state.artifacts if a.kind == "summary"]
        return {
            "pinned_system_tokens": _est_tokens(system),
            "summary_tokens": _est_tokens("\n".join(a.content for a in summaries)),
            "verbatim_tokens": _est_tokens(
                "\n".join(_render_iteration(it) for it in self._recent_iterations(state))
            ),
            "observation_tokens": _est_tokens(observation),
            "total_context_tokens": _est_tokens(system) + _est_tokens(user),
            "spent_input_tokens": state.metrics.input_tokens,
            "spent_output_tokens": state.metrics.output_tokens,
            "note": "estimates use a len/4 heuristic; provider-reported usage is authoritative",
        }
loop/core/__init__.py ADDED
File without changes