tightloop 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
loop/core/state.py ADDED
@@ -0,0 +1,143 @@
1
+ """Explicit, serializable loop state."""
2
+ from __future__ import annotations
3
+
4
+ import hashlib
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any, Literal
8
+
9
+ from pydantic import BaseModel, Field
10
+
11
# On-disk state format version; State.load rejects files whose schema_version differs.
SCHEMA_VERSION = 1
# Engine release identifier.
# NOTE(review): presumably the value recorded in ArtifactStamp.engine_version — confirm.
ENGINE_VERSION = "0.1.0"
# Default `cap` for excerpt().
INLINE_CAP = 16 * 1024  # max inline chars per record; large payloads live in the trace
14
+
15
+
16
class StateError(Exception):
    """Base error for state persistence; State.load raises it for unreadable,
    tampered, or unsupported state files."""

    pass
18
+
19
+
20
class SchemaChangedError(StateError):
    """Tool schemas changed since the state was saved (resume requires allow_schema_change).

    NOTE(review): presumably detected by comparing State.tool_schema_hash on
    resume; the raising code is not in this module — confirm.
    """
22
+
23
+
24
class ArtifactDriftError(StateError):
    """Stored context artifacts were produced by a different engine/summarizer version.

    NOTE(review): presumably detected from ContextArtifact.stamp; the raising
    code is not in this module — confirm.
    """
26
+
27
+
28
def digest(text: str) -> str:
    """Return a short fingerprint of *text*: the first 16 hex chars of its SHA-256."""
    hasher = hashlib.sha256()
    hasher.update(text.encode())
    full_hex = hasher.hexdigest()
    return full_hex[:16]
30
+
31
+
32
def excerpt(text: str, cap: int = INLINE_CAP) -> str:
    """Return *text* unchanged if it fits in *cap* characters, else a truncated copy.

    A truncated copy keeps the first *cap* characters and appends a marker line
    stating how many characters were dropped plus the digest of the full text.
    """
    overflow = len(text) - cap
    if overflow <= 0:
        return text
    head = text[:cap]
    marker = f"\n...[truncated {overflow} chars, digest={digest(text)}]"
    return head + marker
36
+
37
+
38
class ArtifactStamp(BaseModel):
    """Provenance stamp carried by every ContextArtifact (its `stamp` field)."""

    # NOTE(review): presumably ENGINE_VERSION at the time the artifact was produced — confirm.
    engine_version: str
    # Identifier of the model that produced the artifact.
    model_id: str
    # NOTE(review): presumably a digest of the summarizer prompt — confirm against the producer.
    prompt_hash: str
42
+
43
+
44
class ContextArtifact(BaseModel):
    """A derived piece of context (a summary or a fact) stored in State.artifacts."""

    # Closed set of artifact kinds.
    kind: Literal["summary", "fact"]
    # NOTE(review): presumably the iteration index the artifact relates to; optional — confirm.
    iteration: int | None = None
    # The artifact text itself.
    content: str
    # Provenance of the artifact (engine version, model, prompt hash).
    stamp: ArtifactStamp
49
+
50
+
51
class MetricSnapshot(BaseModel):
    """One measurement of the goal metric, as returned by GoalMetric.measure()."""

    # Metric value; ProgressEngine treats a positive delta between iterations as improving.
    value: float
    # When True, ProgressEngine reports the trend as "regressing" regardless of the delta.
    regression: bool = False
    # Free-form extra data attached to the measurement.
    detail: dict[str, Any] = Field(default_factory=dict)
55
+
56
+
57
class ActionRecord(BaseModel):
    """Record of a single tool action taken within an iteration."""

    # Name of the tool that was invoked.
    tool: str
    # NOTE(review): presumably the call arguments shortened with excerpt() — confirm.
    args_excerpt: str
    # Outcome of the action.
    status: Literal["ok", "error", "aborted"]
    # NOTE(review): presumably the tool result shortened with excerpt() — confirm.
    result_excerpt: str
    # NOTE(review): name suggests duration in seconds — confirm.
    duration_s: float
    # Identity of the action; ProgressEngine compares fingerprint sets across
    # consecutive iterations to detect repetition.
    fingerprint: str
64
+
65
+
66
class IterationRecord(BaseModel):
    """Everything recorded about one loop iteration."""

    # NOTE(review): presumably the position of this iteration in State.iterations — confirm.
    index: int
    # Observation text for the iteration.
    observation: str
    # Plan text for the iteration; empty when none was recorded.
    plan_text: str = ""
    # Actions executed during the iteration.
    actions: list[ActionRecord] = Field(default_factory=list)
    # Goal-metric measurement, if one was taken.
    metric: MetricSnapshot | None = None
    # Set by ProgressEngine.evaluate: True when this iteration's non-empty
    # fingerprint set equals the previous iteration's.
    repetition: bool = False
    # Read by ProgressEngine.evaluate to drive State.plan_invalid_streak.
    plan_invalid: bool = False
    # Token usage attributed to this iteration.
    input_tokens: int = 0
    output_tokens: int = 0
76
+
77
+
78
class Metrics(BaseModel):
    """Cumulative usage counters for a run (stored as State.metrics)."""

    # Token totals; read by CostLimit and, via total_tokens, by Exit.token_limit.
    input_tokens: int = 0
    output_tokens: int = 0
    # Number of LLM calls made.
    llm_calls: int = 0
    # NOTE(review): name suggests elapsed time in seconds — confirm.
    elapsed_s: float = 0.0
    # USD cost, or None when unavailable; Exit.cost_limit ignores None.
    cost_usd: float | None = None

    @property
    def total_tokens(self) -> int:
        """Sum of input and output tokens."""
        return self.input_tokens + self.output_tokens
88
+
89
+
90
class PendingApproval(BaseModel):
    """An action waiting on human approval (stored as State.pending_approval).

    NOTE(review): the semantics below are inferred from field names; the code
    that creates and consumes this record is not in this module — confirm.
    """

    # Opaque token identifying the approval request.
    token: str
    # Tool name and arguments of the action awaiting approval.
    tool: str
    args: dict[str, Any]
    # Why approval was requested.
    reason: str
    # presumably a hash binding the approval to this exact action — confirm
    action_hash: str
    # presumably State.state_version at the time of the request — confirm
    state_version: int
    # presumably creation timestamp and time-to-live, both in seconds — confirm
    created_at: float
    ttl_s: float
    plan_text: str = ""
    # presumably the goal metric captured when the request was made — confirm
    precondition_metric: MetricSnapshot | None = None
101
+
102
+
103
class State(BaseModel):
    """Complete, serializable state of a loop run.

    Persisted by save() as a JSON wrapper ``{"integrity": <sha256>, "state": {...}}``
    and restored by load(), which verifies the integrity hash and schema version.
    """

    schema_version: int = SCHEMA_VERSION
    goal: str
    config: dict[str, Any] = Field(default_factory=dict)
    tool_schema_hash: str = ""
    iterations: list[IterationRecord] = Field(default_factory=list)
    artifacts: list[ContextArtifact] = Field(default_factory=list)
    pinned_facts: list[str] = Field(default_factory=list)
    failed_approaches: list[str] = Field(default_factory=list)
    metrics: Metrics = Field(default_factory=Metrics)
    # Maintained by ProgressEngine.evaluate; read by NoProgress and Exit.stagnation.
    no_progress_streak: int = 0
    # Maintained by ProgressEngine.evaluate.
    plan_invalid_streak: int = 0
    state_version: int = 0
    pending_approval: PendingApproval | None = None

    def save(self, path: str | Path) -> None:
        """Write the state to *path* atomically.

        The file is first written to a sibling ``<name>.tmp`` and then renamed
        over the target, so an interrupted save never leaves a truncated state
        file behind.

        Raises:
            OSError: if the file cannot be written or renamed.
        """
        payload = self.model_dump(mode="json")
        # The hash covers the canonical (sorted-key) form so load() can recompute it.
        body = json.dumps(payload, sort_keys=True)
        wrapper = {
            "integrity": hashlib.sha256(body.encode()).hexdigest(),
            "state": payload,
        }
        target = Path(path)
        tmp = target.with_name(f"{target.name}.tmp")
        try:
            tmp.write_text(json.dumps(wrapper, indent=2), encoding="utf-8")
            tmp.replace(target)
        finally:
            # No-op after a successful rename; removes the partial file on failure.
            tmp.unlink(missing_ok=True)

    @classmethod
    def load(cls, path: str | Path) -> "State":
        """Read and verify a state file written by save().

        Raises:
            StateError: if the file cannot be read or decoded, is not a state
                wrapper, fails the integrity check, or has an unsupported
                schema_version.
        """
        try:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            wrapper = json.loads(Path(path).read_text(encoding="utf-8"))
        except (OSError, ValueError) as e:
            raise StateError(f"cannot read state file {path}: {e}") from e
        # Guard against valid JSON that is not an object (e.g. `[]` or `"x"`),
        # which would otherwise surface as a raw AttributeError.
        payload = wrapper.get("state") if isinstance(wrapper, dict) else None
        if not isinstance(payload, dict):
            raise StateError(f"{path} is not a Loop state file")
        body = json.dumps(payload, sort_keys=True)
        if hashlib.sha256(body.encode()).hexdigest() != wrapper.get("integrity"):
            raise StateError(f"integrity check failed for {path}")
        if payload.get("schema_version") != SCHEMA_VERSION:
            raise StateError(
                f"state schema_version {payload.get('schema_version')} != supported {SCHEMA_VERSION}"
            )
        return cls.model_validate(payload)
loop/exit/__init__.py ADDED
@@ -0,0 +1,60 @@
1
+ """Declarative exit conditions. Evaluated after each iteration;
2
+ first matching exit wins. The always-on ceilings (max_iterations, token_limit,
3
+ wall-clock) are engine constructor args — these are additional conditions."""
4
+ from __future__ import annotations
5
+
6
+ from typing import Callable
7
+
8
+ from ..core.result import LoopStatus
9
+ from ..core.state import State
10
+
11
+
12
class ExitCondition:
    """Base class for declarative exit conditions."""

    def evaluate(self, state: State) -> tuple[LoopStatus, str] | None:
        """Return ``(status, reason)`` when the loop should exit, or None to continue.

        Subclasses must override this; the base implementation raises
        NotImplementedError.
        """
        raise NotImplementedError
15
+
16
+
17
class _Lambda(ExitCondition):
    """ExitCondition backed by a plain callable; used by the Exit factories."""

    def __init__(self, fn: Callable[[State], tuple[LoopStatus, str] | None]):
        # fn receives the State and returns (status, reason) or None.
        self.fn = fn

    def evaluate(self, state: State) -> tuple[LoopStatus, str] | None:
        """Delegate to the wrapped callable."""
        return self.fn(state)
23
+
24
+
25
class Exit:
    """Factory namespace for the built-in exit conditions.

    Each factory returns an ExitCondition whose evaluate() yields
    ``(status, reason)`` when the condition holds and None otherwise.
    """

    @staticmethod
    def success(predicate: Callable[[State], bool], reason: str = "goal achieved") -> ExitCondition:
        """Exit with SUCCESS as soon as *predicate* is truthy for the state."""

        def check(state: State) -> tuple[LoopStatus, str] | None:
            if predicate(state):
                return (LoopStatus.SUCCESS, reason)
            return None

        return _Lambda(check)

    @staticmethod
    def max_iterations(n: int = 20) -> ExitCondition:
        """Exit with BUDGET_EXHAUSTED once at least *n* iterations are recorded."""

        def check(state: State) -> tuple[LoopStatus, str] | None:
            if len(state.iterations) >= n:
                return (LoopStatus.BUDGET_EXHAUSTED, f"max_iterations {n} reached")
            return None

        return _Lambda(check)

    @staticmethod
    def token_limit(n: int) -> ExitCondition:
        """Exit with BUDGET_EXHAUSTED once total tokens reach *n*."""

        def check(state: State) -> tuple[LoopStatus, str] | None:
            if state.metrics.total_tokens >= n:
                return (LoopStatus.BUDGET_EXHAUSTED, f"token_limit {n} reached")
            return None

        return _Lambda(check)

    @staticmethod
    def cost_limit(usd: float) -> ExitCondition:
        """Exit with BUDGET_EXHAUSTED once the recorded USD cost reaches *usd*.

        Never fires while ``metrics.cost_usd`` is None.
        """

        def check(state: State) -> tuple[LoopStatus, str] | None:
            spent = state.metrics.cost_usd
            if spent is not None and state.metrics.cost_usd >= usd:
                return (LoopStatus.BUDGET_EXHAUSTED, f"cost_limit ${usd:.2f} reached")
            return None

        return _Lambda(check)

    @staticmethod
    def stagnation(n: int = 3) -> ExitCondition:
        """Exit with NO_PROGRESS once the no-progress streak reaches *n*."""

        def check(state: State) -> tuple[LoopStatus, str] | None:
            if state.no_progress_streak >= n:
                return (
                    LoopStatus.NO_PROGRESS,
                    f"stagnation: {state.no_progress_streak} flat iterations",
                )
            return None

        return _Lambda(check)
loop/llm/__init__.py ADDED
@@ -0,0 +1,70 @@
1
+ """LLMClient protocol + canonical ToolCall normalization.
2
+
3
+ Provider responses are normalized into LLMResponse/ToolCallReq at this
4
+ boundary, so the engine handles hallucinated or malformed tool calls uniformly
5
+ regardless of provider leniency.
6
+
7
+ Timeout defaults are recommended values, not SLA-backed (see README).
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import time
12
+ from typing import Any, Callable
13
+
14
+ from pydantic import BaseModel, Field
15
+
16
# Default request timeout in seconds for LLM clients (recommended value, not SLA-backed).
DEFAULT_TIMEOUT_S = 120.0
17
+
18
+
19
class ToolCallReq(BaseModel):
    """Provider-independent form of a tool call requested by the model."""

    # Provider-assigned call id; empty when the provider supplies none.
    id: str = ""
    # Name of the requested tool.
    name: str
    # Parsed arguments. The OpenAI adapter stores unparseable arguments under
    # the "__malformed__" key.
    args: dict[str, Any] = Field(default_factory=dict)
23
+
24
+
25
class LLMResponse(BaseModel):
    """Normalized result of one LLM completion."""

    # Text returned by the model.
    text: str = ""
    # Tool calls requested by the model, in the order the adapter collected them.
    tool_calls: list[ToolCallReq] = Field(default_factory=list)
    # Token usage reported by the provider (0 when not reported).
    input_tokens: int = 0
    output_tokens: int = 0
    # Model that produced the response.
    model_id: str = ""
31
+
32
+
33
class LLMClient:
    """Adapter protocol. Subclasses implement complete()."""

    # Class-level defaults; adapters override them per instance.
    model_id: str = "unknown"
    timeout_s: float = DEFAULT_TIMEOUT_S

    def complete(
        self,
        messages: list[dict[str, str]],
        tool_schemas: list[dict[str, Any]],
        max_tokens: int,
    ) -> LLMResponse:
        """Run one completion and return the normalized response.

        The bundled adapters read ``role``/``content`` from each message and
        ``name``/``description``/``input_schema`` from each tool schema.
        The base implementation raises NotImplementedError.
        """
        raise NotImplementedError
46
+
47
+
48
class CallableLLM(LLMClient):
    """Wraps any fn(messages, tool_schemas) -> LLMResponse. Useful for tests and raw APIs."""

    def __init__(self, fn: Callable[[list[dict], list[dict]], LLMResponse], model_id: str = "callable"):
        self.fn = fn
        self.model_id = model_id

    def complete(self, messages, tool_schemas, max_tokens):
        """Call the wrapped function; max_tokens is accepted but not forwarded."""
        return self.fn(messages, tool_schemas)
57
+
58
+
59
def complete_with_retry(
    client: LLMClient,
    messages: list[dict[str, str]],
    tool_schemas: list[dict[str, Any]],
    max_tokens: int,
    retries: int = 1,
    backoff_s: float = 1.0,
) -> LLMResponse:
    """Call ``client.complete`` and retry with backoff on failure.

    Args:
        client: the LLM adapter to call.
        messages, tool_schemas, max_tokens: forwarded unchanged to ``complete``.
        retries: number of extra attempts after the first failure (default 1,
            matching the previous fixed behavior).
        backoff_s: delay in seconds before the first retry; doubled before each
            later retry.

    Returns:
        The first successful LLMResponse.

    Raises:
        NotImplementedError: immediately, without retrying — an adapter that
            does not implement ``complete`` will not start working after a sleep.
        Exception: the error from the final attempt once retries are exhausted.
    """
    attempt = 0
    while True:
        try:
            return client.complete(messages, tool_schemas, max_tokens)
        except NotImplementedError:
            # Not transient: retrying only adds a pointless delay.
            raise
        except Exception:
            if attempt >= retries:
                raise
            time.sleep(backoff_s * (2 ** attempt))
            attempt += 1
loop/llm/anthropic.py ADDED
@@ -0,0 +1,45 @@
1
+ """Anthropic adapter. Requires the `anthropic` extra. Default timeout: 120s (not SLA-backed)."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Any
5
+
6
+ from . import DEFAULT_TIMEOUT_S, LLMClient, LLMResponse, ToolCallReq
7
+
8
+
9
class AnthropicLLM(LLMClient):
    """LLMClient adapter for the Anthropic Messages API.

    System messages are concatenated into the top-level ``system`` parameter;
    every other message is forwarded as conversation. Text blocks and
    ``tool_use`` blocks of the reply are normalized into an LLMResponse.
    """

    def __init__(self, model: str = "claude-sonnet-4-6", timeout_s: float = DEFAULT_TIMEOUT_S, **client_kwargs: Any):
        """Create the SDK client.

        Args:
            model: model identifier sent with every request.
            timeout_s: request timeout handed to the SDK client.
            **client_kwargs: forwarded to ``anthropic.Anthropic``.
        """
        import anthropic  # lazy: optional dependency

        self.model_id = model
        self.timeout_s = timeout_s
        self._client = anthropic.Anthropic(timeout=timeout_s, **client_kwargs)

    def complete(self, messages, tool_schemas, max_tokens):
        """Send one Messages request and return the normalized response."""
        system = "\n\n".join(m["content"] for m in messages if m["role"] == "system")
        convo = [m for m in messages if m["role"] != "system"]

        request: dict[str, Any] = {
            "model": self.model_id,
            "messages": convo,
            "max_tokens": max_tokens,
        }
        # Optional parameters are omitted rather than sent as an explicit None:
        # the SDK distinguishes "not given" from null.
        if system:
            request["system"] = system
        if tool_schemas:
            request["tools"] = [
                {"name": s["name"], "description": s.get("description", ""), "input_schema": s["input_schema"]}
                for s in tool_schemas
            ]
        resp = self._client.messages.create(**request)

        text_parts: list[str] = []
        calls: list[ToolCallReq] = []
        for block in resp.content:
            if block.type == "text":
                text_parts.append(block.text)
            elif block.type == "tool_use":
                # Non-dict input is normalized to empty args.
                args = block.input if isinstance(block.input, dict) else {}
                calls.append(ToolCallReq(id=block.id, name=block.name, args=args))
        return LLMResponse(
            text="\n".join(text_parts),
            tool_calls=calls,
            input_tokens=resp.usage.input_tokens,
            output_tokens=resp.usage.output_tokens,
            model_id=self.model_id,
        )
loop/llm/openai.py ADDED
@@ -0,0 +1,55 @@
1
+ """OpenAI adapter. Requires the `openai` extra. Default timeout: 120s (not SLA-backed)."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ from typing import Any
6
+
7
+ from . import DEFAULT_TIMEOUT_S, LLMClient, LLMResponse, ToolCallReq
8
+
9
+
10
class OpenAILLM(LLMClient):
    """LLMClient adapter for the OpenAI Chat Completions API.

    Tool schemas are converted to OpenAI ``function`` tools and the first
    choice of the reply is normalized into an LLMResponse.
    """

    def __init__(self, model: str = "gpt-4o", timeout_s: float = DEFAULT_TIMEOUT_S, **client_kwargs: Any):
        """Create the SDK client.

        Args:
            model: model identifier sent with every request.
            timeout_s: request timeout handed to the SDK client.
            **client_kwargs: forwarded to ``openai.OpenAI``.
        """
        import openai  # lazy: optional dependency

        self.model_id = model
        self.timeout_s = timeout_s
        self._client = openai.OpenAI(timeout=timeout_s, **client_kwargs)

    def complete(self, messages, tool_schemas, max_tokens):
        """Send one chat completion request and return the normalized response."""
        request: dict[str, Any] = {
            "model": self.model_id,
            "messages": messages,
            "max_tokens": max_tokens,
        }
        # Omit `tools` entirely when there are none instead of sending null.
        if tool_schemas:
            request["tools"] = [
                {
                    "type": "function",
                    "function": {
                        "name": s["name"],
                        "description": s.get("description", ""),
                        "parameters": s["input_schema"],
                    },
                }
                for s in tool_schemas
            ]
        resp = self._client.chat.completions.create(**request)
        choice = resp.choices[0].message

        calls: list[ToolCallReq] = []
        for tc in choice.tool_calls or []:
            raw_args = tc.function.arguments
            try:
                args = json.loads(raw_args)
            except (TypeError, ValueError):
                # TypeError: arguments missing (None); ValueError: invalid JSON.
                args = None
            if not isinstance(args, dict):
                # Unparseable or non-object arguments are kept verbatim under
                # "__malformed__" so engine-side validation can feed a
                # structured error back to the model.
                args = {"__malformed__": raw_args}
            calls.append(ToolCallReq(id=tc.id, name=tc.function.name, args=args))

        usage = resp.usage
        return LLMResponse(
            text=choice.content or "",
            tool_calls=calls,
            input_tokens=usage.prompt_tokens if usage else 0,
            output_tokens=usage.completion_tokens if usage else 0,
            model_id=self.model_id,
        )
@@ -0,0 +1,96 @@
1
+ """Composable policies.
2
+
3
+ Precedence note: hard ceilings (iterations/tokens/wall-clock) are enforced by
4
+ the engine itself before every action and before granting any approval —
5
+ policies layer on top of those guarantees, they don't replace them.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from enum import Enum
10
+ from typing import Any, Callable, Iterable
11
+
12
+ from pydantic import BaseModel
13
+
14
+ from ..core.result import LoopStatus
15
+ from ..core.state import State
16
+ from ..pricing import DEFAULT_PRICING, estimate_cost
17
+
18
+
19
class DecisionKind(str, Enum):
    """What a policy asks the engine to do next."""

    CONTINUE = "CONTINUE"  # carry on
    STOP = "STOP"  # end the loop; Decision.status carries the final status
    PAUSE = "PAUSE"  # request human approval
23
+
24
+
25
class Decision(BaseModel):
    """Verdict returned by a Policy hook."""

    # Defaults to CONTINUE, so a bare Decision() means "no objection".
    kind: DecisionKind = DecisionKind.CONTINUE
    # Human-readable explanation for STOP/PAUSE decisions.
    reason: str = ""
    status: LoopStatus | None = None  # for STOP
29
+
30
+
31
# Shared default-valued Decision (kind=CONTINUE) returned by policies that have no objection.
CONTINUE = Decision()
32
+
33
+
34
class Policy:
    """Base policy: both hooks default to CONTINUE; subclasses override what they need."""

    def before_iteration(self, state: State) -> Decision:
        """Hook taking the current state; the default never objects.

        NOTE(review): presumably invoked by the engine before each iteration — confirm.
        """
        return CONTINUE

    def before_action(self, state: State, tool_name: str, args: dict[str, Any]) -> Decision:
        """Hook taking a proposed tool call; the default never objects.

        NOTE(review): presumably invoked by the engine before each tool action — confirm.
        """
        return CONTINUE
40
+
41
+
42
class NoProgress(Policy):
    """Stop after `window` consecutive flagged iterations with zero goal-metric delta.

    Streak accounting is done by the ProgressEngine; this policy only reads
    ``state.no_progress_streak``.
    """

    def __init__(self, window: int = 3):
        self.window = window

    def before_iteration(self, state: State) -> Decision:
        """Return a NO_PROGRESS stop once the streak reaches the window, else CONTINUE."""
        streak = state.no_progress_streak
        if not (streak >= self.window):
            return CONTINUE
        explanation = (
            f"{state.no_progress_streak} consecutive iterations with repeated/invalid "
            "actions and zero goal-metric delta"
        )
        return Decision(
            kind=DecisionKind.STOP,
            status=LoopStatus.NO_PROGRESS,
            reason=explanation,
        )
60
+
61
+
62
class CostLimit(Policy):
    """Stop once the estimated USD cost of the run reaches a limit.

    Cost is derived from the token counters in ``state.metrics`` through
    estimate_cost(); when the model has no pricing entry the policy never fires.
    """

    def __init__(self, usd: float, model_id: str, pricing: dict | None = None):
        self.usd = usd
        self.model_id = model_id
        # A falsy pricing argument (None or empty) falls back to the bundled table.
        self.pricing = pricing if pricing else DEFAULT_PRICING

    def before_iteration(self, state: State) -> Decision:
        """Return a BUDGET_EXHAUSTED stop when the estimate reaches the limit, else CONTINUE."""
        usage = state.metrics
        cost = estimate_cost(usage.input_tokens, usage.output_tokens, self.model_id, self.pricing)
        if cost is None or not (cost >= self.usd):
            return CONTINUE
        return Decision(
            kind=DecisionKind.STOP,
            status=LoopStatus.BUDGET_EXHAUSTED,
            reason=f"estimated cost ${cost:.2f} >= limit ${self.usd:.2f} (tokens are authoritative)",
        )
79
+
80
+
81
class RequireApproval(Policy):
    """Pause for human approval when an action matches.

    Matcher: a tool name, an iterable of tool names, or
    callable(tool_name, args) -> bool.
    """

    def __init__(self, matcher: Iterable[str] | Callable[[str, dict], bool], reason: str = "requires approval"):
        """Build the matching predicate.

        Args:
            matcher: a callable predicate, an iterable of tool names, or a
                single tool name given as a plain string.
            reason: text attached to the PAUSE decision.
        """
        if callable(matcher):
            self._match = matcher
        else:
            # A bare string is itself an iterable of characters; treat it as one
            # tool name so RequireApproval("delete") does not silently match nothing.
            names = frozenset([matcher] if isinstance(matcher, str) else matcher)
            self._match = lambda name, args: name in names
        self.reason = reason

    def before_action(self, state: State, tool_name: str, args: dict[str, Any]) -> Decision:
        """Return a PAUSE decision when the proposed action matches, else CONTINUE."""
        if self._match(tool_name, args):
            return Decision(kind=DecisionKind.PAUSE, reason=self.reason)
        return CONTINUE
loop/pricing.py ADDED
@@ -0,0 +1,47 @@
1
+ """USD cost estimation. Tokens are the authoritative unit; USD is a convenience
2
+ derived from a pricing table that carries an as-of date.
3
+ """
4
+ from __future__ import annotations
5
+
6
+ import warnings
7
+ from datetime import date, datetime
8
+ from typing import Any
9
+
10
# Maximum age in days of a pricing table's as_of date before check_staleness treats it as stale.
STALENESS_DAYS = 90
11
+
12
# Bundled pricing table used when a caller supplies none.
DEFAULT_PRICING: dict[str, Any] = {
    # Date the rates were recorded (YYYY-MM-DD); parsed by check_staleness.
    "as_of": "2026-01-15",
    # Keyed by model id; looked up by estimate_cost.
    "models": {
        # USD per million tokens
        "claude-sonnet-4-6": {"input": 3.00, "output": 15.00},
        "claude-haiku-4-5-20251001": {"input": 1.00, "output": 5.00},
        "claude-opus-4-8": {"input": 15.00, "output": 75.00},
        "gpt-4o": {"input": 2.50, "output": 10.00},
    },
}
22
+
23
+
24
class PricingStalenessError(Exception):
    """Raised by check_staleness(behavior="refuse") when the pricing table is too old."""

    pass
26
+
27
+
28
def check_staleness(pricing: dict[str, Any], behavior: str = "warn", today: date | None = None) -> bool:
    """Returns True if cost accounting should proceed in USD; False for token-only fallback.

    Args:
        pricing: pricing table with an ``as_of`` date in ``YYYY-MM-DD`` form.
        behavior: what to do when the table is older than STALENESS_DAYS:
            ``"refuse"`` raises, ``"token-only"`` returns False, anything else
            emits a warning and returns True.
        today: reference date; defaults to the current date.

    Raises:
        PricingStalenessError: when the table is stale and behavior is "refuse".
    """
    recorded_on = datetime.strptime(pricing["as_of"], "%Y-%m-%d").date()
    reference = today or date.today()
    age = (reference - recorded_on).days

    if age > STALENESS_DAYS:
        msg = f"pricing table as_of={pricing['as_of']} is {age} days old (> {STALENESS_DAYS})"
        if behavior == "refuse":
            raise PricingStalenessError(msg + "; refusing USD cost accounting")
        if behavior == "token-only":
            return False
        warnings.warn(msg + "; USD figures are estimates — tokens remain authoritative", stacklevel=2)
    return True
41
+
42
+
43
def estimate_cost(input_tokens: int, output_tokens: int, model_id: str, pricing: dict[str, Any]) -> float | None:
    """Estimate the USD cost of a token usage, or None when the model has no rates.

    Rates in ``pricing["models"][model_id]`` are USD per million tokens, with
    separate ``input`` and ``output`` entries.
    """
    rates = pricing["models"].get(model_id)
    if rates:
        return (input_tokens * rates["input"] + output_tokens * rates["output"]) / 1_000_000
    return None
@@ -0,0 +1,72 @@
1
+ """Progress engine: raw signals, no fake-precision score.
2
+
3
+ Signals: blueprint goal metric (regression-aware), repetition detection via
4
+ per-tool fingerprints (advisory), LLM self-assessment (trace-only, never gates
5
+ exits in v1 — by design).
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from pydantic import BaseModel
10
+
11
+ from ..core.state import IterationRecord, MetricSnapshot, State
12
+
13
+
14
class GoalMetric:
    """Blueprint-supplied. measure() returns a snapshot; is_success() gates SUCCESS."""

    def measure(self, observation: str, state: State) -> MetricSnapshot:
        """Return a MetricSnapshot for the given observation and state.

        Subclasses must override this; the base implementation raises
        NotImplementedError.
        """
        raise NotImplementedError

    def is_success(self, snapshot: MetricSnapshot) -> bool:
        """Report whether *snapshot* means the goal is met; the default is never."""
        return False
22
+
23
+
24
class ProgressReport(BaseModel):
    """Raw progress signals produced by ProgressEngine.evaluate for one iteration."""

    trend: str  # improving | flat | regressing | unknown
    # True when the iteration repeated the previous iteration's action fingerprints.
    repetition: bool
    # Value of State.no_progress_streak after the evaluation.
    no_progress_streak: int
    # Change in metric value versus the previous iteration; None when either
    # iteration has no metric.
    metric_delta: float | None = None
29
+
30
+
31
class ProgressEngine:
    """Derives progress signals for an iteration and maintains the streak counters."""

    def evaluate(self, state: State, iteration: IterationRecord) -> ProgressReport:
        """Compare *iteration* with the last recorded one and update the streaks.

        Side effects: sets ``iteration.repetition`` and rewrites
        ``state.no_progress_streak`` and ``state.plan_invalid_streak``.
        The last element of ``state.iterations`` is treated as the previous
        iteration.
        """
        previous = state.iterations[-1] if state.iterations else None

        # Repetition: a non-empty fingerprint set identical to the previous iteration's.
        current_fps = {action.fingerprint for action in iteration.actions}
        previous_fps = {action.fingerprint for action in previous.actions} if previous else set()
        iteration.repetition = bool(current_fps) and current_fps == previous_fps

        # The delta needs a metric on both sides.
        delta: float | None = None
        if iteration.metric and previous and previous.metric:
            delta = iteration.metric.value - previous.metric.value

        # An explicit regression flag outranks whatever the delta says.
        regressed = bool(iteration.metric and iteration.metric.regression)
        if regressed or (delta is not None and delta < 0):
            trend = "regressing"
        elif delta is None:
            trend = "unknown"
        elif delta > 0:
            trend = "improving"
        else:
            trend = "flat"

        # No progress = flagged (repeated or invalid plan) AND no measurable change.
        stalled = (iteration.repetition or iteration.plan_invalid) and (delta is None or delta == 0)
        state.no_progress_streak = state.no_progress_streak + 1 if stalled else 0
        state.plan_invalid_streak = state.plan_invalid_streak + 1 if iteration.plan_invalid else 0

        return ProgressReport(
            trend=trend,
            repetition=iteration.repetition,
            no_progress_streak=state.no_progress_streak,
            metric_delta=delta,
        )