verifyloop 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
verifyloop/memory.py ADDED
@@ -0,0 +1,197 @@
1
+ """Memory system: short-term (in-process) and long-term (persistent file) stores."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import time
7
+ from abc import ABC, abstractmethod
8
+ from datetime import datetime, timezone
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ import aiofiles
13
+
14
+
15
class MemoryStore(ABC):
    """Abstract contract for an asynchronous, namespaced key/value memory.

    Every operation accepts a ``namespace`` (``"default"`` when omitted).
    All five coroutines are abstract, so a subclass must implement each of
    them before it can be instantiated.
    """

    @abstractmethod
    async def store(self, key: str, value: Any, namespace: str = "default") -> None:
        """Save ``value`` under ``key`` within ``namespace``."""

    @abstractmethod
    async def retrieve(self, key: str, namespace: str = "default") -> Any | None:
        """Return the value saved under ``key``, or ``None`` when absent."""

    @abstractmethod
    async def search(self, query: str, namespace: str = "default", limit: int = 10) -> list[dict[str, Any]]:
        """Return entries of ``namespace`` matching ``query`` as dicts.

        ``limit`` is the requested maximum number of results.
        """

    @abstractmethod
    async def delete(self, key: str, namespace: str = "default") -> bool:
        """Remove ``key`` from ``namespace``; report whether it was present."""

    @abstractmethod
    async def list_keys(self, namespace: str = "default") -> list[str]:
        """Return the keys currently held in ``namespace``."""
35
+
36
+
37
class InMemoryStore(MemoryStore):
    """Process-local :class:`MemoryStore` backed by nested dictionaries.

    ``_store`` maps ``namespace -> key -> entry``; every entry is a dict with
    ``"value"``, ``"stored_at"`` (UTC ISO-8601 string) and ``"access_count"``.
    Nothing is persisted: the contents live only as long as the instance.
    """

    def __init__(self) -> None:
        self._store: dict[str, dict[str, dict[str, Any]]] = {}

    def _ns(self, namespace: str) -> dict[str, dict[str, Any]]:
        """Return the mutable mapping for ``namespace``, creating it if absent."""
        return self._store.setdefault(namespace, {})

    async def store(self, key: str, value: Any, namespace: str = "default") -> None:
        """Save ``value`` under ``key``, keeping any existing access counter."""
        ns = self._ns(namespace)
        previous = ns.get(key, {})
        ns[key] = {
            "value": value,
            "stored_at": datetime.now(timezone.utc).isoformat(),
            # Overwriting a key refreshes the timestamp but not its popularity.
            "access_count": previous.get("access_count", 0),
        }

    async def retrieve(self, key: str, namespace: str = "default") -> Any | None:
        """Return the stored value and bump its access counter.

        Returns ``None`` when the key (or the whole namespace) is unknown.
        """
        # Plain ``get`` so that a lookup never materialises an empty namespace.
        entry = self._store.get(namespace, {}).get(key)
        if entry is None:
            return None
        entry["access_count"] = entry.get("access_count", 0) + 1
        return entry["value"]

    async def search(self, query: str, namespace: str = "default", limit: int = 10) -> list[dict[str, Any]]:
        """Return entries whose key or stringified value contains ``query``.

        Matching is a case-insensitive substring test. Results are shallow
        copies of the entries with an added ``"key"`` field, ordered by
        ``access_count`` (highest first) and truncated to ``limit`` items.
        A non-positive ``limit`` yields an empty list.
        """
        needle = query.lower()
        matches = [
            {"key": key, **entry}
            for key, entry in self._store.get(namespace, {}).items()
            if needle in str(entry.get("value", "")).lower() or needle in key.lower()
        ]
        matches.sort(key=lambda match: match.get("access_count", 0), reverse=True)
        # Clamp so a negative limit cannot turn into "all but the last n".
        return matches[: max(limit, 0)]

    async def delete(self, key: str, namespace: str = "default") -> bool:
        """Remove ``key`` from ``namespace``; return ``True`` if it existed."""
        ns = self._store.get(namespace)
        if ns is None or key not in ns:
            return False
        del ns[key]
        return True

    async def list_keys(self, namespace: str = "default") -> list[str]:
        """Return the keys of ``namespace`` (empty list for an unknown one)."""
        return list(self._store.get(namespace, {}))
82
+
83
+
84
class FileStore(MemoryStore):
    """:class:`MemoryStore` that persists each namespace as a JSON file.

    A namespace lives in ``<base_dir>/<namespace>.json`` and is mirrored in
    ``_cache`` after its first load, so later calls are served from memory
    and only writes touch the disk. Entries have the same shape as in the
    in-memory store: ``"value"``, ``"stored_at"`` and ``"access_count"``.

    Values that ``json`` cannot serialise are written via ``str()``
    (``default=str``); after a fresh load they therefore come back as
    strings rather than as the original objects.
    """

    def __init__(self, base_dir: str = ".verifyloop_memory") -> None:
        self.base_dir = Path(base_dir)
        self._cache: dict[str, dict[str, dict[str, Any]]] = {}

    def _ns_path(self, namespace: str) -> Path:
        """Return the JSON file path backing ``namespace``."""
        # NOTE(review): ``namespace`` is interpolated unchecked, so a value
        # containing path separators or ".." resolves outside ``base_dir``.
        # Validate it at the call site if namespaces can come from
        # untrusted input.
        return self.base_dir / f"{namespace}.json"

    async def _load_ns(self, namespace: str) -> dict[str, dict[str, Any]]:
        """Return the entry mapping for ``namespace``, loading it on first use.

        A missing file yields an empty mapping. The object returned is always
        the one held in ``_cache``, so in-place edits stay visible to later
        calls.
        """
        if namespace in self._cache:
            return self._cache[namespace]
        try:
            # Open directly instead of exists()+open: no check-then-use race.
            async with aiofiles.open(self._ns_path(namespace), "r", encoding="utf-8") as f:
                raw = await f.read()
        except (FileNotFoundError, NotADirectoryError):
            data: dict[str, dict[str, Any]] = {}
        else:
            data = json.loads(raw)
        self._cache[namespace] = data
        return data

    async def _save_ns(self, namespace: str, data: dict[str, dict[str, Any]]) -> None:
        """Cache ``data`` for ``namespace`` and write it to its JSON file."""
        self._cache[namespace] = data
        # Serialise before the file is opened for writing: opening with "w"
        # truncates, so a serialisation error must not be able to happen
        # after that point.
        payload = json.dumps(data, indent=2, default=str)
        path = self._ns_path(namespace)
        path.parent.mkdir(parents=True, exist_ok=True)
        async with aiofiles.open(path, "w", encoding="utf-8") as f:
            await f.write(payload)

    async def store(self, key: str, value: Any, namespace: str = "default") -> None:
        """Save ``value`` under ``key`` and persist the namespace.

        An existing entry keeps its ``access_count``; ``stored_at`` is reset
        to the current UTC time.
        """
        data = await self._load_ns(namespace)
        previous = data.get(key, {})
        data[key] = {
            "value": value,
            "stored_at": datetime.now(timezone.utc).isoformat(),
            "access_count": previous.get("access_count", 0),
        }
        await self._save_ns(namespace, data)

    async def retrieve(self, key: str, namespace: str = "default") -> Any | None:
        """Return the stored value, or ``None`` when the key is unknown.

        A hit increments ``access_count`` and rewrites the namespace file so
        that the counter survives restarts.
        """
        data = await self._load_ns(namespace)
        entry = data.get(key)
        if entry is None:
            return None
        entry["access_count"] = entry.get("access_count", 0) + 1
        await self._save_ns(namespace, data)
        return entry["value"]

    async def search(self, query: str, namespace: str = "default", limit: int = 10) -> list[dict[str, Any]]:
        """Return entries whose key or stringified value contains ``query``.

        Matching is a case-insensitive substring test. Results are shallow
        copies of the entries with an added ``"key"`` field, ordered by
        ``access_count`` (highest first) and cut to ``limit`` items.
        """
        data = await self._load_ns(namespace)
        needle = query.lower()
        matches = [
            {"key": key, **entry}
            for key, entry in data.items()
            if needle in str(entry.get("value", "")).lower() or needle in key.lower()
        ]
        matches.sort(key=lambda match: match.get("access_count", 0), reverse=True)
        return matches[:limit]

    async def delete(self, key: str, namespace: str = "default") -> bool:
        """Remove ``key`` and persist the change; return ``True`` if it existed."""
        data = await self._load_ns(namespace)
        if key not in data:
            return False
        del data[key]
        await self._save_ns(namespace, data)
        return True

    async def list_keys(self, namespace: str = "default") -> list[str]:
        """Return the keys stored in ``namespace``."""
        data = await self._load_ns(namespace)
        return list(data)
151
+
152
+
153
class ConversationContext:
    """Short-term working context: chat messages plus contents of read files.

    Messages and file contents are kept in plain in-process containers. File
    contents are additionally mirrored into ``memory`` (namespace ``"files"``,
    key ``"file:<path>"``) on a best-effort basis.
    """

    def __init__(self, memory: MemoryStore | None = None) -> None:
        """Create an empty context backed by ``memory``.

        Args:
            memory: Store that receives file contents; a fresh
                :class:`InMemoryStore` is used when omitted.
        """
        # ``is not None`` rather than truthiness: a store that happens to be
        # falsy (e.g. defines ``__len__``) must not be silently replaced.
        self.memory = memory if memory is not None else InMemoryStore()
        self._messages: list[dict[str, str]] = []
        self._file_context: dict[str, str] = {}
        # Strong references to in-flight persistence tasks; the event loop
        # itself only keeps weak ones, so an unreferenced task may be
        # garbage-collected before it finishes.
        self._pending_tasks: set[Any] = set()

    def add_message(self, role: str, content: str) -> None:
        """Append a ``{"role", "content"}`` message to the history."""
        self._messages.append({"role": role, "content": content})

    def get_messages(self) -> list[dict[str, str]]:
        """Return a shallow copy of the message history."""
        return list(self._messages)

    def add_file_context(self, file_path: str, content: str) -> None:
        """Remember ``content`` for ``file_path`` and mirror it into memory.

        The in-process record is updated synchronously. Persisting to
        ``memory`` depends on the calling context:

        * inside a running event loop the write is scheduled as a background
          task and this method returns immediately;
        * otherwise the write is run to completion on a private, short-lived
          event loop before returning.
        """
        self._file_context[file_path] = content
        if self.memory is None:
            return

        import asyncio

        try:
            running_loop = asyncio.get_running_loop()
        except RuntimeError:
            running_loop = None

        if running_loop is not None:
            task = running_loop.create_task(
                self.memory.store(f"file:{file_path}", content, namespace="files")
            )
            self._pending_tasks.add(task)
            task.add_done_callback(self._pending_tasks.discard)
            return

        # No loop is running in this thread: drive the coroutine on a
        # dedicated loop so the thread's current-loop setting is left alone.
        private_loop = asyncio.new_event_loop()
        try:
            private_loop.run_until_complete(
                self.memory.store(f"file:{file_path}", content, namespace="files")
            )
        except RuntimeError:
            # Persistence is best-effort; the in-process copy is already
            # updated, so record the failure and carry on.
            import logging

            logging.getLogger(__name__).debug(
                "Could not persist file context for %r", file_path, exc_info=True
            )
        finally:
            private_loop.close()

    def get_file_context(self, file_path: str) -> str | None:
        """Return the remembered content of ``file_path``, or ``None``."""
        return self._file_context.get(file_path)

    def get_all_file_paths(self) -> list[str]:
        """Return the paths of all remembered files, in insertion order."""
        return list(self._file_context)

    def build_context_string(self, max_files: int = 5) -> str:
        """Render a compact textual summary of the context.

        The summary holds the last message (content cut to 500 characters)
        followed by up to ``max_files`` files, each previewed with its first
        300 characters. Sections are separated by blank lines; an empty
        context yields an empty string.
        """
        parts: list[str] = []
        if self._messages:
            last_content = self._messages[-1].get("content", "")
            parts.append(f"Last message: {last_content[:500]}")
        for path, content in list(self._file_context.items())[:max_files]:
            preview = content[:300] + "..." if len(content) > 300 else content
            parts.append(f"File {path}:\n{preview}")
        return "\n\n".join(parts)
verifyloop/models.py ADDED
@@ -0,0 +1,146 @@
1
+ """Core data models for the VerifyLoop framework."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+ from datetime import datetime, timezone
7
+ from enum import Enum
8
+ from typing import Any, Literal
9
+
10
+ from pydantic import BaseModel, Field
11
+
12
+
13
class StepType(str, Enum):
    """Phase of the loop that a recorded :class:`Step` belongs to.

    Members are ``str`` instances as well, so each one compares equal to its
    lowercase value.
    """

    PLAN = "plan"
    EXECUTE = "execute"
    VERIFY = "verify"
    RECOVER = "recover"
18
+
19
+
20
class Step(BaseModel):
    """One recorded action of a run, tagged with the phase that produced it."""

    step_type: StepType
    content: str
    # Tool invocations attached to this step; a fresh empty list by default.
    tool_calls: list[dict[str, Any]] = Field(default_factory=list)
    confidence: float = 0.0
    # Captured in UTC at the moment the instance is created.
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
26
+
27
+
28
class Substep(BaseModel):
    """A single tool invocation planned as part of a :class:`PlanStep`."""

    description: str
    # Name of the tool to invoke and the keyword arguments handed to it.
    tool: str
    arguments: dict[str, Any] = Field(default_factory=dict)
    # Presumably the position of this substep within its plan — TODO confirm.
    order: int = 0
33
+
34
+
35
class PlanStep(BaseModel):
    """A generated plan: a description plus the substeps meant to realise it."""

    description: str
    # Substeps as plain text.
    substeps: list[str] = Field(default_factory=list)
    estimated_tools: list[str] = Field(default_factory=list)
    # Structured form of the substeps (tool name + arguments).
    substep_details: list[Substep] = Field(default_factory=list)
    complexity: Literal["low", "medium", "high"] = "medium"
    context_tokens: int = 0
    estimated_duration_seconds: float = 0.0
43
+
44
+
45
class ExecuteStep(BaseModel):
    """Outcome of running one tool with a given set of arguments."""

    tool: str
    arguments: dict[str, Any] = Field(default_factory=dict)
    # Textual output of the tool; empty until something is recorded.
    result: str = ""
    # Defaults to False, so a step counts as failed until marked otherwise.
    success: bool = False
    duration_seconds: float = 0.0
    exit_code: int | None = None
    error: str | None = None
    artifacts: dict[str, str] = Field(default_factory=dict)

    @property
    def failed(self) -> bool:
        """Logical negation of ``success``."""
        succeeded = self.success
        return not succeeded
58
+
59
+
60
class VerifyCheckResult(BaseModel):
    """Result of one named verification check."""

    # Identifier or description of the check that was run.
    check: str
    passed: bool
    # Optional free-form explanation; empty when none is given.
    detail: str = ""
64
+
65
+
66
class VerifyStep(BaseModel):
    """Aggregate verdict of a verification pass over executed steps."""

    checks: list[str] = Field(default_factory=list)
    check_results: list[VerifyCheckResult] = Field(default_factory=list)
    # Overall verdict and the confidence attached to it.
    passed: bool = False
    confidence: float = 0.0
    # Failure messages and suggested fixes; both empty by default.
    failures: list[str] = Field(default_factory=list)
    fix_suggestions: list[str] = Field(default_factory=list)
    verification_model: str = "reason-critic-7b"
    used_trained_model: bool = False
75
+
76
+
77
class RecoverStep(BaseModel):
    """One attempt at recovering from a failed step."""

    original_error: str
    # Description of what this attempt tries to do.
    recovery_attempt: str = ""
    recovery_type: Literal["edit", "create", "retry", "simplify", "analyze"] = "edit"
    success: bool = False
    attempt_number: int = 1
    max_attempts: int = 3
    # Replacement tool call proposed by the recovery, if any.
    patched_arguments: dict[str, Any] = Field(default_factory=dict)

    @property
    def exhausted(self) -> bool:
        """``True`` once the attempt budget is used up without a success."""
        if self.success:
            return False
        return self.attempt_number >= self.max_attempts
89
+
90
+
91
class RunStatus(str, Enum):
    """Lifecycle state of an :class:`AgentRun`.

    Members are ``str`` instances as well, so each one compares equal to its
    lowercase value.
    """

    PENDING = "pending"
    PLANNING = "planning"
    EXECUTING = "executing"
    VERIFYING = "verifying"
    RECOVERING = "recovering"
    COMPLETED = "completed"
    FAILED = "failed"
99
+
100
+
101
class TokenUsage(BaseModel):
    """Token counters for prompt, completion and their recorded total."""

    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0

    def merge(self, other: TokenUsage) -> TokenUsage:
        """Return a new instance holding the field-wise sum of both usages.

        Neither operand is modified. ``total_tokens`` is summed like the other
        fields; it is not recomputed from prompt and completion counts.
        """
        prompt = self.prompt_tokens + other.prompt_tokens
        completion = self.completion_tokens + other.completion_tokens
        total = self.total_tokens + other.total_tokens
        return TokenUsage(
            prompt_tokens=prompt,
            completion_tokens=completion,
            total_tokens=total,
        )
112
+
113
+
114
class AgentRun(BaseModel):
    """Record of one task execution: its steps, status, timing and usage."""

    task: str
    steps: list[Step] = Field(default_factory=list)
    status: RunStatus = RunStatus.PENDING
    token_usage: TokenUsage = Field(default_factory=TokenUsage)
    duration_seconds: float = 0.0
    iteration: int = 0
    max_iterations: int = 5
    # UTC creation time; ``completed_at`` stays ``None`` until it is set.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    completed_at: datetime | None = None
    metadata: dict[str, Any] = Field(default_factory=dict)

    def add_step(self, step: Step) -> None:
        """Append ``step`` to the run's step list."""
        self.steps.append(step)

    def elapsed(self) -> float:
        """Return seconds between creation and completion (or now, if unset)."""
        end = self.completed_at or datetime.now(timezone.utc)
        return (end - self.created_at).total_seconds()
133
+
134
+
135
class PipelineConfig(BaseModel):
    """Settings that configure a pipeline and the components it builds."""

    # Model names: ``model`` for general use, ``verify_model`` for verification.
    model: str = "gpt-4o"
    verify_model: str = "reason-critic-7b"
    # Upper bounds on loop rounds and on recovery attempts.
    max_iterations: int = 5
    # Minimum verification confidence required to accept a result.
    confidence_threshold: float = 0.8
    max_recovery_attempts: int = 3
    working_dir: str = "."
    # Behaviour switches, all off by default.
    dry_run: bool = False
    interactive: bool = False
    sandbox: bool = False
    sandbox_image: str = "python:3.11-slim"
    # NOTE(review): how this mapping is consumed is not visible in this
    # module — confirm against the callers.
    callbacks: dict[str, Any] = Field(default_factory=dict)
verifyloop/pipeline.py ADDED
@@ -0,0 +1,246 @@
1
+ """Full Pipeline: Plan → Execute → Verify → Recover loop."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+ from datetime import datetime, timezone
7
+ from typing import Any, Callable, Coroutine
8
+
9
+ from verifyloop.executor import Executor
10
+ from verifyloop.memory import ConversationContext, InMemoryStore, MemoryStore
11
+ from verifyloop.models import (
12
+ AgentRun,
13
+ ExecuteStep,
14
+ PipelineConfig,
15
+ PlanStep,
16
+ RecoverStep,
17
+ RunStatus,
18
+ Step,
19
+ StepType,
20
+ TokenUsage,
21
+ VerifyStep,
22
+ )
23
+ from verifyloop.planner import PlanGenerator
24
+ from verifyloop.recoverer import Recoverer
25
+ from verifyloop.verifier import Verifier, VerifierConfig
26
+
27
+
28
# Type of an event observer: an async callable that receives the event name
# and its payload dict. ``None`` is part of the alias, so it may be registered
# as well; ``AgentPipeline._emit`` skips ``None`` entries.
CallbackFn = Callable[[str, dict[str, Any]], Coroutine[Any, Any, None]] | None
29
+
30
+
31
class AgentPipeline:
    """Run a task through repeated Plan → Execute → Verify → Recover rounds.

    The pipeline builds a planner, executor, verifier and recoverer from a
    :class:`PipelineConfig`, keeps a :class:`ConversationContext` holding the
    files read so far, and reports progress to registered async callbacks.
    """

    def __init__(self, config: PipelineConfig | None = None) -> None:
        """Create the collaborating components.

        Args:
            config: Pipeline settings; a default :class:`PipelineConfig` is
                used when omitted.
        """
        self.config = config or PipelineConfig()
        self._planner = PlanGenerator(
            model=self.config.model,
        )
        self._executor = Executor(
            working_dir=self.config.working_dir,
            sandbox=self.config.sandbox,
            sandbox_image=self.config.sandbox_image,
        )
        self._verifier = Verifier(
            VerifierConfig(
                verify_model=self.config.verify_model,
                confidence_threshold=self.config.confidence_threshold,
            )
        )
        self._recoverer = Recoverer(
            model=self.config.model,
            max_recovery_attempts=self.config.max_recovery_attempts,
        )
        self._memory: MemoryStore = InMemoryStore()
        self._context = ConversationContext(self._memory)
        self._callbacks: list[CallbackFn] = []

    @property
    def token_usage(self) -> TokenUsage:
        """Combined token usage of the planner, verifier and recoverer."""
        return (
            self._planner.token_usage
            .merge(self._verifier.token_usage)
            .merge(self._recoverer.token_usage)
        )

    def on_event(self, callback: CallbackFn) -> None:
        """Register ``callback`` to be awaited for every emitted event."""
        self._callbacks.append(callback)

    async def _emit(self, event: str, data: dict[str, Any]) -> None:
        """Await every registered callback with ``(event, data)``.

        Callbacks run sequentially in registration order. ``None`` entries
        are skipped. A callback that raises never aborts the run or prevents
        later callbacks from being called; the failure is logged at debug
        level.
        """
        for callback in self._callbacks:
            if callback is None:
                continue
            try:
                await callback(event, data)
            except Exception:
                # Observers are best-effort, but leave a trace for debugging
                # instead of discarding the error entirely.
                import logging

                logging.getLogger(__name__).debug(
                    "Event callback failed for %r", event, exc_info=True
                )

    def _finalize(self, run: AgentRun, status: RunStatus, start_time: float) -> None:
        """Stamp ``run`` with its final status, duration, end time and usage."""
        run.status = status
        run.duration_seconds = time.monotonic() - start_time
        run.completed_at = datetime.now(timezone.utc)
        run.token_usage = self.token_usage

    def _passes(self, verification: VerifyStep) -> bool:
        """Tell whether ``verification`` passed with sufficient confidence."""
        return (
            verification.passed
            and verification.confidence >= self.config.confidence_threshold
        )

    async def _execute_plan(self, run: AgentRun, plan: PlanStep, iteration: int) -> list[ExecuteStep]:
        """Execute the plan's substeps in order and return their results.

        In interactive mode each substep must be confirmed first; declined
        substeps are skipped. Successful ``read`` calls feed the file content
        into the conversation context.
        """
        results: list[ExecuteStep] = []
        for substep in plan.substep_details:
            if self.config.interactive and not await self._confirm_substep(substep):
                continue

            outcome = await self._executor.execute(substep.tool, substep.arguments)
            results.append(outcome)
            run.add_step(Step(
                step_type=StepType.EXECUTE,
                content=f"{substep.tool}: {substep.description}",
                tool_calls=[{"tool": substep.tool, "args": substep.arguments}],
                confidence=1.0 if outcome.success else 0.0,
            ))
            await self._emit("step_complete", {
                "tool": substep.tool,
                "success": outcome.success,
                "iteration": iteration,
            })

            if substep.tool == "read" and outcome.success:
                self._context.add_file_context(
                    substep.arguments.get("file_path", ""), outcome.result
                )
        return results

    async def _attempt_recovery(
        self,
        run: AgentRun,
        plan: PlanStep,
        execute_steps: list[ExecuteStep],
        all_errors: str,
    ) -> bool:
        """Try to repair a failed round; return ``True`` once re-verification passes.

        Up to ``max_recovery_attempts`` recoveries are requested. When a
        recovery proposes a patched tool call it is executed, appended to
        ``execute_steps`` (mutated in place) and the whole set is verified
        again. The loop stops early when the recoverer reports exhaustion.
        """
        for attempt in range(1, self.config.max_recovery_attempts + 1):
            recovery = await self._recoverer.recover(
                error=all_errors,
                context=self._context.build_context_string(),
                attempt=attempt,
                failed_step=execute_steps[-1] if execute_steps else None,
            )
            run.add_step(Step(
                step_type=StepType.RECOVER,
                content=f"Recovery attempt {attempt}: {recovery.recovery_attempt}",
                confidence=0.5,
            ))
            await self._emit("recovery_attempt", {
                "attempt": attempt,
                "type": recovery.recovery_type,
                "description": recovery.recovery_attempt,
            })

            if recovery.patched_arguments:
                tool = recovery.patched_arguments.get("tool", "bash")
                args = recovery.patched_arguments.get("arguments", {})
                execute_steps.append(await self._executor.execute(tool, args))

                # Re-verify only when a patched call actually ran; without
                # one there is nothing new to check.
                recheck = await self._verifier.verify_code_edits(plan, execute_steps)
                if self._passes(recheck):
                    return True

            if recovery.exhausted:
                break
        return False

    async def run(
        self,
        task: str,
        context: str = "",
        max_iterations: int | None = None,
    ) -> AgentRun:
        """Work on ``task`` until verification passes or iterations run out.

        Args:
            task: Description of what should be accomplished.
            context: Optional extra context for the planner. It is kept for
                every iteration; after a failed round the gathered file
                context and the failure messages are appended to it.
            max_iterations: Override for ``config.max_iterations``; a falsy
                value (``None`` or ``0``) selects the configured limit.

        Returns:
            The :class:`AgentRun` record. Its status is ``COMPLETED`` on
            success (or after planning in dry-run mode) and ``FAILED``
            otherwise. Exceptions raised while running are not propagated:
            they are stored in ``metadata["error"]`` and reported through a
            ``run_error`` event.
        """
        max_iters = max_iterations or self.config.max_iterations
        run = AgentRun(
            task=task,
            max_iterations=max_iters,
            status=RunStatus.PENDING,
        )
        start_time = time.monotonic()
        plan_context = context

        try:
            await self._emit("run_start", {"task": task})

            for iteration in range(1, max_iters + 1):
                run.iteration = iteration
                await self._emit("iteration_start", {"iteration": iteration})

                # Phase 1: Plan
                run.status = RunStatus.PLANNING
                await self._emit("phase_start", {"phase": "plan", "iteration": iteration})

                plan = await self._planner.generate_plan(
                    task, plan_context or self._context.build_context_string()
                )
                run.add_step(Step(
                    step_type=StepType.PLAN,
                    content=plan.description,
                    confidence=0.7,
                ))
                await self._emit("phase_complete", {
                    "phase": "plan",
                    "description": plan.description,
                    "substeps": plan.substeps,
                })

                if self.config.dry_run:
                    # A dry run ends after planning, but is finalised like
                    # every other exit so end time and token usage are set.
                    self._finalize(run, RunStatus.COMPLETED, start_time)
                    await self._emit("run_complete", {
                        "status": "completed",
                        "iterations": iteration,
                        "dry_run": True,
                    })
                    return run

                # Phase 2: Execute
                run.status = RunStatus.EXECUTING
                await self._emit("phase_start", {"phase": "execute", "iteration": iteration})
                execute_steps = await self._execute_plan(run, plan, iteration)

                # Phase 3: Verify
                run.status = RunStatus.VERIFYING
                await self._emit("phase_start", {"phase": "verify", "iteration": iteration})

                verification = await self._verifier.verify_code_edits(plan, execute_steps)
                run.add_step(Step(
                    step_type=StepType.VERIFY,
                    content=f"Passed: {verification.passed}, Confidence: {verification.confidence:.2f}",
                    confidence=verification.confidence,
                ))
                await self._emit("phase_complete", {
                    "phase": "verify",
                    "passed": verification.passed,
                    "confidence": verification.confidence,
                    "failures": verification.failures,
                })

                if self._passes(verification):
                    self._finalize(run, RunStatus.COMPLETED, start_time)
                    await self._emit("run_complete", {"status": "completed", "iterations": iteration})
                    return run

                # Phase 4: Recover (if verification failed)
                run.status = RunStatus.RECOVERING
                await self._emit("phase_start", {"phase": "recover", "iteration": iteration})

                failure_messages = verification.failures or ["Verification failed"]
                all_errors = "; ".join(failure_messages)

                if await self._attempt_recovery(run, plan, execute_steps, all_errors):
                    self._finalize(run, RunStatus.COMPLETED, start_time)
                    await self._emit("run_complete", {"status": "completed_after_recovery"})
                    return run

                # Recovery didn't fix it — replan in the next iteration with
                # the caller's context, the gathered file context and the
                # failures, so the caller's context is not lost on retry.
                gathered = self._context.build_context_string()
                retry_parts = [part for part in (context, gathered) if part]
                plan_context = "\n\n".join(retry_parts) + f"\nPrevious failures: {all_errors}"

            self._finalize(run, RunStatus.FAILED, start_time)
            await self._emit("run_complete", {"status": "failed", "iterations": max_iters})
            return run

        except Exception as exc:
            # Top-level boundary: turn any failure into a FAILED run record.
            run.metadata["error"] = str(exc)
            self._finalize(run, RunStatus.FAILED, start_time)
            await self._emit("run_error", {"error": str(exc)})
            return run

    async def _confirm_substep(self, substep: Any) -> bool:
        """Ask on the console whether ``substep`` should be executed.

        Returns ``True`` without asking when ``rich`` is not installed.
        """
        # NOTE(review): Confirm.ask is a blocking prompt, so the event loop is
        # paused while waiting for the answer.
        try:
            from rich.console import Console
            from rich.prompt import Confirm
        except ImportError:
            return True

        console = Console()
        console.print(f"\n[bold blue]Step:[/] {substep.description}")
        console.print(f"  [dim]Tool: {substep.tool}[/dim]")
        console.print(f"  [dim]Args: {substep.arguments}[/dim]")
        return Confirm.ask("Execute this step?", default=True)