verifyloop 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- verifyloop/__init__.py +41 -0
- verifyloop/cli.py +186 -0
- verifyloop/executor.py +330 -0
- verifyloop/memory.py +197 -0
- verifyloop/models.py +146 -0
- verifyloop/pipeline.py +246 -0
- verifyloop/planner.py +190 -0
- verifyloop/recoverer.py +204 -0
- verifyloop/verifier.py +390 -0
- verifyloop-0.1.0.dist-info/METADATA +383 -0
- verifyloop-0.1.0.dist-info/RECORD +14 -0
- verifyloop-0.1.0.dist-info/WHEEL +4 -0
- verifyloop-0.1.0.dist-info/entry_points.txt +2 -0
- verifyloop-0.1.0.dist-info/licenses/LICENSE +21 -0
verifyloop/planner.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
"""Plan phase: decompose tasks into executable substeps."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import litellm
|
|
10
|
+
|
|
11
|
+
from verifyloop.models import PlanStep, Substep, TokenUsage
|
|
12
|
+
|
|
13
|
+
# System prompt sent as the first message of every planning request made by
# PlanGenerator.generate_plan. It tells the model to answer with one JSON
# object; the keys named here ("description", "substeps", "estimated_tools",
# "complexity", "substep_details") are the ones generate_plan reads back.
PLAN_SYSTEM_PROMPT = """You are a planning agent. Given a task and optional context, produce a JSON plan.

Your plan must be a JSON object with these fields:
{
"description": "One-line summary of the overall task",
"substeps": ["Step 1 description", "Step 2 description", ...],
"estimated_tools": ["bash", "edit", "read", "write", "web_search", "web_fetch"],
"complexity": "low" | "medium" | "high",
"substep_details": [
{
"description": "What this step does",
"tool": "Tool name to use",
"arguments": {"arg_name": "value"},
"order": 0
}
]
}

Rules:
- Each substep should be atomic and independently verifiable
- Prefer small, targeted edits over large rewrites
- Estimate which tools will be needed
- Be specific about file paths, commands, and expected outcomes
- Order substeps so earlier steps produce artifacts later steps need

Respond ONLY with valid JSON, no markdown fences."""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class PlanGenerator:
    """Turn a natural-language task into a ``PlanStep``.

    ``generate_plan`` asks the configured model (through ``litellm``) for a
    JSON plan. ``decompose_task`` and ``estimate_tools`` are offline regex
    heuristics; ``decompose_task`` also backs the fallback plan used when the
    model's reply cannot be used.
    """

    # Rules tried in order by ``decompose_task``; the first match wins.
    # Each entry is (regex, tool name, builder). The builder receives the
    # match object and returns (tool arguments, substep description).
    _TASK_RULES = (
        (
            r"add\s+(\w+)\s+to\s+(\w+\.\w+)",
            "edit",
            lambda m: (
                {"file_path": m.group(2)},
                f"Add {m.group(1)} to {m.group(2)}",
            ),
        ),
        (
            r"fix\s+(?:the\s+)?(\w+)\s+in\s+(\w+\.\w+)",
            "edit",
            lambda m: (
                {"file_path": m.group(2)},
                f"Fix {m.group(1)} in {m.group(2)}",
            ),
        ),
        (
            r"(?:create|write|make)\s+(?:a\s+)?(\w+\.\w+)",
            "write",
            lambda m: ({"file_path": m.group(1)}, f"Create {m.group(1)}"),
        ),
        (
            r"(?:read|show|cat|view)\s+(\w+\.\w+)",
            "read",
            lambda m: ({"file_path": m.group(1)}, f"Read {m.group(1)}"),
        ),
        (
            r"run\s+(.+)",
            "bash",
            lambda m: ({"command": m.group(1)}, f"Run: {m.group(1)}"),
        ),
        (
            r"(?:search|look up|find)\s+(?:for\s+)?(.+?)(?:\s+on(?:line| the web))?\.?$",
            "web_search",
            lambda m: ({"query": m.group(1)}, f"Search for: {m.group(1)}"),
        ),
    )

    # (regex, tool) pairs checked by ``estimate_tools``. The table order is
    # also the order of the returned tool names.
    _TOOL_HINTS = (
        (r"(?:run|execute|install|pip|npm|cargo|make|build)", "bash"),
        (r"(?:add|fix|edit|modify|update|change|refactor)", "edit"),
        (r"(?:create|write|new|make)\s+(?:a\s+)?(?:file|module|class)", "write"),
        (r"(?:read|show|view|check|inspect|display)", "read"),
        (r"(?:search|look up|find|google)", "web_search"),
        (r"(?:fetch|download|curl|url|http)", "web_fetch"),
    )

    def __init__(
        self,
        model: str = "gpt-4o",
        api_key: str | None = None,
        api_base: str | None = None,
        temperature: float = 0.1,
    ) -> None:
        """Store the model settings and start with an empty token counter.

        Args:
            model: Model name passed to ``litellm.acompletion``.
            api_key: Optional API key; forwarded only when truthy.
            api_base: Optional API base URL; forwarded only when truthy.
            temperature: Sampling temperature for planning requests.
        """
        self.model = model
        self.api_key = api_key
        self.api_base = api_base
        self.temperature = temperature
        self._token_usage = TokenUsage()

    @property
    def token_usage(self) -> TokenUsage:
        """Token usage merged over every ``generate_plan`` call so far."""
        return self._token_usage

    async def generate_plan(
        self,
        task: str,
        context: str = "",
    ) -> PlanStep:
        """Ask the model for a plan and convert its JSON reply to a ``PlanStep``.

        Args:
            task: The task to plan.
            context: Optional extra text appended to the user message.

        Returns:
            The plan parsed from the model's reply. If the reply is not valid
            JSON, or is valid JSON but not an object, the regex-based
            fallback plan is returned instead.

        Raises:
            Whatever ``litellm.acompletion`` raises; the request itself is
            not wrapped in any error handling.
        """
        user_content = f"Task: {task}"
        if context:
            user_content += f"\n\nContext:\n{context}"

        kwargs: dict[str, Any] = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": PLAN_SYSTEM_PROMPT},
                {"role": "user", "content": user_content},
            ],
            "temperature": self.temperature,
        }
        if self.api_key:
            kwargs["api_key"] = self.api_key
        if self.api_base:
            kwargs["api_base"] = self.api_base

        response = await litellm.acompletion(**kwargs)

        # A response without usage data counts as zero tokens.
        usage = response.usage
        self._token_usage = self._token_usage.merge(
            TokenUsage(
                prompt_tokens=usage.prompt_tokens if usage else 0,
                completion_tokens=usage.completion_tokens if usage else 0,
                total_tokens=usage.total_tokens if usage else 0,
            )
        )

        # The prompt forbids markdown fences, but strip a leading/trailing
        # ``` fence anyway in case the model adds one.
        content = response.choices[0].message.content or "{}"
        content = re.sub(r"^```(?:json)?\s*", "", content.strip())
        content = re.sub(r"\s*```$", "", content.strip())

        try:
            plan_data = json.loads(content)
        except json.JSONDecodeError:
            return self._fallback_plan(task)
        if not isinstance(plan_data, dict):
            # Valid JSON of the wrong shape (list, string, number, null) is
            # as unusable as invalid JSON; treat it the same way.
            return self._fallback_plan(task)

        raw_details = plan_data.get("substep_details", [])
        if not isinstance(raw_details, list):
            raw_details = []

        details = []
        for sd in raw_details:
            if not isinstance(sd, dict):
                # Skip malformed entries instead of failing the whole plan.
                continue
            details.append(
                Substep(
                    description=sd.get("description", ""),
                    tool=sd.get("tool", "bash"),
                    arguments=sd.get("arguments", {}),
                    # Default order is the entry's position among kept entries.
                    order=sd.get("order", len(details)),
                )
            )

        return PlanStep(
            description=plan_data.get("description", task),
            substeps=plan_data.get("substeps", []),
            estimated_tools=plan_data.get("estimated_tools", ["bash"]),
            substep_details=details,
            complexity=plan_data.get("complexity", "medium"),
            # NOTE(review): this is the generator's merged prompt-token count,
            # not this request's alone - confirm that is the intent.
            context_tokens=self._token_usage.prompt_tokens,
        )

    def decompose_task(self, task: str) -> list[Substep]:
        """Map a task sentence to a single ``Substep`` using regex rules.

        Rules are matched case-insensitively against the start of the task
        with surrounding whitespace removed; the first matching rule wins.

        Args:
            task: The task text.

        Returns:
            A one-element list. When no rule matches, the element is a
            ``bash`` substep whose command is the unmodified task text.
        """
        # re.match anchors at position 0, so leading whitespace would
        # otherwise make every rule miss.
        text = task.strip()

        for pattern, tool, build in self._TASK_RULES:
            match = re.match(pattern, text, re.IGNORECASE)
            if match:
                arguments, description = build(match)
                return [
                    Substep(
                        description=description,
                        tool=tool,
                        arguments=arguments,
                        order=0,
                    )
                ]

        # NOTE(review): the fallback hands free-form task text to the bash
        # tool as a command - confirm the executor treats this safely.
        return [Substep(description=task, tool="bash", arguments={"command": task}, order=0)]

    def estimate_tools(self, task: str) -> list[str]:
        """Guess which tools a task needs from keywords in its text.

        Keywords are matched case-insensitively as substrings.

        Args:
            task: The task text.

        Returns:
            The matched tool names in a fixed order (bash, edit, write, read,
            web_search, web_fetch), or ``["bash"]`` when nothing matches.
        """
        tools = [
            tool
            for pattern, tool in self._TOOL_HINTS
            if re.search(pattern, task, re.I)
        ]
        return tools or ["bash"]

    def _fallback_plan(self, task: str) -> PlanStep:
        """Build a plan from ``decompose_task`` alone, without the model."""
        substeps = self.decompose_task(task)
        return PlanStep(
            description=task,
            substeps=[s.description for s in substeps],
            estimated_tools=[s.tool for s in substeps],
            substep_details=substeps,
            complexity="low" if len(substeps) <= 2 else "medium",
        )
|
verifyloop/recoverer.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
"""Recover phase: analyze errors and generate targeted recovery strategies."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import litellm
|
|
10
|
+
|
|
11
|
+
from verifyloop.models import ExecuteStep, RecoverStep, TokenUsage
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class RecoveryType(str, Enum):
    """Closed set of recovery strategy names.

    Members are also ``str`` instances, so each compares equal to its plain
    string value.
    """

    # Strategies that change files.
    EDIT = "edit"
    CREATE = "create"

    # Strategies that change how the step is attempted.
    RETRY = "retry"
    SIMPLIFY = "simplify"
    ANALYZE = "analyze"

    # Strategy that runs a shell command.
    BASH = "bash"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Ordered lookup table of known error signatures. Each entry pairs a compiled,
# case-sensitive regex with the recovery type and the human-readable hint to
# use when the regex is found anywhere in an error message. Consumers scan
# the list front to back, so earlier rows take precedence.
RECOVERY_PATTERNS: list[tuple[re.Pattern[str], RecoveryType, str]] = [
    (re.compile(signature), kind, hint)
    for signature, kind, hint in (
        (r"SyntaxError", RecoveryType.EDIT, "Fix syntax error in the file"),
        (r"IndentationError", RecoveryType.EDIT, "Fix indentation in the file"),
        (
            r"FileNotFoundError|No such file",
            RecoveryType.CREATE,
            "Create the missing file or find correct path",
        ),
        (
            r"ModuleNotFoundError|ImportError",
            RecoveryType.BASH,
            "Install missing module or fix import",
        ),
        (
            r"TimeoutError|timed out",
            RecoveryType.SIMPLIFY,
            "Simplify the approach or increase timeout",
        ),
        (r"Permission denied", RecoveryType.BASH, "Fix file permissions"),
        (
            r"AssertionError|FAILED",
            RecoveryType.ANALYZE,
            "Analyze test failure and fix the code",
        ),
        (r"TypeError", RecoveryType.EDIT, "Fix type mismatch in the code"),
        (r"NameError", RecoveryType.EDIT, "Fix undefined variable or missing import"),
        (r"KeyError", RecoveryType.EDIT, "Fix missing key access or add default"),
        (r"IndexError", RecoveryType.EDIT, "Fix out-of-bounds access"),
        (r"AttributeError", RecoveryType.EDIT, "Fix attribute access on wrong type"),
        (
            r"ConnectionError|ConnectionRefused",
            RecoveryType.RETRY,
            "Retry the connection or check service availability",
        ),
        (
            r"old_content not found",
            RecoveryType.EDIT,
            "Read the file first, then edit with exact content match",
        ),
    )
]
|
|
39
|
+
|
|
40
|
+
# System prompt sent as the first message of every LLM-backed recovery
# request. It asks for one JSON object; the recovery code reads back the
# "recovery_type", "recovery_attempt" and "patched_arguments" keys.
RECOVERY_SYSTEM_PROMPT = """You are a recovery agent. Given an error and its context, produce a JSON recovery plan:

{
"recovery_type": "edit" | "create" | "retry" | "simplify" | "analyze",
"recovery_attempt": "Description of what to do",
"patched_arguments": {
"tool": "tool_name",
"arguments": {"arg": "value"}
},
"reasoning": "Why this recovery should work"
}

Be specific about exact file paths, line numbers, and content changes.
Respond ONLY with valid JSON, no markdown fences."""
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class Recoverer:
    """Propose a recovery strategy for a failed step.

    The first attempt uses the ``RECOVERY_PATTERNS`` table when the error
    matches a known signature. Later attempts, and errors with no known
    signature, are sent to the model through ``litellm``. Every proposal is
    appended to an in-memory history.
    """

    def __init__(
        self,
        model: str = "gpt-4o",
        max_recovery_attempts: int = 3,
        api_key: str | None = None,
        api_base: str | None = None,
    ) -> None:
        """Store the model settings and start with empty usage and history.

        Args:
            model: Model name passed to ``litellm.acompletion``.
            max_recovery_attempts: Highest attempt number ``recover`` will
                still produce a strategy for.
            api_key: Optional API key; forwarded only when truthy.
            api_base: Optional API base URL; forwarded only when truthy.
        """
        self.model = model
        self.max_recovery_attempts = max_recovery_attempts
        self.api_key = api_key
        self.api_base = api_base
        self._token_usage = TokenUsage()
        self._recovery_history: list[dict[str, Any]] = []

    @property
    def token_usage(self) -> TokenUsage:
        """Token usage merged over every model-backed recovery so far."""
        return self._token_usage

    async def recover(
        self,
        error: str,
        context: str = "",
        attempt: int = 1,
        failed_step: ExecuteStep | None = None,
    ) -> RecoverStep:
        """Produce a recovery proposal for ``error``.

        Args:
            error: The error text to recover from.
            context: Optional extra text included in the model request.
            attempt: Attempt number, starting at 1.
            failed_step: The step that failed, if available; its tool,
                arguments and error are included in the model request.

        Returns:
            A ``RecoverStep`` with ``success=False``. When ``attempt``
            exceeds ``max_recovery_attempts`` an "exhausted" step is returned
            and nothing is added to the history.
        """
        if attempt > self.max_recovery_attempts:
            return RecoverStep(
                original_error=error,
                recovery_attempt="Max recovery attempts exhausted",
                recovery_type="analyze",
                success=False,
                attempt_number=attempt,
                max_attempts=self.max_recovery_attempts,
            )

        # The pattern table is only consulted on the first attempt; once its
        # hint has been tried, later attempts go to the model.
        recovery = self._match_pattern_recovery(error) if attempt == 1 else None
        if not recovery:
            recovery = await self._llm_recovery(error, context, attempt, failed_step)

        self._recovery_history.append({
            "error": error,
            "attempt": attempt,
            "recovery_type": recovery.recovery_type,
            "recovery_attempt": recovery.recovery_attempt,
        })

        return recovery

    def _match_pattern_recovery(self, error: str, attempt: int = 1) -> RecoverStep | None:
        """Return the table-driven proposal for ``error``, or ``None``.

        Args:
            error: The error text to search.
            attempt: Attempt number recorded on the returned step.

        Returns:
            A ``RecoverStep`` built from the first ``RECOVERY_PATTERNS`` row
            whose regex is found in ``error``; ``None`` if no row matches.
        """
        for pattern, recovery_type, description in RECOVERY_PATTERNS:
            if pattern.search(error):
                return RecoverStep(
                    original_error=error,
                    recovery_attempt=description,
                    recovery_type=recovery_type.value,
                    success=False,
                    attempt_number=attempt,
                    max_attempts=self.max_recovery_attempts,
                )
        return None

    async def _llm_recovery(
        self,
        error: str,
        context: str,
        attempt: int,
        failed_step: ExecuteStep | None = None,
    ) -> RecoverStep:
        """Ask the model for a recovery plan, with a non-model fallback.

        Any exception raised while requesting, parsing or converting the
        reply is swallowed. The result then comes from the pattern table or,
        if no pattern matches, from a generic "simplify" proposal.
        """
        # Function-scope import, as in the original code for this method.
        import json

        user_msg = f"Error: {error}\nAttempt: {attempt}/{self.max_recovery_attempts}"
        if context:
            user_msg += f"\n\nContext:\n{context}"
        if failed_step:
            user_msg += (
                f"\n\nFailed step tool: {failed_step.tool}"
                f"\nFailed step arguments: {failed_step.arguments}"
            )
            if failed_step.error:
                user_msg += f"\nFailed step error: {failed_step.error}"

        messages = [
            {"role": "system", "content": RECOVERY_SYSTEM_PROMPT},
            {"role": "user", "content": user_msg},
        ]

        try:
            kwargs: dict[str, Any] = {
                "model": self.model,
                "messages": messages,
                "temperature": 0.1,
                "max_tokens": 1024,
            }
            if self.api_key:
                kwargs["api_key"] = self.api_key
            if self.api_base:
                kwargs["api_base"] = self.api_base

            response = await litellm.acompletion(**kwargs)

            # A response without usage data counts as zero tokens.
            usage = response.usage
            self._token_usage = self._token_usage.merge(
                TokenUsage(
                    prompt_tokens=usage.prompt_tokens if usage else 0,
                    completion_tokens=usage.completion_tokens if usage else 0,
                    total_tokens=usage.total_tokens if usage else 0,
                )
            )

            # Strip a leading/trailing ``` fence in case the model adds one
            # despite the prompt.
            content = response.choices[0].message.content or "{}"
            content = re.sub(r"^```(?:json)?\s*", "", content.strip())
            content = re.sub(r"\s*```$", "", content.strip())

            data = json.loads(content)

            return RecoverStep(
                original_error=error,
                recovery_attempt=data.get("recovery_attempt", ""),
                recovery_type=data.get("recovery_type", "analyze"),
                success=False,
                attempt_number=attempt,
                max_attempts=self.max_recovery_attempts,
                patched_arguments=data.get("patched_arguments", {}),
            )
        except Exception:
            # Deliberately broad and best-effort: a failed model call must
            # not stop recovery, so fall back to the offline strategies.
            recovery = self._match_pattern_recovery(error, attempt)
            if recovery:
                return recovery

            return RecoverStep(
                original_error=error,
                recovery_attempt=f"Generic recovery attempt {attempt}: retry with simpler approach",
                recovery_type="simplify",
                success=False,
                attempt_number=attempt,
                max_attempts=self.max_recovery_attempts,
            )

    def get_recovery_history(self) -> list[dict[str, Any]]:
        """Return a shallow copy of the recorded recovery proposals."""
        return list(self._recovery_history)

    def should_retry(self, error: str, attempt: int) -> bool:
        """Tell whether another recovery attempt is allowed after ``attempt``.

        Args:
            error: The error text. It does not influence the result: errors
                with and without a known signature are retried alike. The
                parameter is kept so existing callers keep working.
            attempt: The attempt number just completed.

        Returns:
            ``True`` while ``attempt`` is below ``max_recovery_attempts``.
        """
        return attempt < self.max_recovery_attempts
|