verifyloop 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
verifyloop/planner.py ADDED
@@ -0,0 +1,190 @@
1
+ """Plan phase: decompose tasks into executable substeps."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from typing import Any
8
+
9
+ import litellm
10
+
11
+ from verifyloop.models import PlanStep, Substep, TokenUsage
12
+
13
# System prompt sent as the first message of every planning request made by
# PlanGenerator.generate_plan. It asks the model for a bare JSON object; the
# keys named here ("description", "substeps", "estimated_tools", "complexity",
# "substep_details") are the ones generate_plan reads back from the reply.
PLAN_SYSTEM_PROMPT = """You are a planning agent. Given a task and optional context, produce a JSON plan.

Your plan must be a JSON object with these fields:
{
"description": "One-line summary of the overall task",
"substeps": ["Step 1 description", "Step 2 description", ...],
"estimated_tools": ["bash", "edit", "read", "write", "web_search", "web_fetch"],
"complexity": "low" | "medium" | "high",
"substep_details": [
{
"description": "What this step does",
"tool": "Tool name to use",
"arguments": {"arg_name": "value"},
"order": 0
}
]
}

Rules:
- Each substep should be atomic and independently verifiable
- Prefer small, targeted edits over large rewrites
- Estimate which tools will be needed
- Be specific about file paths, commands, and expected outcomes
- Order substeps so earlier steps produce artifacts later steps need

Respond ONLY with valid JSON, no markdown fences."""
39
+
40
+
41
class PlanGenerator:
    """Turn a free-form task into a ``PlanStep``.

    ``generate_plan`` asks the configured model for a JSON plan. When the reply
    cannot be used it falls back to the keyword heuristics implemented by
    ``decompose_task``. Token usage reported by the model is accumulated
    across calls and exposed through ``token_usage``.
    """

    # Path-like token: may contain directories, dots and hyphens, must end in
    # ``.<ext>`` and must not start with "-" so that command-line flags are
    # never mistaken for file names.
    _PATH = r"((?!-)[\w./\\-]+\.\w+)"

    # Ordered (pattern, tool, argument-builder) heuristics; the first pattern
    # that matches the start of the task wins. Compiled once at class creation
    # instead of being rebuilt on every ``decompose_task`` call.
    _TASK_PATTERNS = (
        (
            re.compile(rf"add\s+(\w+)\s+to\s+{_PATH}", re.IGNORECASE),
            "edit",
            lambda m: {
                "file_path": m.group(2),
                "description": f"Add {m.group(1)} to {m.group(2)}",
            },
        ),
        (
            re.compile(rf"fix\s+(?:the\s+)?(\w+)\s+in\s+{_PATH}", re.IGNORECASE),
            "edit",
            lambda m: {
                "file_path": m.group(2),
                "description": f"Fix {m.group(1)} in {m.group(2)}",
            },
        ),
        (
            re.compile(rf"(?:create|write|make)\s+(?:a\s+)?{_PATH}", re.IGNORECASE),
            "write",
            lambda m: {
                "file_path": m.group(1),
                "description": f"Create {m.group(1)}",
            },
        ),
        (
            re.compile(rf"(?:read|show|cat|view)\s+{_PATH}", re.IGNORECASE),
            "read",
            lambda m: {
                "file_path": m.group(1),
                "description": f"Read {m.group(1)}",
            },
        ),
        (
            re.compile(r"run\s+(.+)", re.IGNORECASE),
            "bash",
            lambda m: {
                "command": m.group(1),
                "description": f"Run: {m.group(1)}",
            },
        ),
        (
            re.compile(
                r"(?:search|look up|find)\s+(?:for\s+)?(.+?)(?:\s+on(?:line| the web))?\.?$",
                re.IGNORECASE,
            ),
            "web_search",
            lambda m: {
                "query": m.group(1),
                "description": f"Search for: {m.group(1)}",
            },
        ),
    )

    # (tool, keyword pattern) pairs in the canonical order in which
    # ``estimate_tools`` reports them. Keywords are matched as substrings.
    _TOOL_KEYWORDS = (
        ("bash", re.compile(r"(?:run|execute|install|pip|npm|cargo|make|build)", re.I)),
        ("edit", re.compile(r"(?:add|fix|edit|modify|update|change|refactor)", re.I)),
        (
            "write",
            re.compile(r"(?:create|write|new|make)\s+(?:a\s+)?(?:file|module|class)", re.I),
        ),
        ("read", re.compile(r"(?:read|show|view|check|inspect|display)", re.I)),
        ("web_search", re.compile(r"(?:search|look up|find|google)", re.I)),
        ("web_fetch", re.compile(r"(?:fetch|download|curl|url|http)", re.I)),
    )

    def __init__(
        self,
        model: str = "gpt-4o",
        api_key: str | None = None,
        api_base: str | None = None,
        temperature: float = 0.1,
    ) -> None:
        """Store the model settings and start with an empty usage counter.

        Args:
            model: Model identifier passed to ``litellm.acompletion``.
            api_key: Optional API key; forwarded only when truthy.
            api_base: Optional API base URL; forwarded only when truthy.
            temperature: Sampling temperature for planning requests.
        """
        self.model = model
        self.api_key = api_key
        self.api_base = api_base
        self.temperature = temperature
        self._token_usage = TokenUsage()

    @property
    def token_usage(self) -> TokenUsage:
        """Token usage accumulated over all ``generate_plan`` calls so far."""
        return self._token_usage

    async def generate_plan(
        self,
        task: str,
        context: str = "",
    ) -> PlanStep:
        """Ask the model for a plan and convert its JSON reply into a ``PlanStep``.

        Falls back to ``_fallback_plan`` when the reply is empty, is not valid
        JSON, or is valid JSON that is not an object.

        Args:
            task: The task to plan.
            context: Optional extra context appended to the user message.

        Returns:
            The parsed plan, or the heuristic fallback plan.

        Raises:
            Any exception raised by ``litellm.acompletion`` propagates unchanged.
        """
        user_content = f"Task: {task}"
        if context:
            user_content += f"\n\nContext:\n{context}"
        messages = [
            {"role": "system", "content": PLAN_SYSTEM_PROMPT},
            {"role": "user", "content": user_content},
        ]

        kwargs: dict[str, Any] = {
            "model": self.model,
            "messages": messages,
            "temperature": self.temperature,
        }
        if self.api_key:
            kwargs["api_key"] = self.api_key
        if self.api_base:
            kwargs["api_base"] = self.api_base

        response = await litellm.acompletion(**kwargs)

        # The response may omit usage data; count such calls as zero tokens.
        usage = response.usage
        self._token_usage = self._token_usage.merge(
            TokenUsage(
                prompt_tokens=usage.prompt_tokens if usage else 0,
                completion_tokens=usage.completion_tokens if usage else 0,
                total_tokens=usage.total_tokens if usage else 0,
            )
        )

        choices = response.choices
        content = (choices[0].message.content if choices else None) or "{}"
        # Models sometimes wrap the JSON in a markdown fence despite the prompt.
        content = re.sub(r"^```(?:json)?\s*", "", content.strip())
        content = re.sub(r"\s*```$", "", content.strip())

        try:
            plan_data = json.loads(content)
        except json.JSONDecodeError:
            return self._fallback_plan(task)
        if not isinstance(plan_data, dict):
            # Valid JSON of the wrong shape (list, string, number, null).
            return self._fallback_plan(task)

        raw_details = plan_data.get("substep_details")
        if not isinstance(raw_details, list):
            raw_details = []

        details: list[Substep] = []
        for raw in raw_details:
            if not isinstance(raw, dict):
                continue  # skip malformed entries instead of crashing
            details.append(
                Substep(
                    description=raw.get("description", ""),
                    tool=raw.get("tool", "bash"),
                    arguments=raw.get("arguments") or {},
                    # Default order is the position among accepted entries.
                    order=raw.get("order", len(details)),
                )
            )

        return PlanStep(
            description=plan_data.get("description") or task,
            substeps=plan_data.get("substeps") or [],
            estimated_tools=plan_data.get("estimated_tools") or ["bash"],
            substep_details=details,
            complexity=plan_data.get("complexity") or "medium",
            # NOTE(review): this is the cumulative prompt-token count of this
            # generator, not only this call's — confirm that is intended.
            context_tokens=self._token_usage.prompt_tokens,
        )

    @classmethod
    def _match_task(cls, task: str) -> tuple[str, dict[str, str]] | None:
        """Return ``(tool, fields)`` for the first heuristic matching *task*, else ``None``."""
        text = task.strip()
        for pattern, tool, build_fields in cls._TASK_PATTERNS:
            match = pattern.match(text)
            if match:
                return tool, build_fields(match)
        return None

    def decompose_task(self, task: str) -> list[Substep]:
        """Derive a single substep from *task* using keyword heuristics.

        The task (ignoring surrounding whitespace) is matched
        case-insensitively from its start against ``_TASK_PATTERNS``.

        Args:
            task: Free-form task text.

        Returns:
            A one-element list. When no heuristic matches, the element is a
            ``bash`` substep whose command is the task text itself.
        """
        matched = self._match_task(task)
        if matched is None:
            return [Substep(description=task, tool="bash", arguments={"command": task}, order=0)]

        tool, fields = matched
        description = fields.pop("description", task)
        return [Substep(description=description, tool=tool, arguments=fields, order=0)]

    def estimate_tools(self, task: str) -> list[str]:
        """Guess which tools *task* needs from keywords it contains.

        Args:
            task: Free-form task text.

        Returns:
            Tool names in a fixed order (bash, edit, write, read, web_search,
            web_fetch), or ``["bash"]`` when no keyword is present.
        """
        tools = [name for name, pattern in self._TOOL_KEYWORDS if pattern.search(task)]
        return tools or ["bash"]

    def _fallback_plan(self, task: str) -> PlanStep:
        """Build a plan from ``decompose_task`` when the model reply is unusable."""
        substeps = self.decompose_task(task)
        return PlanStep(
            description=task,
            substeps=[s.description for s in substeps],
            estimated_tools=[s.tool for s in substeps],
            substep_details=substeps,
            complexity="low" if len(substeps) <= 2 else "medium",
        )
@@ -0,0 +1,204 @@
1
+ """Recover phase: analyze errors and generate targeted recovery strategies."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from enum import Enum
7
+ from typing import Any
8
+
9
+ import litellm
10
+
11
+ from verifyloop.models import ExecuteStep, RecoverStep, TokenUsage
12
+
13
+
14
class RecoveryType(str, Enum):
    """Closed set of recovery strategy identifiers.

    The ``str`` mix-in makes each member compare equal to its plain string
    value, so ``RecoveryType.EDIT == "edit"`` holds.
    """

    EDIT = "edit"
    CREATE = "create"
    RETRY = "retry"
    SIMPLIFY = "simplify"
    ANALYZE = "analyze"
    BASH = "bash"
21
+
22
+
23
# Ordered (pattern, recovery type, advice) heuristics for well-known errors.
# Consumers scan the list from the top and use the first pattern found anywhere
# in the error text, so more specific entries must stay above broader ones.
RECOVERY_PATTERNS: list[tuple[re.Pattern[str], RecoveryType, str]] = [
    (re.compile(r"SyntaxError"), RecoveryType.EDIT, "Fix syntax error in the file"),
    (re.compile(r"IndentationError"), RecoveryType.EDIT, "Fix indentation in the file"),
    (
        re.compile(r"FileNotFoundError|No such file"),
        RecoveryType.CREATE,
        "Create the missing file or find correct path",
    ),
    (
        re.compile(r"ModuleNotFoundError|ImportError"),
        RecoveryType.BASH,
        "Install missing module or fix import",
    ),
    (
        re.compile(r"TimeoutError|timed out"),
        RecoveryType.SIMPLIFY,
        "Simplify the approach or increase timeout",
    ),
    (re.compile(r"Permission denied"), RecoveryType.BASH, "Fix file permissions"),
    (
        re.compile(r"AssertionError|FAILED"),
        RecoveryType.ANALYZE,
        "Analyze test failure and fix the code",
    ),
    (re.compile(r"TypeError"), RecoveryType.EDIT, "Fix type mismatch in the code"),
    (re.compile(r"NameError"), RecoveryType.EDIT, "Fix undefined variable or missing import"),
    (re.compile(r"KeyError"), RecoveryType.EDIT, "Fix missing key access or add default"),
    (re.compile(r"IndexError"), RecoveryType.EDIT, "Fix out-of-bounds access"),
    (re.compile(r"AttributeError"), RecoveryType.EDIT, "Fix attribute access on wrong type"),
    (
        # "Connection refused" (with a space) is the OS-level message printed
        # by shell tools; the class-name alternatives cover Python tracebacks.
        re.compile(r"ConnectionError|ConnectionRefused|Connection refused"),
        RecoveryType.RETRY,
        "Retry the connection or check service availability",
    ),
    (
        re.compile(r"old_content not found"),
        RecoveryType.EDIT,
        "Read the file first, then edit with exact content match",
    ),
]
39
+
40
# System prompt for LLM-driven recovery. The "recovery_type" alternatives list
# every RecoveryType value, including "bash", so the model can propose the same
# strategies the static RECOVERY_PATTERNS table uses.
RECOVERY_SYSTEM_PROMPT = """You are a recovery agent. Given an error and its context, produce a JSON recovery plan:

{
"recovery_type": "edit" | "create" | "retry" | "simplify" | "analyze" | "bash",
"recovery_attempt": "Description of what to do",
"patched_arguments": {
"tool": "tool_name",
"arguments": {"arg": "value"}
},
"reasoning": "Why this recovery should work"
}

Be specific about exact file paths, line numbers, and content changes.
Respond ONLY with valid JSON, no markdown fences."""
54
+
55
+
56
class Recoverer:
    """Propose recovery steps for failed executions.

    The first attempt for an error is answered from the static
    ``RECOVERY_PATTERNS`` table when one of its patterns matches. Every other
    case is delegated to the LLM; if that call or its reply is unusable, the
    table is consulted again and finally a generic "simplify" step is returned.
    """

    def __init__(
        self,
        model: str = "gpt-4o",
        max_recovery_attempts: int = 3,
        api_key: str | None = None,
        api_base: str | None = None,
    ) -> None:
        """Store the model settings and start with empty usage and history.

        Args:
            model: Model identifier passed to ``litellm.acompletion``.
            max_recovery_attempts: Highest attempt number ``recover`` will serve.
            api_key: Optional API key; forwarded only when truthy.
            api_base: Optional API base URL; forwarded only when truthy.
        """
        self.model = model
        self.max_recovery_attempts = max_recovery_attempts
        self.api_key = api_key
        self.api_base = api_base
        self._token_usage = TokenUsage()
        self._recovery_history: list[dict[str, Any]] = []

    @property
    def token_usage(self) -> TokenUsage:
        """Token usage accumulated over all LLM recovery calls so far."""
        return self._token_usage

    async def recover(
        self,
        error: str,
        context: str = "",
        attempt: int = 1,
        failed_step: ExecuteStep | None = None,
    ) -> RecoverStep:
        """Produce a recovery step for *error*.

        Args:
            error: Error text to recover from.
            context: Optional extra context forwarded to the LLM.
            attempt: 1-based number of this recovery attempt.
            failed_step: The step that failed, if available; its tool,
                arguments and error are forwarded to the LLM.

        Returns:
            A ``RecoverStep`` with ``success=False``. When *attempt* exceeds
            ``max_recovery_attempts`` an "exhausted" step is returned and
            nothing is added to the history.
        """
        if attempt > self.max_recovery_attempts:
            return RecoverStep(
                original_error=error,
                recovery_attempt="Max recovery attempts exhausted",
                recovery_type="analyze",
                success=False,
                attempt_number=attempt,
                max_attempts=self.max_recovery_attempts,
            )

        # Only the first attempt may be answered from the static table; later
        # attempts always go to the LLM.
        recovery = self._match_pattern_recovery(error) if attempt == 1 else None
        if recovery is None:
            recovery = await self._llm_recovery(error, context, attempt, failed_step)

        self._recovery_history.append({
            "error": error,
            "attempt": attempt,
            "recovery_type": recovery.recovery_type,
            "recovery_attempt": recovery.recovery_attempt,
        })

        return recovery

    def _match_pattern_recovery(self, error: str) -> RecoverStep | None:
        """Return the step for the first ``RECOVERY_PATTERNS`` entry found in *error*.

        The returned step always carries ``attempt_number=1``; callers serving
        a later attempt overwrite it. Returns ``None`` when nothing matches.
        """
        for pattern, recovery_type, description in RECOVERY_PATTERNS:
            if pattern.search(error):
                return RecoverStep(
                    original_error=error,
                    recovery_attempt=description,
                    recovery_type=recovery_type.value,
                    success=False,
                    attempt_number=1,
                    max_attempts=self.max_recovery_attempts,
                )
        return None

    async def _llm_recovery(
        self,
        error: str,
        context: str,
        attempt: int,
        failed_step: ExecuteStep | None = None,
    ) -> RecoverStep:
        """Ask the LLM for a recovery plan, degrading to heuristics on any failure.

        Never raises for LLM or parsing problems: the failure is logged at
        debug level and a pattern-based or generic step is returned instead.
        """
        # Local imports: this module does not import these at file level.
        import json
        import logging

        user_msg = f"Error: {error}\nAttempt: {attempt}/{self.max_recovery_attempts}"
        if context:
            user_msg += f"\n\nContext:\n{context}"
        if failed_step:
            user_msg += (
                f"\n\nFailed step tool: {failed_step.tool}"
                f"\nFailed step arguments: {failed_step.arguments}"
            )
            if failed_step.error:
                user_msg += f"\nFailed step error: {failed_step.error}"

        messages = [
            {"role": "system", "content": RECOVERY_SYSTEM_PROMPT},
            {"role": "user", "content": user_msg},
        ]

        try:
            kwargs: dict[str, Any] = {
                "model": self.model,
                "messages": messages,
                "temperature": 0.1,
                "max_tokens": 1024,
            }
            if self.api_key:
                kwargs["api_key"] = self.api_key
            if self.api_base:
                kwargs["api_base"] = self.api_base

            response = await litellm.acompletion(**kwargs)

            # The response may omit usage data; count such calls as zero tokens.
            usage = response.usage
            self._token_usage = self._token_usage.merge(
                TokenUsage(
                    prompt_tokens=usage.prompt_tokens if usage else 0,
                    completion_tokens=usage.completion_tokens if usage else 0,
                    total_tokens=usage.total_tokens if usage else 0,
                )
            )

            content = response.choices[0].message.content or "{}"
            # Models sometimes wrap the JSON in a markdown fence despite the prompt.
            content = re.sub(r"^```(?:json)?\s*", "", content.strip())
            content = re.sub(r"\s*```$", "", content.strip())

            data = json.loads(content)
            if not isinstance(data, dict):
                raise ValueError("recovery reply is not a JSON object")

            # Accept only known strategy names; anything else becomes "analyze".
            recovery_type = data.get("recovery_type")
            known_types = {member.value for member in RecoveryType}
            if not isinstance(recovery_type, str) or recovery_type not in known_types:
                recovery_type = RecoveryType.ANALYZE.value

            return RecoverStep(
                original_error=error,
                recovery_attempt=data.get("recovery_attempt") or "",
                recovery_type=recovery_type,
                success=False,
                attempt_number=attempt,
                max_attempts=self.max_recovery_attempts,
                patched_arguments=data.get("patched_arguments") or {},
            )
        except Exception:
            # Deliberate best effort: recovery must not itself fail, so any
            # LLM/transport/parsing problem degrades to the heuristics below.
            logging.getLogger(__name__).debug(
                "LLM recovery failed; falling back to heuristics", exc_info=True
            )
            recovery = self._match_pattern_recovery(error)
            if recovery:
                recovery.attempt_number = attempt
                return recovery

            return RecoverStep(
                original_error=error,
                recovery_attempt=f"Generic recovery attempt {attempt}: retry with simpler approach",
                recovery_type="simplify",
                success=False,
                attempt_number=attempt,
                max_attempts=self.max_recovery_attempts,
            )

    def get_recovery_history(self) -> list[dict[str, Any]]:
        """Return a shallow copy of the recorded recovery attempts, oldest first."""
        return list(self._recovery_history)

    def should_retry(self, error: str, attempt: int) -> bool:
        """Tell whether another recovery attempt is allowed after *attempt*.

        Args:
            error: Error text. It does not influence the result: known and
                unknown errors are retried alike while attempts remain. The
                parameter is kept for interface compatibility.
            attempt: Number of the attempt that just failed.

        Returns:
            ``True`` while ``attempt`` is below ``max_recovery_attempts``.
        """
        return attempt < self.max_recovery_attempts