vision-agent 0.2.25__py3-none-any.whl → 0.2.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/__init__.py +1 -0
- vision_agent/agent/vision_agent_v2.py +26 -2
- vision_agent/agent/vision_agent_v3.py +305 -0
- vision_agent/agent/vision_agent_v3_prompts.py +221 -0
- vision_agent/tools/tools_v2.py +3 -3
- vision_agent/utils/execute.py +3 -0
- {vision_agent-0.2.25.dist-info → vision_agent-0.2.27.dist-info}/METADATA +1 -1
- {vision_agent-0.2.25.dist-info → vision_agent-0.2.27.dist-info}/RECORD +11 -9
- /vision_agent/agent/{vision_agent_v2_prompt.py → vision_agent_v2_prompts.py} +0 -0
- {vision_agent-0.2.25.dist-info → vision_agent-0.2.27.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.25.dist-info → vision_agent-0.2.27.dist-info}/WHEEL +0 -0
vision_agent/agent/__init__.py
CHANGED
@@ -10,7 +10,7 @@ from rich.syntax import Syntax
|
|
10
10
|
from tabulate import tabulate
|
11
11
|
|
12
12
|
from vision_agent.agent import Agent
|
13
|
-
from vision_agent.agent.
|
13
|
+
from vision_agent.agent.vision_agent_v2_prompts import (
|
14
14
|
CODE,
|
15
15
|
CODE_SYS_MSG,
|
16
16
|
DEBUG,
|
@@ -165,6 +165,7 @@ def write_and_exec_code(
|
|
165
165
|
tool_info: str,
|
166
166
|
exec: Execute,
|
167
167
|
retrieved_ltm: str,
|
168
|
+
log_progress: Callable[..., str],
|
168
169
|
max_retry: int = 3,
|
169
170
|
verbosity: int = 0,
|
170
171
|
) -> Tuple[bool, str, str, Dict[str, List[str]]]:
|
@@ -178,6 +179,7 @@ def write_and_exec_code(
|
|
178
179
|
success, result = exec.run_isolation(code)
|
179
180
|
if verbosity == 2:
|
180
181
|
_CONSOLE.print(Syntax(code, "python", theme="gruvbox-dark", line_numbers=True))
|
182
|
+
log_progress(f"\tCode success: {success}\n\tResult: {str(result)}", code)
|
181
183
|
_LOGGER.info(f"\tCode success: {success}, result: {str(result)}")
|
182
184
|
working_memory: Dict[str, List[str]] = {}
|
183
185
|
while not success and counter < max_retry:
|
@@ -204,6 +206,7 @@ def write_and_exec_code(
|
|
204
206
|
_CONSOLE.print(
|
205
207
|
Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
|
206
208
|
)
|
209
|
+
log_progress(f"\tDebugging reflection: {reflection}\n\tResult: {result}")
|
207
210
|
_LOGGER.info(f"\tDebugging reflection: {reflection}, result: {result}")
|
208
211
|
|
209
212
|
if success:
|
@@ -224,6 +227,7 @@ def run_plan(
|
|
224
227
|
exec: Execute,
|
225
228
|
code: str,
|
226
229
|
tool_recommender: Sim,
|
230
|
+
log_progress: Callable[..., str],
|
227
231
|
long_term_memory: Optional[Sim] = None,
|
228
232
|
verbosity: int = 0,
|
229
233
|
) -> Tuple[str, str, List[Dict[str, Any]], Dict[str, List[str]]]:
|
@@ -234,6 +238,10 @@ def run_plan(
|
|
234
238
|
working_memory: Dict[str, List[str]] = {}
|
235
239
|
|
236
240
|
for task in active_plan:
|
241
|
+
log_progress(
|
242
|
+
f"""Going to run the following task(s) in sequence:
|
243
|
+
{tabulate(tabular_data=[task], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
|
244
|
+
)
|
237
245
|
_LOGGER.info(
|
238
246
|
f"""
|
239
247
|
{tabulate(tabular_data=[task], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
|
@@ -242,6 +250,7 @@ def run_plan(
|
|
242
250
|
tool_info = "\n".join([e["doc"] for e in tools])
|
243
251
|
|
244
252
|
if verbosity == 2:
|
253
|
+
log_progress(f"Tools retrieved: {[e['desc'] for e in tools]}")
|
245
254
|
_LOGGER.info(f"Tools retrieved: {[e['desc'] for e in tools]}")
|
246
255
|
|
247
256
|
if long_term_memory is not None:
|
@@ -258,6 +267,7 @@ def run_plan(
|
|
258
267
|
tool_info,
|
259
268
|
exec,
|
260
269
|
retrieved_ltm,
|
270
|
+
log_progress,
|
261
271
|
verbosity=verbosity,
|
262
272
|
)
|
263
273
|
if task["type"] == "code":
|
@@ -271,6 +281,8 @@ def run_plan(
|
|
271
281
|
_CONSOLE.print(
|
272
282
|
Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
|
273
283
|
)
|
284
|
+
|
285
|
+
log_progress(f"\tCode success: {success}\n\tResult: {str(result)}")
|
274
286
|
_LOGGER.info(f"\tCode success: {success} result: {str(result)}")
|
275
287
|
|
276
288
|
task["success"] = success
|
@@ -308,10 +320,12 @@ class VisionAgentV2(Agent):
|
|
308
320
|
tool_recommender: Optional[Sim] = None,
|
309
321
|
long_term_memory: Optional[Sim] = None,
|
310
322
|
verbosity: int = 0,
|
323
|
+
report_progress_callback: Optional[Callable[..., Any]] = None,
|
311
324
|
) -> None:
|
312
325
|
self.planner = OpenAILLM(temperature=0.0, json_mode=True)
|
313
326
|
self.coder = OpenAILLM(temperature=0.0)
|
314
327
|
self.exec = Execute(timeout=timeout)
|
328
|
+
self.report_progress_callback = report_progress_callback
|
315
329
|
if tool_recommender is None:
|
316
330
|
self.tool_recommender = Sim(TOOLS_DF, sim_key="desc")
|
317
331
|
else:
|
@@ -361,6 +375,10 @@ class VisionAgentV2(Agent):
|
|
361
375
|
working_code = task["code"]
|
362
376
|
|
363
377
|
user_req, plan = write_plan(chat, plan, TOOL_DESCRIPTIONS, self.planner)
|
378
|
+
self.log_progress(
|
379
|
+
f"""Plan:
|
380
|
+
{tabulate(tabular_data=plan, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
|
381
|
+
)
|
364
382
|
_LOGGER.info(
|
365
383
|
f"""Plan:
|
366
384
|
{tabulate(tabular_data=plan, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
|
@@ -379,6 +397,7 @@ class VisionAgentV2(Agent):
|
|
379
397
|
self.exec,
|
380
398
|
working_code,
|
381
399
|
self.tool_recommender,
|
400
|
+
self.log_progress,
|
382
401
|
self.long_term_memory,
|
383
402
|
self.verbosity,
|
384
403
|
)
|
@@ -393,6 +412,9 @@ class VisionAgentV2(Agent):
|
|
393
412
|
|
394
413
|
retries += 1
|
395
414
|
|
415
|
+
self.log_progress("The Vision Agent V2 has concluded this chat.")
|
416
|
+
self.log_progress(f"<ANSWER>Plan success: {success}</ANSWER>")
|
417
|
+
|
396
418
|
return {
|
397
419
|
"code": working_code,
|
398
420
|
"test": working_test,
|
@@ -401,5 +423,7 @@ class VisionAgentV2(Agent):
|
|
401
423
|
"plan": plan,
|
402
424
|
}
|
403
425
|
|
404
|
-
def log_progress(self, description: str) -> None:
|
426
|
+
def log_progress(self, description: str, code: Optional[str] = "") -> None:
|
427
|
+
if self.report_progress_callback is not None:
|
428
|
+
self.report_progress_callback(description, code)
|
405
429
|
pass
|
@@ -0,0 +1,305 @@
|
|
1
|
+
import copy
|
2
|
+
import json
|
3
|
+
import logging
|
4
|
+
import sys
|
5
|
+
from pathlib import Path
|
6
|
+
from typing import Any, Dict, List, Optional, Union, cast
|
7
|
+
|
8
|
+
from rich.console import Console
|
9
|
+
from rich.syntax import Syntax
|
10
|
+
from tabulate import tabulate
|
11
|
+
|
12
|
+
from vision_agent.agent import Agent
|
13
|
+
from vision_agent.agent.vision_agent_v3_prompts import (
|
14
|
+
CODE,
|
15
|
+
FEEDBACK,
|
16
|
+
FIX_BUG,
|
17
|
+
PLAN,
|
18
|
+
REFLECT,
|
19
|
+
SIMPLE_TEST,
|
20
|
+
USER_REQ,
|
21
|
+
)
|
22
|
+
from vision_agent.llm import LLM, OpenAILLM
|
23
|
+
from vision_agent.tools.tools_v2 import TOOL_DESCRIPTIONS, TOOLS_DF, UTILITIES_DOCSTRING
|
24
|
+
from vision_agent.utils import Execute
|
25
|
+
from vision_agent.utils.sim import Sim
|
26
|
+
|
27
|
+
logging.basicConfig(stream=sys.stdout)
|
28
|
+
_LOGGER = logging.getLogger(__name__)
|
29
|
+
_MAX_TABULATE_COL_WIDTH = 80
|
30
|
+
_EXECUTE = Execute(600)
|
31
|
+
_CONSOLE = Console()
|
32
|
+
|
33
|
+
|
34
|
+
def format_memory(memory: List[Dict[str, str]]) -> str:
    """Render previous code/feedback pairs into the ``FEEDBACK`` prompt template.

    Parameters:
        memory: Entries read for their ``"code"`` and ``"feedback"`` keys.

    Returns:
        ``FEEDBACK`` formatted with one section per entry, numbered from 0 in
        list order and joined by newlines.
    """
    sections = []
    for idx, entry in enumerate(memory):
        sections.append(
            f"### Feedback {idx}:\nCode: ```python\n{entry['code']}\n```\nFeedback: {entry['feedback']}\n"
        )
    return FEEDBACK.format(feedback="\n".join(sections))
|
43
|
+
|
44
|
+
|
45
|
+
def extract_code(code: str) -> str:
    """Return the contents of the first ```python fenced block in ``code``.

    A fence that starts on its own line is preferred over an inline mention of
    "```python" elsewhere in the text. If no python fence is present the input
    is returned unchanged.

    Parameters:
        code: Raw text, typically an LLM response.

    Returns:
        The text between the opening fence and the next "```". If the closing
        fence is missing, everything after the opening fence is returned.
    """
    if "\n```python" in code:
        start = "\n```python"
    elif "```python" in code:
        start = "```python"
    else:
        return code

    code = code[code.find(start) + len(start) :]
    end = code.find("```")
    # find() returns -1 when the closing fence is missing; slicing with -1
    # would silently drop the last character of the code.
    if end != -1:
        code = code[:end]
    if code.startswith("python\n"):
        code = code[len("python\n") :]
    return code
|
58
|
+
|
59
|
+
|
60
|
+
def extract_json(json_str: str) -> Dict[str, Any]:
    """Parse JSON from ``json_str``, tolerating a surrounding markdown fence.

    The string is first parsed as-is. If that fails, the contents of a
    "```json" fence (or, failing that, a bare "```" fence) are extracted and
    parsed instead.

    Parameters:
        json_str: Raw text, typically an LLM response.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the text is still not valid JSON after the
            fence has been stripped.
    """
    try:
        json_dict = json.loads(json_str)
    except json.JSONDecodeError:
        if "```json" in json_str:
            json_str = json_str[json_str.find("```json") + len("```json") :]
            end = json_str.find("```")
            # -1 means the closing fence is missing; keep the remainder intact.
            if end != -1:
                json_str = json_str[:end]
        elif "```" in json_str:
            json_str = json_str[json_str.find("```") + len("```") :]
            # Prefer the fence that directly follows a closing brace so that a
            # "```" inside an intermediate string value is not mistaken for the
            # end. The brace itself must be kept (hence the + 1).
            end = json_str.find("}```")
            if end != -1:
                json_str = json_str[: end + 1]
            else:
                # Closing fence on its own line (or absent): cut at the last fence.
                end = json_str.rfind("```")
                if end != -1:
                    json_str = json_str[:end]
        json_dict = json.loads(json_str)
    return json_dict  # type: ignore
|
73
|
+
|
74
|
+
|
75
|
+
def write_plan(
    chat: List[Dict[str, str]],
    tool_desc: str,
    working_memory: str,
    model: LLM,
) -> List[Dict[str, str]]:
    """Ask ``model`` for a plan covering the last user message in ``chat``.

    The caller's ``chat`` is not modified; a deep copy is made and its last
    message is replaced with the ``PLAN`` prompt.

    Parameters:
        chat: Conversation whose last message must have role ``"user"``.
        tool_desc: Tool descriptions inserted into the prompt.
        working_memory: Previous feedback inserted into the prompt.
        model: LLM whose ``chat`` method produces the JSON response.

    Returns:
        The value stored under ``"plan"`` in the model's JSON response.

    Raises:
        ValueError: If the last chat message is not from the user.
    """
    messages = copy.deepcopy(chat)
    last_message = messages[-1]
    if last_message["role"] != "user":
        raise ValueError("Last chat message must be from the user.")

    request_context = USER_REQ.format(user_request=last_message["content"])
    last_message["content"] = PLAN.format(
        context=request_context, tool_desc=tool_desc, feedback=working_memory
    )
    response = model.chat(messages)
    return extract_json(response)["plan"]  # type: ignore
|
90
|
+
|
91
|
+
|
92
|
+
def reflect(
    chat: List[Dict[str, str]],
    plan: str,
    code: str,
    model: LLM,
) -> Dict[str, Union[str, bool]]:
    """Ask ``model`` whether ``code`` satisfies the last user message in ``chat``.

    The caller's ``chat`` is not modified; a deep copy is made and its last
    message is replaced with the ``REFLECT`` prompt.

    Parameters:
        chat: Conversation whose last message must have role ``"user"``.
        plan: Plan text inserted into the prompt.
        code: Code text inserted into the prompt.
        model: LLM whose ``chat`` method produces the JSON response.

    Returns:
        The decoded JSON response of the model.

    Raises:
        ValueError: If the last chat message is not from the user.
    """
    messages = copy.deepcopy(chat)
    last_message = messages[-1]
    if last_message["role"] != "user":
        raise ValueError("Last chat message must be from the user.")

    request_context = USER_REQ.format(user_request=last_message["content"])
    last_message["content"] = REFLECT.format(
        context=request_context, plan=plan, code=code
    )
    response = model.chat(messages)
    return extract_json(response)
|
107
|
+
|
108
|
+
|
109
|
+
def write_and_test_code(
    task: str,
    tool_info: str,
    tool_utils: str,
    working_memory: str,
    coder: LLM,
    tester: LLM,
    debugger: LLM,
    verbosity: int = 0,
    max_retries: int = 3,
) -> Dict[str, Any]:
    """Generate code and a test for ``task``, run them, and debug on failure.

    The coder and tester models each produce one fenced snippet. Code and test
    are executed together through the module-level ``_EXECUTE``. While the run
    fails, and for at most ``max_retries`` rounds, the debugger model is asked
    for fixed code/test and the pair is executed again.

    Parameters:
        task: Instructions inserted into the prompts as the question.
        tool_info: Tool documentation given to the coder.
        tool_utils: Utility documentation given to the tester.
        working_memory: Previous feedback inserted into every prompt.
        coder: Callable LLM that writes the code.
        tester: Callable LLM that writes the test.
        debugger: Callable LLM that returns a JSON object with the keys
            ``"reflections"``, ``"code"`` and ``"test"``.
        verbosity: 1 prints the final code and result; 2 prints every attempt.
        max_retries: Maximum number of debugging rounds.

    Returns:
        A dict with ``"code"``, ``"test"``, ``"success"`` and
        ``"working_memory"`` (one code/feedback entry per debugging round).
    """
    code_prompt = CODE.format(
        docstring=tool_info, question=task, feedback=working_memory
    )
    code = extract_code(coder(code_prompt))
    test_prompt = SIMPLE_TEST.format(
        docstring=tool_utils, question=task, code=code, feedback=working_memory
    )
    test = extract_code(tester(test_prompt))

    program = f"{code}\n{test}"
    success, result = _EXECUTE.run_isolation(program)
    if verbosity == 2:
        _LOGGER.info("First code and tests:")
        _CONSOLE.print(
            Syntax(program, "python", theme="gruvbox-dark", line_numbers=True)
        )
        _LOGGER.info(f"First result: {result}")

    new_working_memory = []
    attempt = 0
    while attempt < max_retries and not success:
        debug_prompt = FIX_BUG.format(
            code=code, tests=test, result=result, feedback=working_memory
        )
        fix = extract_json(debugger(debug_prompt))
        # An empty string means the debugger left that part unchanged.
        if fix["code"].strip() != "":
            code = extract_code(fix["code"])
        if fix["test"].strip() != "":
            test = extract_code(fix["test"])
        program = f"{code}\n{test}"
        new_working_memory.append({"code": program, "feedback": fix["reflections"]})

        success, result = _EXECUTE.run_isolation(program)
        if verbosity == 2:
            _LOGGER.info(
                f"Debug attempt {attempt + 1}, reflection: {fix['reflections']}"
            )
            _CONSOLE.print(
                Syntax(program, "python", theme="gruvbox-dark", line_numbers=True)
            )
            _LOGGER.info(f"Debug result: {result}")
        attempt += 1

    if verbosity == 1:
        _CONSOLE.print(
            Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
        )
        _LOGGER.info(f"Result: {result}")

    return {
        "code": code,
        "test": test,
        "success": success,
        "working_memory": new_working_memory,
    }
|
182
|
+
|
183
|
+
|
184
|
+
def retrieve_tools(
    plan: List[Dict[str, str]], tool_recommender: Sim, verbosity: int = 0
) -> str:
    """Collect documentation for the tools recommended for each plan step.

    Parameters:
        plan: Plan steps; each step's ``"instructions"`` is used as the query.
        tool_recommender: Object whose ``top_k`` returns entries with ``"doc"``
            and ``"desc"`` keys.
        verbosity: When 2, the retrieved tool descriptions are logged.

    Returns:
        The distinct tool docs joined by blank lines, in first-seen order.
    """
    tool_info = []
    tool_desc = []
    for task in plan:
        tools = tool_recommender.top_k(task["instructions"], k=2, thresh=0.3)
        tool_info.extend(e["doc"] for e in tools)
        tool_desc.extend(e["desc"] for e in tools)
    if verbosity == 2:
        _LOGGER.info(f"Tools: {tool_desc}")
    # De-duplicate while keeping first-seen order. A set() here would make the
    # order of the docs (and therefore the prompt) vary between runs because
    # string hashing is randomized per process.
    return "\n\n".join(dict.fromkeys(tool_info))
|
197
|
+
|
198
|
+
|
199
|
+
class VisionAgentV3(Agent):
    """Agent that plans, writes code plus a test, runs them, and reflects.

    Each round writes a plan, retrieves tool documentation for it, generates
    and debugs code with a test, and then asks the planner model to reflect on
    whether the code satisfies the user request. Rounds repeat until the
    reflection reports success or ``max_retries`` rounds have run.
    """

    def __init__(
        self,
        timeout: int = 600,
        planner: Optional[LLM] = None,
        coder: Optional[LLM] = None,
        tester: Optional[LLM] = None,
        debugger: Optional[LLM] = None,
        tool_recommender: Optional[Sim] = None,
        verbosity: int = 0,
    ) -> None:
        """Set up the models and tool recommender, defaulting any left as None.

        Parameters:
            timeout: Accepted for interface compatibility.
                NOTE(review): not used here; code execution goes through the
                module-level ``_EXECUTE`` -- confirm whether it should be wired in.
            planner: LLM used for planning and reflection.
            coder: LLM used to write code.
            tester: LLM used to write the test.
            debugger: LLM used to fix failing code/tests.
            tool_recommender: Similarity index used to look up tools.
            verbosity: Logging level passed through to the helpers (0, 1 or 2).
        """
        self.planner = (
            OpenAILLM(temperature=0.0, json_mode=True) if planner is None else planner
        )
        self.coder = OpenAILLM(temperature=0.0) if coder is None else coder
        self.tester = OpenAILLM(temperature=0.0) if tester is None else tester
        self.debugger = (
            OpenAILLM(temperature=0.0, json_mode=True) if debugger is None else debugger
        )

        self.tool_recommender = (
            Sim(TOOLS_DF, sim_key="desc")
            if tool_recommender is None
            else tool_recommender
        )
        self.verbosity = verbosity
        self.max_retries = 3

    def __call__(
        self,
        input: Union[List[Dict[str, str]], str],
        image: Optional[Union[str, Path]] = None,
    ) -> str:
        """Run the workflow and return only the generated code.

        A plain string ``input`` is wrapped as a single user message.
        """
        if isinstance(input, str):
            input = [{"role": "user", "content": input}]
        results = self.chat_with_workflow(input, image)
        return results["code"]  # type: ignore

    def chat_with_workflow(
        self,
        chat: List[Dict[str, str]],
        image: Optional[Union[str, Path]] = None,
    ) -> Dict[str, Any]:
        """Run plan / code / test / reflect rounds for ``chat``.

        Parameters:
            chat: Conversation; must not be empty.
            image: Optional image name appended to every user message.

        Returns:
            A dict with the last ``"code"`` and ``"test"``, the per-round
            ``"plan"`` history and the accumulated ``"working_memory"``.

        Raises:
            ValueError: If ``chat`` is empty.
        """
        if len(chat) == 0:
            raise ValueError("Chat cannot be empty.")

        # Work on a copy so the image note below does not pile up in the
        # caller's messages across repeated calls.
        chat = copy.deepcopy(chat)
        if image is not None:
            for chat_i in chat:
                if chat_i["role"] == "user":
                    chat_i["content"] += f" Image name {image}"

        code = ""
        test = ""
        working_memory: List[Dict[str, str]] = []
        plan: List[Dict[str, Any]] = []
        success = False
        retries = 0

        while not success and retries < self.max_retries:
            plan_i = write_plan(
                chat, TOOL_DESCRIPTIONS, format_memory(working_memory), self.planner
            )
            plan_i_str = "\n-".join([e["instructions"] for e in plan_i])
            if self.verbosity in (1, 2):
                _LOGGER.info(
                    f"""
{tabulate(tabular_data=plan_i, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
                )

            tool_info = retrieve_tools(
                plan_i,
                self.tool_recommender,
                self.verbosity,
            )
            results = write_and_test_code(
                plan_i_str,
                tool_info,
                UTILITIES_DOCSTRING,
                format_memory(working_memory),
                self.coder,
                self.tester,
                self.debugger,
                verbosity=self.verbosity,
            )
            code = cast(str, results["code"])
            test = cast(str, results["test"])
            working_memory.extend(results["working_memory"])  # type: ignore
            plan.append({"code": code, "test": test, "plan": plan_i})

            # The reflection verdict, not the test run, decides whether to stop.
            reflection = reflect(chat, plan_i_str, code, self.planner)
            if self.verbosity > 0:
                _LOGGER.info(f"Reflection: {reflection}")
            feedback = cast(str, reflection["feedback"])
            success = cast(bool, reflection["success"])
            working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})

            # Without this increment the loop never ends when reflection keeps
            # reporting failure.
            retries += 1

        return {
            "code": code,
            "test": test,
            "plan": plan,
            "working_memory": working_memory,
        }

    def log_progress(self, description: str) -> None:
        """Progress hook; intentionally a no-op in this agent."""
        pass
|
@@ -0,0 +1,221 @@
|
|
1
|
+
USER_REQ = """
|
2
|
+
## User Request
|
3
|
+
{user_request}
|
4
|
+
"""
|
5
|
+
|
6
|
+
FEEDBACK = """
|
7
|
+
## This contains code and feedback from previous runs and is used for providing context so you do not make the same mistake again.
|
8
|
+
|
9
|
+
{feedback}
|
10
|
+
"""
|
11
|
+
|
12
|
+
|
13
|
+
PLAN = """
|
14
|
+
**Context**
|
15
|
+
{context}
|
16
|
+
|
17
|
+
**Tools Available**:
|
18
|
+
{tool_desc}
|
19
|
+
|
20
|
+
**Previous Feedback**:
|
21
|
+
{feedback}
|
22
|
+
|
23
|
+
**Instructions**:
|
24
|
+
Based on the context and tools you have available, write a plan of subtasks to achieve the user request utilizing given tools when necessary. Output a list of jsons in the following format:
|
25
|
+
|
26
|
+
```json
|
27
|
+
{{
|
28
|
+
"plan":
|
29
|
+
[
|
30
|
+
{{
|
31
|
+
"instructions": str # what you should do in this task, one short phrase or sentence
|
32
|
+
}}
|
33
|
+
]
|
34
|
+
}}
|
35
|
+
```
|
36
|
+
"""
|
37
|
+
|
38
|
+
CODE = """
|
39
|
+
**Role**: You are a software programmer.
|
40
|
+
|
41
|
+
**Task**: As a programmer, you are required to complete the function. Use a Chain-of-Thought approach to break down the problem, create pseudocode, and then write the code in Python language. Ensure that your code is efficient, readable, and well-commented. Return the requested information from the function you create. Do not call your code, a test will be run after the code is submitted.
|
42
|
+
|
43
|
+
**Documentation**:
|
44
|
+
This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task. They are available through importing `from vision_agent.tools.tools_v2 import *`.
|
45
|
+
|
46
|
+
{docstring}
|
47
|
+
|
48
|
+
**Input Code Snippet**:
|
49
|
+
```python
|
50
|
+
# Your code here
|
51
|
+
```
|
52
|
+
|
53
|
+
**User Instructions**:
|
54
|
+
{question}
|
55
|
+
|
56
|
+
**Previous Feedback**:
|
57
|
+
{feedback}
|
58
|
+
|
59
|
+
**Instructions**:
|
60
|
+
1. **Understand and Clarify**: Make sure you understand the task.
|
61
|
+
2. **Algorithm/Method Selection**: Decide on the most efficient way.
|
62
|
+
3. **Pseudocode Creation**: Write down the steps you will follow in pseudocode.
|
63
|
+
4. **Code Generation**: Translate your pseudocode into executable Python code.
|
64
|
+
"""
|
65
|
+
|
66
|
+
# Prompt template for generating comprehensive test cases. Placeholders:
# {docstring}, {question}, {code}. Literal braces are doubled for str.format.
# The worked examples must be well-formed (closed string literals, closed code
# fences) because the model is expected to imitate them.
TEST = """
**Role**: As a tester, your task is to create comprehensive test cases for the provided code. These test cases should encompass Basic and Edge case scenarios to ensure the code's robustness and reliability if possible.

**Documentation**:
This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task. They are available through importing `from vision_agent.tools.tools_v2 import *`. You do not need to test these functions. Test only the code provided by the user.

{docstring}

**User Instructions**:
{question}

**Input Code Snippet**:
```python
### Please decided how would you want to generate test cases. Based on incomplete code or completed version.
{code}
```

**Instructions**:
1. Verify the fundamental functionality under normal conditions.
2. Ensure each test case is well-documented with comments explaining the scenario it covers.
3. DO NOT use any files that are not provided by the user's instructions, your test must be run and will crash if it tries to load a non-existent file.
4. DO NOT mock any functions, you must test their functionality as is.

You should format your test cases at the end of your response wrapped in ```python ``` tags like in the following example:
```python
# You can run assertions to ensure the function is working as expected
assert function(input) == expected_output, "Test case description"

# You can simply call the function to ensure it runs
function(input)

# Or you can visualize the output
output = function(input)
visualize(output)
```

**Examples**:
## Prompt 1:
```python
def detect_cats_and_dogs(image_path: str) -> Dict[str, List[List[float]]]:
    \""" Detects cats and dogs in an image. Returns a dictionary with
    {{
        "cats": [[x1, y1, x2, y2], ...], "dogs": [[x1, y1, x2, y2], ...]
    }}
    \"""
```

## Completion 1:
```python
# We can test to ensure the output has the correct structure but we cannot test the
# content of the output without knowing the image. We can test on "image.jpg" because
# it is provided by the user so we know it exists.
output = detect_cats_and_dogs("image.jpg")
assert "cats" in output, "The output should contain 'cats'"
assert "dogs" in output, "The output should contain 'dogs'"
```

## Prompt 2:
```python
def find_text(image_path: str, text: str) -> str:
    \""" Finds the text in the image and returns the text. \"""
```

## Completion 2:
```python
# Because we do not know ahead of time what text is in the image, we can only run the
# code and print the results. We can test on "image.jpg" because it is provided by the
# user so we know it exists.
found_text = find_text("image.jpg", "Hello World")
print(found_text)
```
"""
|
137
|
+
|
138
|
+
|
139
|
+
SIMPLE_TEST = """
|
140
|
+
**Role**: As a tester, your task is to create a simple test case for the provided code. This test case should verify the fundamental functionality under normal conditions.
|
141
|
+
|
142
|
+
**Documentation**:
|
143
|
+
This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task. They are available through importing `from vision_agent.tools.tools_v2 import *`. You do not need to test these functions, only the code provided by the user.
|
144
|
+
|
145
|
+
{docstring}
|
146
|
+
|
147
|
+
**User Instructions**:
|
148
|
+
{question}
|
149
|
+
|
150
|
+
**Input Code Snippet**:
|
151
|
+
```python
|
152
|
+
### Please decided how would you want to generate test cases. Based on incomplete code or completed version.
|
153
|
+
{code}
|
154
|
+
```
|
155
|
+
|
156
|
+
**Previous Feedback**:
|
157
|
+
{feedback}
|
158
|
+
|
159
|
+
**Instructions**:
|
160
|
+
1. Verify the fundamental functionality under normal conditions.
|
161
|
+
2. Ensure each test case is well-documented with comments explaining the scenario it covers.
|
162
|
+
3. DO NOT use any files that are not provided by the user's instructions, your test must be run and will crash if it tries to load a non-existent file.
|
163
|
+
4. DO NOT mock any functions, you must test their functionality as is.
|
164
|
+
"""
|
165
|
+
|
166
|
+
|
167
|
+
FIX_BUG = """
|
168
|
+
**Role** As a coder, your job is to find the error in the code and fix it. You are running in a notebook setting so feel free to run !pip install to install missing packages.
|
169
|
+
|
170
|
+
**Instructions**:
|
171
|
+
Please re-complete the code to fix the error message. Here is the previous version:
|
172
|
+
```python
|
173
|
+
{code}
|
174
|
+
```
|
175
|
+
|
176
|
+
When we run this test code:
|
177
|
+
```python
|
178
|
+
{tests}
|
179
|
+
```
|
180
|
+
|
181
|
+
It raises this error:
|
182
|
+
```python
|
183
|
+
{result}
|
184
|
+
```
|
185
|
+
|
186
|
+
This is previous feedback provided on the code:
|
187
|
+
{feedback}
|
188
|
+
|
189
|
+
Please fix the bug by follow the error information and return a JSON object with the following format:
|
190
|
+
{{
|
191
|
+
"reflections": str # any thoughts you have about the bug and how you fixed it
|
192
|
+
"code": str # the fixed code if any, else an empty string
|
193
|
+
"test": str # the fixed test code if any, else an empty string
|
194
|
+
}}
|
195
|
+
"""
|
196
|
+
|
197
|
+
|
198
|
+
REFLECT = """
|
199
|
+
**Role**: You are a reflection agent. Your job is to look at the original user request and the code produced and determine if the code satisfies the user's request. If it does not, you must provide feedback on how to improve the code. You are concerned only if the code meets the user request, not if the code is good or bad.
|
200
|
+
|
201
|
+
**Context**:
|
202
|
+
{context}
|
203
|
+
|
204
|
+
**Plan**:
|
205
|
+
{plan}
|
206
|
+
|
207
|
+
**Code**:
|
208
|
+
{code}
|
209
|
+
|
210
|
+
**Instructions**:
|
211
|
+
1. **Understand the User Request**: Read the user request and understand what the user is asking for.
|
212
|
+
2. **Review the Plan**: Check the plan to see if it is a viable approach to solving the user request.
|
213
|
+
3. **Review the Code**: Check the code to see if it solves the user request.
|
214
|
+
4. DO NOT add any reflections for test cases, these are taken care of.
|
215
|
+
|
216
|
+
Respond in JSON format with the following structure:
|
217
|
+
{{
|
218
|
+
"feedback": str # the feedback you would give to the coder and tester
|
219
|
+
"success": bool # whether the code and tests meet the user request
|
220
|
+
}}
|
221
|
+
"""
|
vision_agent/tools/tools_v2.py
CHANGED
@@ -16,12 +16,12 @@ from scipy.spatial import distance # type: ignore
|
|
16
16
|
from vision_agent.tools.tool_utils import _send_inference_request
|
17
17
|
from vision_agent.utils import extract_frames_from_video
|
18
18
|
from vision_agent.utils.image_utils import (
|
19
|
+
b64_to_pil,
|
19
20
|
convert_to_b64,
|
21
|
+
denormalize_bbox,
|
22
|
+
get_image_size,
|
20
23
|
normalize_bbox,
|
21
24
|
rle_decode,
|
22
|
-
b64_to_pil,
|
23
|
-
get_image_size,
|
24
|
-
denormalize_bbox,
|
25
25
|
)
|
26
26
|
|
27
27
|
COLORS = [
|
vision_agent/utils/execute.py
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
import base64 as b64
|
5
5
|
import io
|
6
6
|
import re
|
7
|
+
from time import sleep
|
7
8
|
from typing import Dict, List, Tuple
|
8
9
|
|
9
10
|
import nbformat
|
@@ -75,6 +76,7 @@ class Execute:
|
|
75
76
|
self.terminate()
|
76
77
|
self.nb = nbformat.v4.new_notebook()
|
77
78
|
self.nb_client = NotebookClient(self.nb, timeout=self.timeout)
|
79
|
+
sleep(1)
|
78
80
|
self.build()
|
79
81
|
|
80
82
|
def run_cell(self, cell: NotebookNode, cell_index: int) -> Tuple[bool, str]:
|
@@ -83,6 +85,7 @@ class Execute:
|
|
83
85
|
return parse_outputs(self.nb.cells[-1].outputs)
|
84
86
|
except CellTimeoutError:
|
85
87
|
run_sync(self.nb_client.km.interrupt_kernel)() # type: ignore
|
88
|
+
sleep(1)
|
86
89
|
return False, "Cell execution timed out."
|
87
90
|
except DeadKernelError:
|
88
91
|
self.reset()
|
@@ -1,5 +1,5 @@
|
|
1
1
|
vision_agent/__init__.py,sha256=GVLHCeK_R-zgldpbcPmOzJat-BkadvkuRCMxDvTIcXs,108
|
2
|
-
vision_agent/agent/__init__.py,sha256=
|
2
|
+
vision_agent/agent/__init__.py,sha256=jpmL6z5e4PFfQM21JbSsRwcERRXn58XFmURAMwWeoRM,249
|
3
3
|
vision_agent/agent/agent.py,sha256=X7kON-g9ePUKumCDaYfQNBX_MEFE-ax5PnRp7-Cc5Wo,529
|
4
4
|
vision_agent/agent/agent_coder.py,sha256=4iB732bX4wDnPAuyYBk6HWlf4aFq2l9EcL695qfDIXw,7004
|
5
5
|
vision_agent/agent/agent_coder_prompts.py,sha256=CJe3v7xvHQ32u3RQAXQga_Tk_4UgU64RBAMHZ3S70KY,5538
|
@@ -9,8 +9,10 @@ vision_agent/agent/reflexion.py,sha256=4gz30BuFMeGxSsTzoDV4p91yE0R8LISXp28IaOI6w
|
|
9
9
|
vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
|
10
10
|
vision_agent/agent/vision_agent.py,sha256=pnx7gtTPazR7Dck5_kfZC3S3QWKu4e28YVigzOicOX0,27130
|
11
11
|
vision_agent/agent/vision_agent_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
|
12
|
-
vision_agent/agent/vision_agent_v2.py,sha256=
|
13
|
-
vision_agent/agent/
|
12
|
+
vision_agent/agent/vision_agent_v2.py,sha256=eQS5w0aURWWCc0x1dqlApep65DKttePR-ZQPSxkWuvw,14487
|
13
|
+
vision_agent/agent/vision_agent_v2_prompts.py,sha256=b_0BMq6GrbGfl09MHrv4mj-mqyE1FxMl3Xq44qD4S1E,6161
|
14
|
+
vision_agent/agent/vision_agent_v3.py,sha256=EGA3zQKVIVdDlZOWwZNgueMnlqKqNwGvSc9v_XM-b34,9696
|
15
|
+
vision_agent/agent/vision_agent_v3_prompts.py,sha256=LRZBKObeb0Bs48vo7vtB2M8loPO1lQzruH-3IiMS5ts,7484
|
14
16
|
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
17
|
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
16
18
|
vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
|
@@ -21,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=dRHXGpjhItXZRQs0r_l3Z3bQIreaZaYP0CJrl8mOJx
|
|
21
23
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
22
24
|
vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
|
23
25
|
vision_agent/tools/tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
|
24
|
-
vision_agent/tools/tools_v2.py,sha256=
|
26
|
+
vision_agent/tools/tools_v2.py,sha256=3Bv1xuZFoPjaCb-VixF5Vl3uoyac03571FXUzBI8FBQ,21404
|
25
27
|
vision_agent/utils/__init__.py,sha256=xsHFyJSDbLdonB9Dh74cwZnVTiT__2OQF3Brd3Nmglc,116
|
26
|
-
vision_agent/utils/execute.py,sha256=
|
28
|
+
vision_agent/utils/execute.py,sha256=8_SfK-IkHH4lXF0JVyV7sDFszZn9HKsh1bFITKGCJ1g,3881
|
27
29
|
vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
|
28
30
|
vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
|
29
31
|
vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
|
30
32
|
vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.
|
33
|
+
vision_agent-0.2.27.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
34
|
+
vision_agent-0.2.27.dist-info/METADATA,sha256=rnWYNUve9b4hBvZp5hlNCz_B_7PMb5mhjj_zo6al-O0,9212
|
35
|
+
vision_agent-0.2.27.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
36
|
+
vision_agent-0.2.27.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|