vision-agent 0.2.25__py3-none-any.whl → 0.2.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,3 +4,4 @@ from .easytool import EasyTool
4
4
  from .reflexion import Reflexion
5
5
  from .vision_agent import VisionAgent
6
6
  from .vision_agent_v2 import VisionAgentV2
7
+ from .vision_agent_v3 import VisionAgentV3
@@ -10,7 +10,7 @@ from rich.syntax import Syntax
10
10
  from tabulate import tabulate
11
11
 
12
12
  from vision_agent.agent import Agent
13
- from vision_agent.agent.vision_agent_v2_prompt import (
13
+ from vision_agent.agent.vision_agent_v2_prompts import (
14
14
  CODE,
15
15
  CODE_SYS_MSG,
16
16
  DEBUG,
@@ -165,6 +165,7 @@ def write_and_exec_code(
165
165
  tool_info: str,
166
166
  exec: Execute,
167
167
  retrieved_ltm: str,
168
+ log_progress: Callable[..., str],
168
169
  max_retry: int = 3,
169
170
  verbosity: int = 0,
170
171
  ) -> Tuple[bool, str, str, Dict[str, List[str]]]:
@@ -178,6 +179,7 @@ def write_and_exec_code(
178
179
  success, result = exec.run_isolation(code)
179
180
  if verbosity == 2:
180
181
  _CONSOLE.print(Syntax(code, "python", theme="gruvbox-dark", line_numbers=True))
182
+ log_progress(f"\tCode success: {success}\n\tResult: {str(result)}", code)
181
183
  _LOGGER.info(f"\tCode success: {success}, result: {str(result)}")
182
184
  working_memory: Dict[str, List[str]] = {}
183
185
  while not success and counter < max_retry:
@@ -204,6 +206,7 @@ def write_and_exec_code(
204
206
  _CONSOLE.print(
205
207
  Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
206
208
  )
209
+ log_progress(f"\tDebugging reflection: {reflection}\n\tResult: {result}")
207
210
  _LOGGER.info(f"\tDebugging reflection: {reflection}, result: {result}")
208
211
 
209
212
  if success:
@@ -224,6 +227,7 @@ def run_plan(
224
227
  exec: Execute,
225
228
  code: str,
226
229
  tool_recommender: Sim,
230
+ log_progress: Callable[..., str],
227
231
  long_term_memory: Optional[Sim] = None,
228
232
  verbosity: int = 0,
229
233
  ) -> Tuple[str, str, List[Dict[str, Any]], Dict[str, List[str]]]:
@@ -234,6 +238,10 @@ def run_plan(
234
238
  working_memory: Dict[str, List[str]] = {}
235
239
 
236
240
  for task in active_plan:
241
+ log_progress(
242
+ f"""Going to run the following task(s) in sequence:
243
+ {tabulate(tabular_data=[task], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
244
+ )
237
245
  _LOGGER.info(
238
246
  f"""
239
247
  {tabulate(tabular_data=[task], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
@@ -242,6 +250,7 @@ def run_plan(
242
250
  tool_info = "\n".join([e["doc"] for e in tools])
243
251
 
244
252
  if verbosity == 2:
253
+ log_progress(f"Tools retrieved: {[e['desc'] for e in tools]}")
245
254
  _LOGGER.info(f"Tools retrieved: {[e['desc'] for e in tools]}")
246
255
 
247
256
  if long_term_memory is not None:
@@ -258,6 +267,7 @@ def run_plan(
258
267
  tool_info,
259
268
  exec,
260
269
  retrieved_ltm,
270
+ log_progress,
261
271
  verbosity=verbosity,
262
272
  )
263
273
  if task["type"] == "code":
@@ -271,6 +281,8 @@ def run_plan(
271
281
  _CONSOLE.print(
272
282
  Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
273
283
  )
284
+
285
+ log_progress(f"\tCode success: {success}\n\tResult: {str(result)}")
274
286
  _LOGGER.info(f"\tCode success: {success} result: {str(result)}")
275
287
 
276
288
  task["success"] = success
@@ -308,10 +320,12 @@ class VisionAgentV2(Agent):
308
320
  tool_recommender: Optional[Sim] = None,
309
321
  long_term_memory: Optional[Sim] = None,
310
322
  verbosity: int = 0,
323
+ report_progress_callback: Optional[Callable[..., Any]] = None,
311
324
  ) -> None:
312
325
  self.planner = OpenAILLM(temperature=0.0, json_mode=True)
313
326
  self.coder = OpenAILLM(temperature=0.0)
314
327
  self.exec = Execute(timeout=timeout)
328
+ self.report_progress_callback = report_progress_callback
315
329
  if tool_recommender is None:
316
330
  self.tool_recommender = Sim(TOOLS_DF, sim_key="desc")
317
331
  else:
@@ -361,6 +375,10 @@ class VisionAgentV2(Agent):
361
375
  working_code = task["code"]
362
376
 
363
377
  user_req, plan = write_plan(chat, plan, TOOL_DESCRIPTIONS, self.planner)
378
+ self.log_progress(
379
+ f"""Plan:
380
+ {tabulate(tabular_data=plan, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
381
+ )
364
382
  _LOGGER.info(
365
383
  f"""Plan:
366
384
  {tabulate(tabular_data=plan, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
@@ -379,6 +397,7 @@ class VisionAgentV2(Agent):
379
397
  self.exec,
380
398
  working_code,
381
399
  self.tool_recommender,
400
+ self.log_progress,
382
401
  self.long_term_memory,
383
402
  self.verbosity,
384
403
  )
@@ -393,6 +412,9 @@ class VisionAgentV2(Agent):
393
412
 
394
413
  retries += 1
395
414
 
415
+ self.log_progress("The Vision Agent V2 has concluded this chat.")
416
+ self.log_progress(f"<ANSWER>Plan success: {success}</ANSWER>")
417
+
396
418
  return {
397
419
  "code": working_code,
398
420
  "test": working_test,
@@ -401,5 +423,7 @@ class VisionAgentV2(Agent):
401
423
  "plan": plan,
402
424
  }
403
425
 
404
- def log_progress(self, description: str) -> None:
426
+ def log_progress(self, description: str, code: Optional[str] = "") -> None:
427
+ if self.report_progress_callback is not None:
428
+ self.report_progress_callback(description, code)
405
429
  pass
@@ -0,0 +1,305 @@
1
+ import copy
2
+ import json
3
+ import logging
4
+ import sys
5
+ from pathlib import Path
6
+ from typing import Any, Dict, List, Optional, Union, cast
7
+
8
+ from rich.console import Console
9
+ from rich.syntax import Syntax
10
+ from tabulate import tabulate
11
+
12
+ from vision_agent.agent import Agent
13
+ from vision_agent.agent.vision_agent_v3_prompts import (
14
+ CODE,
15
+ FEEDBACK,
16
+ FIX_BUG,
17
+ PLAN,
18
+ REFLECT,
19
+ SIMPLE_TEST,
20
+ USER_REQ,
21
+ )
22
+ from vision_agent.llm import LLM, OpenAILLM
23
+ from vision_agent.tools.tools_v2 import TOOL_DESCRIPTIONS, TOOLS_DF, UTILITIES_DOCSTRING
24
+ from vision_agent.utils import Execute
25
+ from vision_agent.utils.sim import Sim
26
+
27
+ logging.basicConfig(stream=sys.stdout)
28
+ _LOGGER = logging.getLogger(__name__)
29
+ _MAX_TABULATE_COL_WIDTH = 80
30
+ _EXECUTE = Execute(600)
31
+ _CONSOLE = Console()
32
+
33
+
34
+ def format_memory(memory: List[Dict[str, str]]) -> str:
35
+ return FEEDBACK.format(
36
+ feedback="\n".join(
37
+ [
38
+ f"### Feedback {i}:\nCode: ```python\n{m['code']}\n```\nFeedback: {m['feedback']}\n"
39
+ for i, m in enumerate(memory)
40
+ ]
41
+ )
42
+ )
43
+
44
+
45
+ def extract_code(code: str) -> str:
46
+ if "\n```python" in code:
47
+ start = "\n```python"
48
+ elif "```python" in code:
49
+ start = "```python"
50
+ else:
51
+ return code
52
+
53
+ code = code[code.find(start) + len(start) :]
54
+ code = code[: code.find("```")]
55
+ if code.startswith("python\n"):
56
+ code = code[len("python\n") :]
57
+ return code
58
+
59
+
60
+ def extract_json(json_str: str) -> Dict[str, Any]:
61
+ try:
62
+ json_dict = json.loads(json_str)
63
+ except json.JSONDecodeError:
64
+ if "```json" in json_str:
65
+ json_str = json_str[json_str.find("```json") + len("```json") :]
66
+ json_str = json_str[: json_str.find("```")]
67
+ elif "```" in json_str:
68
+ json_str = json_str[json_str.find("```") + len("```") :]
69
+ # get the last ``` not one from an intermediate string
70
+ json_str = json_str[: json_str.find("}```")]
71
+ json_dict = json.loads(json_str)
72
+ return json_dict # type: ignore
73
+
74
+
75
+ def write_plan(
76
+ chat: List[Dict[str, str]],
77
+ tool_desc: str,
78
+ working_memory: str,
79
+ model: LLM,
80
+ ) -> List[Dict[str, str]]:
81
+ chat = copy.deepcopy(chat)
82
+ if chat[-1]["role"] != "user":
83
+ raise ValueError("Last chat message must be from the user.")
84
+
85
+ user_request = chat[-1]["content"]
86
+ context = USER_REQ.format(user_request=user_request)
87
+ prompt = PLAN.format(context=context, tool_desc=tool_desc, feedback=working_memory)
88
+ chat[-1]["content"] = prompt
89
+ return extract_json(model.chat(chat))["plan"] # type: ignore
90
+
91
+
92
+ def reflect(
93
+ chat: List[Dict[str, str]],
94
+ plan: str,
95
+ code: str,
96
+ model: LLM,
97
+ ) -> Dict[str, Union[str, bool]]:
98
+ chat = copy.deepcopy(chat)
99
+ if chat[-1]["role"] != "user":
100
+ raise ValueError("Last chat message must be from the user.")
101
+
102
+ user_request = chat[-1]["content"]
103
+ context = USER_REQ.format(user_request=user_request)
104
+ prompt = REFLECT.format(context=context, plan=plan, code=code)
105
+ chat[-1]["content"] = prompt
106
+ return extract_json(model.chat(chat))
107
+
108
+
109
+ def write_and_test_code(
110
+ task: str,
111
+ tool_info: str,
112
+ tool_utils: str,
113
+ working_memory: str,
114
+ coder: LLM,
115
+ tester: LLM,
116
+ debugger: LLM,
117
+ verbosity: int = 0,
118
+ max_retries: int = 3,
119
+ ) -> Dict[str, Any]:
120
+ code = extract_code(
121
+ coder(CODE.format(docstring=tool_info, question=task, feedback=working_memory))
122
+ )
123
+ test = extract_code(
124
+ tester(
125
+ SIMPLE_TEST.format(
126
+ docstring=tool_utils, question=task, code=code, feedback=working_memory
127
+ )
128
+ )
129
+ )
130
+
131
+ success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
132
+ if verbosity == 2:
133
+ _LOGGER.info("First code and tests:")
134
+ _CONSOLE.print(
135
+ Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
136
+ )
137
+ _LOGGER.info(f"First result: {result}")
138
+
139
+ count = 0
140
+ new_working_memory = []
141
+ while not success and count < max_retries:
142
+ fixed_code_and_test = extract_json(
143
+ debugger(
144
+ FIX_BUG.format(
145
+ code=code, tests=test, result=result, feedback=working_memory
146
+ )
147
+ )
148
+ )
149
+ if fixed_code_and_test["code"].strip() != "":
150
+ code = extract_code(fixed_code_and_test["code"])
151
+ if fixed_code_and_test["test"].strip() != "":
152
+ test = extract_code(fixed_code_and_test["test"])
153
+ new_working_memory.append(
154
+ {"code": f"{code}\n{test}", "feedback": fixed_code_and_test["reflections"]}
155
+ )
156
+
157
+ success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
158
+ if verbosity == 2:
159
+ _LOGGER.info(
160
+ f"Debug attempt {count + 1}, reflection: {fixed_code_and_test['reflections']}"
161
+ )
162
+ _CONSOLE.print(
163
+ Syntax(
164
+ f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True
165
+ )
166
+ )
167
+ _LOGGER.info(f"Debug result: {result}")
168
+ count += 1
169
+
170
+ if verbosity == 1:
171
+ _CONSOLE.print(
172
+ Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
173
+ )
174
+ _LOGGER.info(f"Result: {result}")
175
+
176
+ return {
177
+ "code": code,
178
+ "test": test,
179
+ "success": success,
180
+ "working_memory": new_working_memory,
181
+ }
182
+
183
+
184
+ def retrieve_tools(
185
+ plan: List[Dict[str, str]], tool_recommender: Sim, verbosity: int = 0
186
+ ) -> str:
187
+ tool_info = []
188
+ tool_desc = []
189
+ for task in plan:
190
+ tools = tool_recommender.top_k(task["instructions"], k=2, thresh=0.3)
191
+ tool_info.extend([e["doc"] for e in tools])
192
+ tool_desc.extend([e["desc"] for e in tools])
193
+ if verbosity == 2:
194
+ _LOGGER.info(f"Tools: {tool_desc}")
195
+ tool_info_set = set(tool_info)
196
+ return "\n\n".join(tool_info_set)
197
+
198
+
199
+ class VisionAgentV3(Agent):
200
+ def __init__(
201
+ self,
202
+ timeout: int = 600,
203
+ planner: Optional[LLM] = None,
204
+ coder: Optional[LLM] = None,
205
+ tester: Optional[LLM] = None,
206
+ debugger: Optional[LLM] = None,
207
+ tool_recommender: Optional[Sim] = None,
208
+ verbosity: int = 0,
209
+ ) -> None:
210
+ self.planner = (
211
+ OpenAILLM(temperature=0.0, json_mode=True) if planner is None else planner
212
+ )
213
+ self.coder = OpenAILLM(temperature=0.0) if coder is None else coder
214
+ self.tester = OpenAILLM(temperature=0.0) if tester is None else tester
215
+ self.debugger = (
216
+ OpenAILLM(temperature=0.0, json_mode=True) if debugger is None else debugger
217
+ )
218
+
219
+ self.tool_recommender = (
220
+ Sim(TOOLS_DF, sim_key="desc")
221
+ if tool_recommender is None
222
+ else tool_recommender
223
+ )
224
+ self.verbosity = verbosity
225
+ self.max_retries = 3
226
+
227
+ def __call__(
228
+ self,
229
+ input: Union[List[Dict[str, str]], str],
230
+ image: Optional[Union[str, Path]] = None,
231
+ ) -> str:
232
+ if isinstance(input, str):
233
+ input = [{"role": "user", "content": input}]
234
+ results = self.chat_with_workflow(input, image)
235
+ return results["code"] # type: ignore
236
+
237
+ def chat_with_workflow(
238
+ self,
239
+ chat: List[Dict[str, str]],
240
+ image: Optional[Union[str, Path]] = None,
241
+ ) -> Dict[str, Any]:
242
+ if len(chat) == 0:
243
+ raise ValueError("Chat cannot be empty.")
244
+
245
+ if image is not None:
246
+ for chat_i in chat:
247
+ if chat_i["role"] == "user":
248
+ chat_i["content"] += f" Image name {image}"
249
+
250
+ code = ""
251
+ test = ""
252
+ working_memory: List[Dict[str, str]] = []
253
+ results = {"code": "", "test": "", "plan": []}
254
+ plan = []
255
+ success = False
256
+ retries = 0
257
+
258
+ while not success and retries < self.max_retries:
259
+ plan_i = write_plan(
260
+ chat, TOOL_DESCRIPTIONS, format_memory(working_memory), self.planner
261
+ )
262
+ plan_i_str = "\n-".join([e["instructions"] for e in plan_i])
263
+ if self.verbosity == 1 or self.verbosity == 2:
264
+ _LOGGER.info(
265
+ f"""
266
+ {tabulate(tabular_data=plan_i, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
267
+ )
268
+
269
+ tool_info = retrieve_tools(
270
+ plan_i,
271
+ self.tool_recommender,
272
+ self.verbosity,
273
+ )
274
+ results = write_and_test_code(
275
+ plan_i_str,
276
+ tool_info,
277
+ UTILITIES_DOCSTRING,
278
+ format_memory(working_memory),
279
+ self.coder,
280
+ self.tester,
281
+ self.debugger,
282
+ verbosity=self.verbosity,
283
+ )
284
+ success = cast(bool, results["success"])
285
+ code = cast(str, results["code"])
286
+ test = cast(str, results["test"])
287
+ working_memory.extend(results["working_memory"]) # type: ignore
288
+ plan.append({"code": code, "test": test, "plan": plan_i})
289
+
290
+ reflection = reflect(chat, plan_i_str, code, self.planner)
291
+ if self.verbosity > 0:
292
+ _LOGGER.info(f"Reflection: {reflection}")
293
+ feedback = cast(str, reflection["feedback"])
294
+ success = cast(bool, reflection["success"])
295
+ working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
296
+
297
+ return {
298
+ "code": code,
299
+ "test": test,
300
+ "plan": plan,
301
+ "working_memory": working_memory,
302
+ }
303
+
304
+ def log_progress(self, description: str) -> None:
305
+ pass
@@ -0,0 +1,221 @@
1
+ USER_REQ = """
2
+ ## User Request
3
+ {user_request}
4
+ """
5
+
6
+ FEEDBACK = """
7
+ ## This contains code and feedback from previous runs and is used for providing context so you do not make the same mistake again.
8
+
9
+ {feedback}
10
+ """
11
+
12
+
13
+ PLAN = """
14
+ **Context**
15
+ {context}
16
+
17
+ **Tools Available**:
18
+ {tool_desc}
19
+
20
+ **Previous Feedback**:
21
+ {feedback}
22
+
23
+ **Instructions**:
24
+ Based on the context and tools you have available, write a plan of subtasks to achieve the user request utilizing given tools when necessary. Output a list of jsons in the following format:
25
+
26
+ ```json
27
+ {{
28
+ "plan":
29
+ [
30
+ {{
31
+ "instructions": str # what you should do in this task, one short phrase or sentence
32
+ }}
33
+ ]
34
+ }}
35
+ ```
36
+ """
37
+
38
+ CODE = """
39
+ **Role**: You are a software programmer.
40
+
41
+ **Task**: As a programmer, you are required to complete the function. Use a Chain-of-Thought approach to break down the problem, create pseudocode, and then write the code in Python language. Ensure that your code is efficient, readable, and well-commented. Return the requested information from the function you create. Do not call your code, a test will be run after the code is submitted.
42
+
43
+ **Documentation**:
44
+ This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task. They are available through importing `from vision_agent.tools.tools_v2 import *`.
45
+
46
+ {docstring}
47
+
48
+ **Input Code Snippet**:
49
+ ```python
50
+ # Your code here
51
+ ```
52
+
53
+ **User Instructions**:
54
+ {question}
55
+
56
+ **Previous Feedback**:
57
+ {feedback}
58
+
59
+ **Instructions**:
60
+ 1. **Understand and Clarify**: Make sure you understand the task.
61
+ 2. **Algorithm/Method Selection**: Decide on the most efficient way.
62
+ 3. **Pseudocode Creation**: Write down the steps you will follow in pseudocode.
63
+ 4. **Code Generation**: Translate your pseudocode into executable Python code.
64
+ """
65
+
66
+ TEST = """
67
+ **Role**: As a tester, your task is to create comprehensive test cases for the provided code. These test cases should encompass Basic and Edge case scenarios to ensure the code's robustness and reliability if possible.
68
+
69
+ **Documentation**:
70
+ This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task. They are available through importing `from vision_agent.tools.tools_v2 import *`. You do not need to test these functions. Test only the code provided by the user.
71
+
72
+ {docstring}
73
+
74
+ **User Instructions**:
75
+ {question}
76
+
77
+ **Input Code Snippet**:
78
+ ```python
79
+ ### Please decided how would you want to generate test cases. Based on incomplete code or completed version.
80
+ {code}
81
+ ```
82
+
83
+ **Instructions**:
84
+ 1. Verify the fundamental functionality under normal conditions.
85
+ 2. Ensure each test case is well-documented with comments explaining the scenario it covers.
86
+ 3. DO NOT use any files that are not provided by the user's instructions, your test must be run and will crash if it tries to load a non-existent file.
87
+ 4. DO NOT mock any functions, you must test their functionality as is.
88
+
89
+ You should format your test cases at the end of your response wrapped in ```python ``` tags like in the following example:
90
+ ```python
91
+ # You can run assertions to ensure the function is working as expected
92
+ assert function(input) == expected_output, "Test case description"
93
+
94
+ # You can simply call the function to ensure it runs
95
+ function(input)
96
+
97
+ # Or you can visualize the output
98
+ output = function(input)
99
+ visualize(output)
100
+ ```
101
+
102
+ **Examples**:
103
+ ## Prompt 1:
104
+ ```python
105
+ def detect_cats_and_dogs(image_path: str) -> Dict[str, List[List[float]]]:
106
+ \""" Detects cats and dogs in an image. Returns a dictionary with
107
+ {{
108
+ "cats": [[x1, y1, x2, y2], ...], "dogs": [[x1, y1, x2, y2], ...]
109
+ }}
110
+ \"""
111
+ ```
112
+
113
+ ## Completion 1:
114
+ ```python
115
+ # We can test to ensure the output has the correct structure but we cannot test the
116
+ # content of the output without knowing the image. We can test on "image.jpg" because
117
+ # it is provided by the user so we know it exists.
118
+ output = detect_cats_and_dogs("image.jpg")
119
+ assert "cats" in output, "The output should contain 'cats'
120
+ assert "dogs" in output, "The output should contain 'dogs'
121
+ ```
122
+
123
+ ## Prompt 2:
124
+ ```python
125
+ def find_text(image_path: str, text: str) -> str:
126
+ \""" Finds the text in the image and returns the text. \"""
127
+
128
+ ## Completion 2:
129
+ ```python
130
+ # Because we do not know ahead of time what text is in the image, we can only run the
131
+ # code and print the results. We can test on "image.jpg" because it is provided by the
132
+ # user so we know it exists.
133
+ found_text = find_text("image.jpg", "Hello World")
134
+ print(found_text)
135
+ ```
136
+ """
137
+
138
+
139
+ SIMPLE_TEST = """
140
+ **Role**: As a tester, your task is to create a simple test case for the provided code. This test case should verify the fundamental functionality under normal conditions.
141
+
142
+ **Documentation**:
143
+ This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task. They are available through importing `from vision_agent.tools.tools_v2 import *`. You do not need to test these functions, only the code provided by the user.
144
+
145
+ {docstring}
146
+
147
+ **User Instructions**:
148
+ {question}
149
+
150
+ **Input Code Snippet**:
151
+ ```python
152
+ ### Please decided how would you want to generate test cases. Based on incomplete code or completed version.
153
+ {code}
154
+ ```
155
+
156
+ **Previous Feedback**:
157
+ {feedback}
158
+
159
+ **Instructions**:
160
+ 1. Verify the fundamental functionality under normal conditions.
161
+ 2. Ensure each test case is well-documented with comments explaining the scenario it covers.
162
+ 3. DO NOT use any files that are not provided by the user's instructions, your test must be run and will crash if it tries to load a non-existent file.
163
+ 4. DO NOT mock any functions, you must test their functionality as is.
164
+ """
165
+
166
+
167
+ FIX_BUG = """
168
+ **Role** As a coder, your job is to find the error in the code and fix it. You are running in a notebook setting so feel free to run !pip install to install missing packages.
169
+
170
+ **Instructions**:
171
+ Please re-complete the code to fix the error message. Here is the previous version:
172
+ ```python
173
+ {code}
174
+ ```
175
+
176
+ When we run this test code:
177
+ ```python
178
+ {tests}
179
+ ```
180
+
181
+ It raises this error:
182
+ ```python
183
+ {result}
184
+ ```
185
+
186
+ This is previous feedback provided on the code:
187
+ {feedback}
188
+
189
+ Please fix the bug by follow the error information and return a JSON object with the following format:
190
+ {{
191
+ "reflections": str # any thoughts you have about the bug and how you fixed it
192
+ "code": str # the fixed code if any, else an empty string
193
+ "test": str # the fixed test code if any, else an empty string
194
+ }}
195
+ """
196
+
197
+
198
+ REFLECT = """
199
+ **Role**: You are a reflection agent. Your job is to look at the original user request and the code produced and determine if the code satisfies the user's request. If it does not, you must provide feedback on how to improve the code. You are concerned only if the code meets the user request, not if the code is good or bad.
200
+
201
+ **Context**:
202
+ {context}
203
+
204
+ **Plan**:
205
+ {plan}
206
+
207
+ **Code**:
208
+ {code}
209
+
210
+ **Instructions**:
211
+ 1. **Understand the User Request**: Read the user request and understand what the user is asking for.
212
+ 2. **Review the Plan**: Check the plan to see if it is a viable approach to solving the user request.
213
+ 3. **Review the Code**: Check the code to see if it solves the user request.
214
+ 4. DO NOT add any reflections for test cases, these are taken care of.
215
+
216
+ Respond in JSON format with the following structure:
217
+ {{
218
+ "feedback": str # the feedback you would give to the coder and tester
219
+ "success": bool # whether the code and tests meet the user request
220
+ }}
221
+ """
@@ -16,12 +16,12 @@ from scipy.spatial import distance # type: ignore
16
16
  from vision_agent.tools.tool_utils import _send_inference_request
17
17
  from vision_agent.utils import extract_frames_from_video
18
18
  from vision_agent.utils.image_utils import (
19
+ b64_to_pil,
19
20
  convert_to_b64,
21
+ denormalize_bbox,
22
+ get_image_size,
20
23
  normalize_bbox,
21
24
  rle_decode,
22
- b64_to_pil,
23
- get_image_size,
24
- denormalize_bbox,
25
25
  )
26
26
 
27
27
  COLORS = [
@@ -4,6 +4,7 @@
4
4
  import base64 as b64
5
5
  import io
6
6
  import re
7
+ from time import sleep
7
8
  from typing import Dict, List, Tuple
8
9
 
9
10
  import nbformat
@@ -75,6 +76,7 @@ class Execute:
75
76
  self.terminate()
76
77
  self.nb = nbformat.v4.new_notebook()
77
78
  self.nb_client = NotebookClient(self.nb, timeout=self.timeout)
79
+ sleep(1)
78
80
  self.build()
79
81
 
80
82
  def run_cell(self, cell: NotebookNode, cell_index: int) -> Tuple[bool, str]:
@@ -83,6 +85,7 @@ class Execute:
83
85
  return parse_outputs(self.nb.cells[-1].outputs)
84
86
  except CellTimeoutError:
85
87
  run_sync(self.nb_client.km.interrupt_kernel)() # type: ignore
88
+ sleep(1)
86
89
  return False, "Cell execution timed out."
87
90
  except DeadKernelError:
88
91
  self.reset()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.25
3
+ Version: 0.2.27
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -1,5 +1,5 @@
1
1
  vision_agent/__init__.py,sha256=GVLHCeK_R-zgldpbcPmOzJat-BkadvkuRCMxDvTIcXs,108
2
- vision_agent/agent/__init__.py,sha256=Zv8lc91mPy0iDySId38_vc4mo56JQ9mCMvUWdAKQjh0,206
2
+ vision_agent/agent/__init__.py,sha256=jpmL6z5e4PFfQM21JbSsRwcERRXn58XFmURAMwWeoRM,249
3
3
  vision_agent/agent/agent.py,sha256=X7kON-g9ePUKumCDaYfQNBX_MEFE-ax5PnRp7-Cc5Wo,529
4
4
  vision_agent/agent/agent_coder.py,sha256=4iB732bX4wDnPAuyYBk6HWlf4aFq2l9EcL695qfDIXw,7004
5
5
  vision_agent/agent/agent_coder_prompts.py,sha256=CJe3v7xvHQ32u3RQAXQga_Tk_4UgU64RBAMHZ3S70KY,5538
@@ -9,8 +9,10 @@ vision_agent/agent/reflexion.py,sha256=4gz30BuFMeGxSsTzoDV4p91yE0R8LISXp28IaOI6w
9
9
  vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
10
10
  vision_agent/agent/vision_agent.py,sha256=pnx7gtTPazR7Dck5_kfZC3S3QWKu4e28YVigzOicOX0,27130
11
11
  vision_agent/agent/vision_agent_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
12
- vision_agent/agent/vision_agent_v2.py,sha256=3qjvaj-yyrXmoY_cecUsiuY4Rn6MmJanFZeoXFJRK2c,13229
13
- vision_agent/agent/vision_agent_v2_prompt.py,sha256=b_0BMq6GrbGfl09MHrv4mj-mqyE1FxMl3Xq44qD4S1E,6161
12
+ vision_agent/agent/vision_agent_v2.py,sha256=eQS5w0aURWWCc0x1dqlApep65DKttePR-ZQPSxkWuvw,14487
13
+ vision_agent/agent/vision_agent_v2_prompts.py,sha256=b_0BMq6GrbGfl09MHrv4mj-mqyE1FxMl3Xq44qD4S1E,6161
14
+ vision_agent/agent/vision_agent_v3.py,sha256=EGA3zQKVIVdDlZOWwZNgueMnlqKqNwGvSc9v_XM-b34,9696
15
+ vision_agent/agent/vision_agent_v3_prompts.py,sha256=LRZBKObeb0Bs48vo7vtB2M8loPO1lQzruH-3IiMS5ts,7484
14
16
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
17
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
16
18
  vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
@@ -21,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=dRHXGpjhItXZRQs0r_l3Z3bQIreaZaYP0CJrl8mOJx
21
23
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
22
24
  vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
23
25
  vision_agent/tools/tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
24
- vision_agent/tools/tools_v2.py,sha256=Tdam-cWBI4ipXWwGyxim-SK07zP97_hcdUtYd1a4CnI,21404
26
+ vision_agent/tools/tools_v2.py,sha256=3Bv1xuZFoPjaCb-VixF5Vl3uoyac03571FXUzBI8FBQ,21404
25
27
  vision_agent/utils/__init__.py,sha256=xsHFyJSDbLdonB9Dh74cwZnVTiT__2OQF3Brd3Nmglc,116
26
- vision_agent/utils/execute.py,sha256=RC_jKrm2kOWwzNe9xKuA2xJcbsNcD0Hb95_o3_Le0_E,3820
28
+ vision_agent/utils/execute.py,sha256=8_SfK-IkHH4lXF0JVyV7sDFszZn9HKsh1bFITKGCJ1g,3881
27
29
  vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
28
30
  vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
29
31
  vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
30
32
  vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
31
- vision_agent-0.2.25.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
32
- vision_agent-0.2.25.dist-info/METADATA,sha256=5bycdwOp0pnRpUBQo_JM1c1Abq2fmWJcVYE_7YgtoUY,9212
33
- vision_agent-0.2.25.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
34
- vision_agent-0.2.25.dist-info/RECORD,,
33
+ vision_agent-0.2.27.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
+ vision_agent-0.2.27.dist-info/METADATA,sha256=rnWYNUve9b4hBvZp5hlNCz_B_7PMb5mhjj_zo6al-O0,9212
35
+ vision_agent-0.2.27.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
+ vision_agent-0.2.27.dist-info/RECORD,,