vision-agent 0.2.27__tar.gz → 0.2.29__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {vision_agent-0.2.27 → vision_agent-0.2.29}/PKG-INFO +1 -1
- {vision_agent-0.2.27 → vision_agent-0.2.29}/pyproject.toml +1 -1
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/agent/agent.py +2 -2
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/agent/agent_coder.py +3 -3
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/agent/vision_agent.py +23 -15
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/agent/vision_agent_v2.py +57 -15
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/agent/vision_agent_v3.py +99 -18
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/agent/vision_agent_v3_prompts.py +8 -3
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/tools/tools_v2.py +6 -3
- {vision_agent-0.2.27 → vision_agent-0.2.29}/LICENSE +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/README.md +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/agent/agent_coder_prompts.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/agent/easytool.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/agent/easytool_prompts.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/agent/reflexion.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/agent/reflexion_prompts.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/agent/vision_agent_v2_prompts.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/llm/__init__.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/llm/llm.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/tools/__init__.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/tools/tools.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.27 → vision_agent-0.2.29}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
from abc import ABC, abstractmethod
|
2
2
|
from pathlib import Path
|
3
|
-
from typing import Dict, List, Optional, Union
|
3
|
+
from typing import Dict, List, Optional, Union, Any
|
4
4
|
|
5
5
|
|
6
6
|
class Agent(ABC):
|
@@ -13,7 +13,7 @@ class Agent(ABC):
|
|
13
13
|
pass
|
14
14
|
|
15
15
|
@abstractmethod
|
16
|
-
def log_progress(self,
|
16
|
+
def log_progress(self, data: Dict[str, Any]) -> None:
|
17
17
|
"""Log the progress of the agent.
|
18
18
|
This is a hook that is intended for reporting the progress of the agent.
|
19
19
|
"""
|
@@ -3,7 +3,7 @@ import logging
|
|
3
3
|
import os
|
4
4
|
import sys
|
5
5
|
from pathlib import Path
|
6
|
-
from typing import Dict, List, Optional, Union
|
6
|
+
from typing import Dict, List, Optional, Union, Any
|
7
7
|
|
8
8
|
from rich.console import Console
|
9
9
|
from rich.syntax import Syntax
|
@@ -206,5 +206,5 @@ class AgentCoder(Agent):
|
|
206
206
|
|
207
207
|
return f"{IMPORT_HELPER}\n{code}"
|
208
208
|
|
209
|
-
def log_progress(self,
|
210
|
-
_LOGGER.info(
|
209
|
+
def log_progress(self, data: Dict[str, Any]) -> None:
|
210
|
+
_LOGGER.info(data)
|
@@ -451,7 +451,7 @@ class VisionAgent(Agent):
|
|
451
451
|
reflect_model: Optional[Union[LLM, LMM]] = None,
|
452
452
|
max_retries: int = 2,
|
453
453
|
verbose: bool = False,
|
454
|
-
report_progress_callback: Optional[Callable[[str], None]] = None,
|
454
|
+
report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
|
455
455
|
):
|
456
456
|
"""VisionAgent constructor.
|
457
457
|
|
@@ -518,23 +518,23 @@ class VisionAgent(Agent):
|
|
518
518
|
self_reflection=self_reflection,
|
519
519
|
)
|
520
520
|
|
521
|
-
def log_progress(self,
|
522
|
-
_LOGGER.info(
|
521
|
+
def log_progress(self, data: Dict[str, Any]) -> None:
|
522
|
+
_LOGGER.info(data)
|
523
523
|
if self.report_progress_callback:
|
524
|
-
self.report_progress_callback(
|
524
|
+
self.report_progress_callback(data)
|
525
525
|
|
526
526
|
def _report_visualization_via_callback(
|
527
527
|
self, images: Sequence[Union[str, Path]]
|
528
528
|
) -> None:
|
529
529
|
"""This is intended for streaming the visualization images via the callback to the client side."""
|
530
530
|
if self.report_progress_callback:
|
531
|
-
self.report_progress_callback("<VIZ>")
|
531
|
+
self.report_progress_callback({"log": "<VIZ>"})
|
532
532
|
if images:
|
533
533
|
for img in images:
|
534
534
|
self.report_progress_callback(
|
535
|
-
f"<IMG>base:64{convert_to_b64(img)}</IMG>"
|
535
|
+
{"log": f"<IMG>base:64{convert_to_b64(img)}</IMG>"}
|
536
536
|
)
|
537
|
-
self.report_progress_callback("</VIZ>")
|
537
|
+
self.report_progress_callback({"log": "</VIZ>"})
|
538
538
|
|
539
539
|
def chat_with_workflow(
|
540
540
|
self,
|
@@ -618,8 +618,8 @@ class VisionAgent(Agent):
|
|
618
618
|
tool_results["answer"] = answer
|
619
619
|
all_tool_results.append(tool_results)
|
620
620
|
|
621
|
-
self.log_progress(f"\tCall Result: {call_results}")
|
622
|
-
self.log_progress(f"\tAnswer: {answer}")
|
621
|
+
self.log_progress({"log": f"\tCall Result: {call_results}"})
|
622
|
+
self.log_progress({"log": f"\tAnswer: {answer}"})
|
623
623
|
answers.append({"task": task_str, "answer": answer})
|
624
624
|
task_depend[task["id"]]["answer"] = answer # type: ignore
|
625
625
|
task_depend[task["id"]]["call_result"] = call_results # type: ignore
|
@@ -644,18 +644,22 @@ class VisionAgent(Agent):
|
|
644
644
|
final_answer,
|
645
645
|
reflection_images,
|
646
646
|
)
|
647
|
-
self.log_progress(f"Reflection: {reflection}")
|
647
|
+
self.log_progress({"log": f"Reflection: {reflection}"})
|
648
648
|
parsed_reflection = parse_reflect(reflection)
|
649
649
|
if parsed_reflection["Finish"]:
|
650
650
|
break
|
651
651
|
else:
|
652
652
|
reflections += "\n" + parsed_reflection["Reflection"]
|
653
653
|
else:
|
654
|
-
self.log_progress(
|
654
|
+
self.log_progress(
|
655
|
+
{"log": "Self Reflection skipped based on user request."}
|
656
|
+
)
|
655
657
|
break
|
656
658
|
# '<ANSWER>' is a symbol to indicate the end of the chat, which is useful for streaming logs.
|
657
659
|
self.log_progress(
|
658
|
-
|
660
|
+
{
|
661
|
+
"log": f"The Vision Agent has concluded this chat. <ANSWER>{final_answer}</ANSWER>"
|
662
|
+
}
|
659
663
|
)
|
660
664
|
|
661
665
|
if visualize_output:
|
@@ -718,8 +722,10 @@ class VisionAgent(Agent):
|
|
718
722
|
}
|
719
723
|
|
720
724
|
self.log_progress(
|
721
|
-
|
725
|
+
{
|
726
|
+
"log": f"""Going to run the following tool(s) in sequence:
|
722
727
|
{tabulate(tabular_data=[tool_results], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
|
728
|
+
}
|
723
729
|
)
|
724
730
|
|
725
731
|
def parse_tool_results(result: Dict[str, Union[Dict, List]]) -> Any:
|
@@ -764,7 +770,9 @@ class VisionAgent(Agent):
|
|
764
770
|
else:
|
765
771
|
task_list = []
|
766
772
|
self.log_progress(
|
767
|
-
|
768
|
-
|
773
|
+
{
|
774
|
+
"log": "Planned tasks:",
|
775
|
+
"plan": task_list,
|
776
|
+
}
|
769
777
|
)
|
770
778
|
return task_list
|
@@ -165,7 +165,7 @@ def write_and_exec_code(
|
|
165
165
|
tool_info: str,
|
166
166
|
exec: Execute,
|
167
167
|
retrieved_ltm: str,
|
168
|
-
log_progress: Callable[
|
168
|
+
log_progress: Callable[[Dict[str, Any]], None],
|
169
169
|
max_retry: int = 3,
|
170
170
|
verbosity: int = 0,
|
171
171
|
) -> Tuple[bool, str, str, Dict[str, List[str]]]:
|
@@ -179,7 +179,23 @@ def write_and_exec_code(
|
|
179
179
|
success, result = exec.run_isolation(code)
|
180
180
|
if verbosity == 2:
|
181
181
|
_CONSOLE.print(Syntax(code, "python", theme="gruvbox-dark", line_numbers=True))
|
182
|
-
log_progress(
|
182
|
+
log_progress(
|
183
|
+
{
|
184
|
+
"log": f"Code success: {success}",
|
185
|
+
}
|
186
|
+
)
|
187
|
+
log_progress(
|
188
|
+
{
|
189
|
+
"log": "Code:",
|
190
|
+
"code": code,
|
191
|
+
}
|
192
|
+
)
|
193
|
+
log_progress(
|
194
|
+
{
|
195
|
+
"log": "Result:",
|
196
|
+
"result": str(result),
|
197
|
+
}
|
198
|
+
)
|
183
199
|
_LOGGER.info(f"\tCode success: {success}, result: {str(result)}")
|
184
200
|
working_memory: Dict[str, List[str]] = {}
|
185
201
|
while not success and counter < max_retry:
|
@@ -206,7 +222,18 @@ def write_and_exec_code(
|
|
206
222
|
_CONSOLE.print(
|
207
223
|
Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
|
208
224
|
)
|
209
|
-
log_progress(
|
225
|
+
log_progress(
|
226
|
+
{
|
227
|
+
"log": "Debugging reflection:",
|
228
|
+
"reflection": reflection,
|
229
|
+
}
|
230
|
+
)
|
231
|
+
log_progress(
|
232
|
+
{
|
233
|
+
"log": "Result:",
|
234
|
+
"result": result,
|
235
|
+
}
|
236
|
+
)
|
210
237
|
_LOGGER.info(f"\tDebugging reflection: {reflection}, result: {result}")
|
211
238
|
|
212
239
|
if success:
|
@@ -227,7 +254,7 @@ def run_plan(
|
|
227
254
|
exec: Execute,
|
228
255
|
code: str,
|
229
256
|
tool_recommender: Sim,
|
230
|
-
log_progress: Callable[
|
257
|
+
log_progress: Callable[[Dict[str, Any]], None],
|
231
258
|
long_term_memory: Optional[Sim] = None,
|
232
259
|
verbosity: int = 0,
|
233
260
|
) -> Tuple[str, str, List[Dict[str, Any]], Dict[str, List[str]]]:
|
@@ -239,8 +266,7 @@ def run_plan(
|
|
239
266
|
|
240
267
|
for task in active_plan:
|
241
268
|
log_progress(
|
242
|
-
|
243
|
-
{tabulate(tabular_data=[task], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
|
269
|
+
{"log": "Going to run the following task(s) in sequence:", "task": task}
|
244
270
|
)
|
245
271
|
_LOGGER.info(
|
246
272
|
f"""
|
@@ -250,7 +276,7 @@ def run_plan(
|
|
250
276
|
tool_info = "\n".join([e["doc"] for e in tools])
|
251
277
|
|
252
278
|
if verbosity == 2:
|
253
|
-
log_progress(f"Tools retrieved: {[e['desc'] for e in tools]}")
|
279
|
+
log_progress({"log": f"Tools retrieved: {[e['desc'] for e in tools]}"})
|
254
280
|
_LOGGER.info(f"Tools retrieved: {[e['desc'] for e in tools]}")
|
255
281
|
|
256
282
|
if long_term_memory is not None:
|
@@ -282,7 +308,17 @@ def run_plan(
|
|
282
308
|
Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
|
283
309
|
)
|
284
310
|
|
285
|
-
log_progress(
|
311
|
+
log_progress(
|
312
|
+
{
|
313
|
+
"log": f"Code success: {success}",
|
314
|
+
}
|
315
|
+
)
|
316
|
+
log_progress(
|
317
|
+
{
|
318
|
+
"log": "Result:",
|
319
|
+
"result": str(result),
|
320
|
+
}
|
321
|
+
)
|
286
322
|
_LOGGER.info(f"\tCode success: {success} result: {str(result)}")
|
287
323
|
|
288
324
|
task["success"] = success
|
@@ -320,7 +356,7 @@ class VisionAgentV2(Agent):
|
|
320
356
|
tool_recommender: Optional[Sim] = None,
|
321
357
|
long_term_memory: Optional[Sim] = None,
|
322
358
|
verbosity: int = 0,
|
323
|
-
report_progress_callback: Optional[Callable[
|
359
|
+
report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
|
324
360
|
) -> None:
|
325
361
|
self.planner = OpenAILLM(temperature=0.0, json_mode=True)
|
326
362
|
self.coder = OpenAILLM(temperature=0.0)
|
@@ -376,8 +412,10 @@ class VisionAgentV2(Agent):
|
|
376
412
|
|
377
413
|
user_req, plan = write_plan(chat, plan, TOOL_DESCRIPTIONS, self.planner)
|
378
414
|
self.log_progress(
|
379
|
-
|
380
|
-
|
415
|
+
{
|
416
|
+
"log": "Plans:",
|
417
|
+
"plan": plan,
|
418
|
+
}
|
381
419
|
)
|
382
420
|
_LOGGER.info(
|
383
421
|
f"""Plan:
|
@@ -412,8 +450,12 @@ class VisionAgentV2(Agent):
|
|
412
450
|
|
413
451
|
retries += 1
|
414
452
|
|
415
|
-
self.log_progress(
|
416
|
-
|
453
|
+
self.log_progress(
|
454
|
+
{
|
455
|
+
"log": f"The Vision Agent V2 has concluded this chat.\nSuccess: {success}",
|
456
|
+
"finished": True,
|
457
|
+
}
|
458
|
+
)
|
417
459
|
|
418
460
|
return {
|
419
461
|
"code": working_code,
|
@@ -423,7 +465,7 @@ class VisionAgentV2(Agent):
|
|
423
465
|
"plan": plan,
|
424
466
|
}
|
425
467
|
|
426
|
-
def log_progress(self,
|
468
|
+
def log_progress(self, data: Dict[str, Any]) -> None:
|
427
469
|
if self.report_progress_callback is not None:
|
428
|
-
self.report_progress_callback(
|
470
|
+
self.report_progress_callback(data)
|
429
471
|
pass
|
@@ -3,7 +3,7 @@ import json
|
|
3
3
|
import logging
|
4
4
|
import sys
|
5
5
|
from pathlib import Path
|
6
|
-
from typing import Any, Dict, List, Optional, Union, cast
|
6
|
+
from typing import Any, Dict, List, Optional, Union, cast, Callable, no_type_check
|
7
7
|
|
8
8
|
from rich.console import Console
|
9
9
|
from rich.syntax import Syntax
|
@@ -114,8 +114,10 @@ def write_and_test_code(
|
|
114
114
|
coder: LLM,
|
115
115
|
tester: LLM,
|
116
116
|
debugger: LLM,
|
117
|
+
log_progress: Callable[[Dict[str, Any]], None],
|
117
118
|
verbosity: int = 0,
|
118
119
|
max_retries: int = 3,
|
120
|
+
input_media: Optional[Union[str, Path]] = None,
|
119
121
|
) -> Dict[str, Any]:
|
120
122
|
code = extract_code(
|
121
123
|
coder(CODE.format(docstring=tool_info, question=task, feedback=working_memory))
|
@@ -123,18 +125,40 @@ def write_and_test_code(
|
|
123
125
|
test = extract_code(
|
124
126
|
tester(
|
125
127
|
SIMPLE_TEST.format(
|
126
|
-
docstring=tool_utils,
|
128
|
+
docstring=tool_utils,
|
129
|
+
question=task,
|
130
|
+
code=code,
|
131
|
+
feedback=working_memory,
|
132
|
+
media=input_media,
|
127
133
|
)
|
128
134
|
)
|
129
135
|
)
|
130
136
|
|
131
137
|
success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
|
132
138
|
if verbosity == 2:
|
133
|
-
_LOGGER.info("
|
139
|
+
_LOGGER.info("Initial code and tests:")
|
140
|
+
log_progress(
|
141
|
+
{
|
142
|
+
"log": "Code:",
|
143
|
+
"code": code,
|
144
|
+
}
|
145
|
+
)
|
146
|
+
log_progress(
|
147
|
+
{
|
148
|
+
"log": "Test:",
|
149
|
+
"code": test,
|
150
|
+
}
|
151
|
+
)
|
134
152
|
_CONSOLE.print(
|
135
153
|
Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
|
136
154
|
)
|
137
|
-
|
155
|
+
log_progress(
|
156
|
+
{
|
157
|
+
"log": "Result:",
|
158
|
+
"result": result,
|
159
|
+
}
|
160
|
+
)
|
161
|
+
_LOGGER.info(f"Initial result: {result}")
|
138
162
|
|
139
163
|
count = 0
|
140
164
|
new_working_memory = []
|
@@ -156,6 +180,12 @@ def write_and_test_code(
|
|
156
180
|
|
157
181
|
success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
|
158
182
|
if verbosity == 2:
|
183
|
+
log_progress(
|
184
|
+
{
|
185
|
+
"log": f"Debug attempt {count + 1}, reflection:",
|
186
|
+
"result": fixed_code_and_test["reflections"],
|
187
|
+
}
|
188
|
+
)
|
159
189
|
_LOGGER.info(
|
160
190
|
f"Debug attempt {count + 1}, reflection: {fixed_code_and_test['reflections']}"
|
161
191
|
)
|
@@ -164,25 +194,36 @@ def write_and_test_code(
|
|
164
194
|
f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True
|
165
195
|
)
|
166
196
|
)
|
197
|
+
log_progress(
|
198
|
+
{
|
199
|
+
"log": "Debug result:",
|
200
|
+
"result": result,
|
201
|
+
}
|
202
|
+
)
|
167
203
|
_LOGGER.info(f"Debug result: {result}")
|
168
204
|
count += 1
|
169
205
|
|
170
|
-
if verbosity
|
206
|
+
if verbosity >= 1:
|
207
|
+
_LOGGER.info("Final code and tests:")
|
171
208
|
_CONSOLE.print(
|
172
209
|
Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
|
173
210
|
)
|
174
|
-
_LOGGER.info(f"Result: {result}")
|
211
|
+
_LOGGER.info(f"Final Result: {result}")
|
175
212
|
|
176
213
|
return {
|
177
214
|
"code": code,
|
178
215
|
"test": test,
|
179
216
|
"success": success,
|
217
|
+
"test_result": result,
|
180
218
|
"working_memory": new_working_memory,
|
181
219
|
}
|
182
220
|
|
183
221
|
|
184
222
|
def retrieve_tools(
|
185
|
-
plan: List[Dict[str, str]],
|
223
|
+
plan: List[Dict[str, str]],
|
224
|
+
tool_recommender: Sim,
|
225
|
+
log_progress: Callable[[Dict[str, Any]], None],
|
226
|
+
verbosity: int = 0,
|
186
227
|
) -> str:
|
187
228
|
tool_info = []
|
188
229
|
tool_desc = []
|
@@ -191,6 +232,12 @@ def retrieve_tools(
|
|
191
232
|
tool_info.extend([e["doc"] for e in tools])
|
192
233
|
tool_desc.extend([e["desc"] for e in tools])
|
193
234
|
if verbosity == 2:
|
235
|
+
log_progress(
|
236
|
+
{
|
237
|
+
"log": "Retrieved tools:",
|
238
|
+
"tools": tool_desc,
|
239
|
+
}
|
240
|
+
)
|
194
241
|
_LOGGER.info(f"Tools: {tool_desc}")
|
195
242
|
tool_info_set = set(tool_info)
|
196
243
|
return "\n\n".join(tool_info_set)
|
@@ -206,6 +253,7 @@ class VisionAgentV3(Agent):
|
|
206
253
|
debugger: Optional[LLM] = None,
|
207
254
|
tool_recommender: Optional[Sim] = None,
|
208
255
|
verbosity: int = 0,
|
256
|
+
report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
|
209
257
|
) -> None:
|
210
258
|
self.planner = (
|
211
259
|
OpenAILLM(temperature=0.0, json_mode=True) if planner is None else planner
|
@@ -222,22 +270,26 @@ class VisionAgentV3(Agent):
|
|
222
270
|
else tool_recommender
|
223
271
|
)
|
224
272
|
self.verbosity = verbosity
|
225
|
-
self.max_retries =
|
273
|
+
self.max_retries = 2
|
274
|
+
self.report_progress_callback = report_progress_callback
|
226
275
|
|
276
|
+
@no_type_check
|
227
277
|
def __call__(
|
228
278
|
self,
|
229
279
|
input: Union[List[Dict[str, str]], str],
|
230
280
|
image: Optional[Union[str, Path]] = None,
|
231
|
-
) -> str:
|
281
|
+
) -> Dict[str, Any]:
|
232
282
|
if isinstance(input, str):
|
233
283
|
input = [{"role": "user", "content": input}]
|
234
284
|
results = self.chat_with_workflow(input, image)
|
235
|
-
|
285
|
+
results.pop("working_memory")
|
286
|
+
return results
|
236
287
|
|
237
288
|
def chat_with_workflow(
|
238
289
|
self,
|
239
290
|
chat: List[Dict[str, str]],
|
240
291
|
image: Optional[Union[str, Path]] = None,
|
292
|
+
self_reflection: bool = False,
|
241
293
|
) -> Dict[str, Any]:
|
242
294
|
if len(chat) == 0:
|
243
295
|
raise ValueError("Chat cannot be empty.")
|
@@ -260,7 +312,14 @@ class VisionAgentV3(Agent):
|
|
260
312
|
chat, TOOL_DESCRIPTIONS, format_memory(working_memory), self.planner
|
261
313
|
)
|
262
314
|
plan_i_str = "\n-".join([e["instructions"] for e in plan_i])
|
263
|
-
if self.verbosity
|
315
|
+
if self.verbosity >= 1:
|
316
|
+
self.log_progress(
|
317
|
+
{
|
318
|
+
"log": "Going to run the following plan(s) in sequence:\n",
|
319
|
+
"plan": plan_i,
|
320
|
+
}
|
321
|
+
)
|
322
|
+
|
264
323
|
_LOGGER.info(
|
265
324
|
f"""
|
266
325
|
{tabulate(tabular_data=plan_i, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
|
@@ -269,6 +328,7 @@ class VisionAgentV3(Agent):
|
|
269
328
|
tool_info = retrieve_tools(
|
270
329
|
plan_i,
|
271
330
|
self.tool_recommender,
|
331
|
+
self.log_progress,
|
272
332
|
self.verbosity,
|
273
333
|
)
|
274
334
|
results = write_and_test_code(
|
@@ -279,7 +339,9 @@ class VisionAgentV3(Agent):
|
|
279
339
|
self.coder,
|
280
340
|
self.tester,
|
281
341
|
self.debugger,
|
342
|
+
self.log_progress,
|
282
343
|
verbosity=self.verbosity,
|
344
|
+
input_media=image,
|
283
345
|
)
|
284
346
|
success = cast(bool, results["success"])
|
285
347
|
code = cast(str, results["code"])
|
@@ -287,19 +349,38 @@ class VisionAgentV3(Agent):
|
|
287
349
|
working_memory.extend(results["working_memory"]) # type: ignore
|
288
350
|
plan.append({"code": code, "test": test, "plan": plan_i})
|
289
351
|
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
352
|
+
if self_reflection:
|
353
|
+
reflection = reflect(chat, plan_i_str, code, self.planner)
|
354
|
+
if self.verbosity > 0:
|
355
|
+
self.log_progress(
|
356
|
+
{
|
357
|
+
"log": "Reflection:",
|
358
|
+
"reflection": reflection,
|
359
|
+
}
|
360
|
+
)
|
361
|
+
_LOGGER.info(f"Reflection: {reflection}")
|
362
|
+
feedback = cast(str, reflection["feedback"])
|
363
|
+
success = cast(bool, reflection["success"])
|
364
|
+
working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
|
365
|
+
|
366
|
+
retries += 1
|
367
|
+
|
368
|
+
self.log_progress(
|
369
|
+
{
|
370
|
+
"log": f"The Vision Agent V3 has concluded this chat.\nSuccess: {success}",
|
371
|
+
"finished": True,
|
372
|
+
}
|
373
|
+
)
|
296
374
|
|
297
375
|
return {
|
298
376
|
"code": code,
|
299
377
|
"test": test,
|
378
|
+
"test_result": results["test_result"],
|
300
379
|
"plan": plan,
|
301
380
|
"working_memory": working_memory,
|
302
381
|
}
|
303
382
|
|
304
|
-
def log_progress(self,
|
383
|
+
def log_progress(self, data: Dict[str, Any]) -> None:
|
384
|
+
if self.report_progress_callback is not None:
|
385
|
+
self.report_progress_callback(data)
|
305
386
|
pass
|
@@ -61,6 +61,7 @@ This is the documentation for the functions you have access to. You may call any
|
|
61
61
|
2. **Algorithm/Method Selection**: Decide on the most efficient way.
|
62
62
|
3. **Pseudocode Creation**: Write down the steps you will follow in pseudocode.
|
63
63
|
4. **Code Generation**: Translate your pseudocode into executable Python code.
|
64
|
+
5. **Logging**: Log the output of the custom functions that were provided to you from `from vision_agent.tools.tools_v2 import *`. Use a debug flag in the function parameters to toggle logging on and off.
|
64
65
|
"""
|
65
66
|
|
66
67
|
TEST = """
|
@@ -149,7 +150,7 @@ This is the documentation for the functions you have access to. You may call any
|
|
149
150
|
|
150
151
|
**Input Code Snippet**:
|
151
152
|
```python
|
152
|
-
### Please
|
153
|
+
### Please decide how would you want to generate test cases. Based on incomplete code or completed version.
|
153
154
|
{code}
|
154
155
|
```
|
155
156
|
|
@@ -159,8 +160,12 @@ This is the documentation for the functions you have access to. You may call any
|
|
159
160
|
**Instructions**:
|
160
161
|
1. Verify the fundamental functionality under normal conditions.
|
161
162
|
2. Ensure each test case is well-documented with comments explaining the scenario it covers.
|
162
|
-
3.
|
163
|
-
4. DO NOT
|
163
|
+
3. Your test case MUST run only on the given image which is {media}
|
164
|
+
4. DO NOT use any non-existent or dummy image or video files that are not provided by the user's instructions.
|
165
|
+
5. DO NOT mock any functions, you must test their functionality as is.
|
166
|
+
6. DO NOT assert the output value, run the code and verify it runs without any errors and assert only the output format or data structure.
|
167
|
+
7. DO NOT import the testing function as it will available in the testing environment.
|
168
|
+
8. Print the output of the function that is being tested.
|
164
169
|
"""
|
165
170
|
|
166
171
|
|
@@ -416,12 +416,15 @@ def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float:
|
|
416
416
|
return cast(float, np.min(dist_matrix))
|
417
417
|
|
418
418
|
|
419
|
-
def closest_box_distance(
|
419
|
+
def closest_box_distance(
|
420
|
+
box1: List[float], box2: List[float], image_size: Tuple[int, int]
|
421
|
+
) -> float:
|
420
422
|
"""'closest_box_distance' calculates the closest distance between two bounding boxes.
|
421
423
|
|
422
424
|
Parameters:
|
423
425
|
box1 (List[float]): The first bounding box.
|
424
426
|
box2 (List[float]): The second bounding box.
|
427
|
+
image_size (Tuple[int, int]): The size of the image given as (height, width).
|
425
428
|
|
426
429
|
Returns:
|
427
430
|
float: The closest distance between the two bounding boxes.
|
@@ -432,8 +435,8 @@ def closest_box_distance(box1: List[float], box2: List[float]) -> float:
|
|
432
435
|
141.42
|
433
436
|
"""
|
434
437
|
|
435
|
-
x11, y11, x12, y12 = box1
|
436
|
-
x21, y21, x22, y22 = box2
|
438
|
+
x11, y11, x12, y12 = denormalize_bbox(box1, image_size)
|
439
|
+
x21, y21, x22, y22 = denormalize_bbox(box2, image_size)
|
437
440
|
|
438
441
|
horizontal_distance = np.max([0, x21 - x12, x11 - x22])
|
439
442
|
vertical_distance = np.max([0, y21 - y12, y11 - y22])
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|