vision-agent 0.2.81__py3-none-any.whl → 0.2.83__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +90 -85
- {vision_agent-0.2.81.dist-info → vision_agent-0.2.83.dist-info}/METADATA +1 -1
- {vision_agent-0.2.81.dist-info → vision_agent-0.2.83.dist-info}/RECORD +5 -5
- {vision_agent-0.2.81.dist-info → vision_agent-0.2.83.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.81.dist-info → vision_agent-0.2.83.dist-info}/WHEEL +0 -0
@@ -176,6 +176,7 @@ def pick_plan(
|
|
176
176
|
model: LMM,
|
177
177
|
code_interpreter: CodeInterpreter,
|
178
178
|
verbosity: int = 0,
|
179
|
+
max_retries: int = 3,
|
179
180
|
) -> Tuple[str, str]:
|
180
181
|
chat = copy.deepcopy(chat)
|
181
182
|
if chat[-1]["role"] != "user":
|
@@ -192,13 +193,13 @@ def pick_plan(
|
|
192
193
|
if len(tool_output.logs.stdout) > 0:
|
193
194
|
tool_output_str = tool_output.logs.stdout[0]
|
194
195
|
|
195
|
-
if verbosity
|
196
|
+
if verbosity == 2:
|
196
197
|
_print_code("Initial code and tests:", code)
|
197
198
|
_LOGGER.info(f"Initial code execution result:\n{tool_output.text()}")
|
198
199
|
|
199
200
|
# retry if the tool output is empty or code fails
|
200
|
-
count =
|
201
|
-
while (not tool_output.success or tool_output_str == "") and count <
|
201
|
+
count = 0
|
202
|
+
while (not tool_output.success or tool_output_str == "") and count < max_retries:
|
202
203
|
prompt = TEST_PLANS.format(
|
203
204
|
docstring=tool_info,
|
204
205
|
plans=plan_str,
|
@@ -214,12 +215,15 @@ def pick_plan(
|
|
214
215
|
if len(tool_output.logs.stdout) > 0:
|
215
216
|
tool_output_str = tool_output.logs.stdout[0]
|
216
217
|
|
217
|
-
if verbosity ==
|
218
|
+
if verbosity == 2:
|
218
219
|
_print_code("Code and test after attempted fix:", code)
|
219
220
|
_LOGGER.info(f"Code execution result after attempte {count}")
|
220
221
|
|
221
222
|
count += 1
|
222
223
|
|
224
|
+
if verbosity >= 1:
|
225
|
+
_print_code("Final code:", code)
|
226
|
+
|
223
227
|
user_req = chat[-1]["content"]
|
224
228
|
context = USER_REQ.format(user_request=user_req)
|
225
229
|
# because the tool picker model gets the image as well, we have to be careful with
|
@@ -408,7 +412,7 @@ def debug_code(
|
|
408
412
|
FIX_BUG.format(
|
409
413
|
code=code,
|
410
414
|
tests=test,
|
411
|
-
result="\n".join(result.text().splitlines()[-
|
415
|
+
result="\n".join(result.text().splitlines()[-100:]),
|
412
416
|
feedback=format_memory(working_memory + new_working_memory),
|
413
417
|
)
|
414
418
|
)
|
@@ -655,7 +659,15 @@ class VisionAgent(Agent):
|
|
655
659
|
int_chat = cast(
|
656
660
|
List[Message],
|
657
661
|
[
|
658
|
-
|
662
|
+
(
|
663
|
+
{
|
664
|
+
"role": c["role"],
|
665
|
+
"content": c["content"],
|
666
|
+
"media": c["media"],
|
667
|
+
}
|
668
|
+
if "media" in c
|
669
|
+
else {"role": c["role"], "content": c["content"]}
|
670
|
+
)
|
659
671
|
for c in chat
|
660
672
|
],
|
661
673
|
)
|
@@ -665,92 +677,85 @@ class VisionAgent(Agent):
|
|
665
677
|
working_memory: List[Dict[str, str]] = []
|
666
678
|
results = {"code": "", "test": "", "plan": []}
|
667
679
|
plan = []
|
668
|
-
success = False
|
669
|
-
retries = 0
|
670
|
-
|
671
|
-
while not success and retries < self.max_retries:
|
672
|
-
self.log_progress(
|
673
|
-
{
|
674
|
-
"type": "plans",
|
675
|
-
"status": "started",
|
676
|
-
}
|
677
|
-
)
|
678
|
-
plans = write_plans(
|
679
|
-
int_chat,
|
680
|
-
T.TOOL_DESCRIPTIONS,
|
681
|
-
format_memory(working_memory),
|
682
|
-
self.planner,
|
683
|
-
)
|
684
680
|
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
best_plan, tool_output_str = pick_plan(
|
698
|
-
int_chat,
|
699
|
-
plans,
|
700
|
-
tool_infos["all"],
|
701
|
-
self.coder,
|
702
|
-
code_interpreter,
|
703
|
-
verbosity=self.verbosity,
|
704
|
-
)
|
681
|
+
self.log_progress(
|
682
|
+
{
|
683
|
+
"type": "plans",
|
684
|
+
"status": "started",
|
685
|
+
}
|
686
|
+
)
|
687
|
+
plans = write_plans(
|
688
|
+
int_chat,
|
689
|
+
T.TOOL_DESCRIPTIONS,
|
690
|
+
format_memory(working_memory),
|
691
|
+
self.planner,
|
692
|
+
)
|
705
693
|
|
706
|
-
|
707
|
-
|
708
|
-
tool_info = tool_infos[best_plan]
|
709
|
-
else:
|
710
|
-
if self.verbosity >= 1:
|
711
|
-
_LOGGER.warning(
|
712
|
-
f"Best plan {best_plan} not found in plans or tool_infos. Using the first plan and tool info."
|
713
|
-
)
|
714
|
-
k = list(plans.keys())[0]
|
715
|
-
plan_i = plans[k]
|
716
|
-
tool_info = tool_infos[k]
|
717
|
-
|
718
|
-
self.log_progress(
|
719
|
-
{
|
720
|
-
"type": "plans",
|
721
|
-
"status": "completed",
|
722
|
-
"payload": plan_i,
|
723
|
-
}
|
724
|
-
)
|
725
|
-
if self.verbosity >= 1:
|
694
|
+
if self.verbosity >= 1:
|
695
|
+
for p in plans:
|
726
696
|
_LOGGER.info(
|
727
|
-
f"
|
697
|
+
f"\n{tabulate(tabular_data=plans[p], headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
|
728
698
|
)
|
729
699
|
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
700
|
+
tool_infos = retrieve_tools(
|
701
|
+
plans,
|
702
|
+
self.tool_recommender,
|
703
|
+
self.log_progress,
|
704
|
+
self.verbosity,
|
705
|
+
)
|
706
|
+
best_plan, tool_output_str = pick_plan(
|
707
|
+
int_chat,
|
708
|
+
plans,
|
709
|
+
tool_infos["all"],
|
710
|
+
self.coder,
|
711
|
+
code_interpreter,
|
712
|
+
verbosity=self.verbosity,
|
713
|
+
)
|
714
|
+
|
715
|
+
if best_plan in plans and best_plan in tool_infos:
|
716
|
+
plan_i = plans[best_plan]
|
717
|
+
tool_info = tool_infos[best_plan]
|
718
|
+
else:
|
719
|
+
if self.verbosity >= 1:
|
720
|
+
_LOGGER.warning(
|
721
|
+
f"Best plan {best_plan} not found in plans or tool_infos. Using the first plan and tool info."
|
722
|
+
)
|
723
|
+
k = list(plans.keys())[0]
|
724
|
+
plan_i = plans[k]
|
725
|
+
tool_info = tool_infos[k]
|
726
|
+
|
727
|
+
self.log_progress(
|
728
|
+
{
|
729
|
+
"type": "plans",
|
730
|
+
"status": "completed",
|
731
|
+
"payload": plan_i,
|
732
|
+
}
|
733
|
+
)
|
734
|
+
if self.verbosity >= 1:
|
735
|
+
_LOGGER.info(
|
736
|
+
f"Picked best plan:\n{tabulate(tabular_data=plan_i, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
|
746
737
|
)
|
747
|
-
success = cast(bool, results["success"])
|
748
|
-
code = cast(str, results["code"])
|
749
|
-
test = cast(str, results["test"])
|
750
|
-
working_memory.extend(results["working_memory"]) # type: ignore
|
751
|
-
plan.append({"code": code, "test": test, "plan": plan_i})
|
752
738
|
|
753
|
-
|
739
|
+
results = write_and_test_code(
|
740
|
+
chat=[{"role": c["role"], "content": c["content"]} for c in int_chat],
|
741
|
+
plan="\n-" + "\n-".join([e["instructions"] for e in plan_i]),
|
742
|
+
tool_info=tool_info,
|
743
|
+
tool_output=tool_output_str,
|
744
|
+
tool_utils=T.UTILITIES_DOCSTRING,
|
745
|
+
working_memory=working_memory,
|
746
|
+
coder=self.coder,
|
747
|
+
tester=self.tester,
|
748
|
+
debugger=self.debugger,
|
749
|
+
code_interpreter=code_interpreter,
|
750
|
+
log_progress=self.log_progress,
|
751
|
+
verbosity=self.verbosity,
|
752
|
+
media=media_list,
|
753
|
+
)
|
754
|
+
success = cast(bool, results["success"])
|
755
|
+
code = cast(str, results["code"])
|
756
|
+
test = cast(str, results["test"])
|
757
|
+
working_memory.extend(results["working_memory"]) # type: ignore
|
758
|
+
plan.append({"code": code, "test": test, "plan": plan_i})
|
754
759
|
|
755
760
|
execution_result = cast(Execution, results["test_result"])
|
756
761
|
self.log_progress(
|
@@ -1,7 +1,7 @@
|
|
1
1
|
vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
2
2
|
vision_agent/agent/__init__.py,sha256=IUwfbPMcT8X_rnXMLmI8gJ4ltsHy_XSs9eLiKURJxeY,81
|
3
3
|
vision_agent/agent/agent.py,sha256=ZK-5lOtd9-eD9aWcXssJpnOyvZuO7_5hAmnb-6sWVe8,569
|
4
|
-
vision_agent/agent/vision_agent.py,sha256=
|
4
|
+
vision_agent/agent/vision_agent.py,sha256=fLCkqYJzk9SNtu8TzKBk0TLZrXDMTCqgI3FI-zkc-qs,28768
|
5
5
|
vision_agent/agent/vision_agent_prompts.py,sha256=brBV-SmzyzTG5M9nfV3R5xdYT_BUYOKzxNFmTa2Sp-o,11049
|
6
6
|
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
7
7
|
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
@@ -18,7 +18,7 @@ vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOk
|
|
18
18
|
vision_agent/utils/sim.py,sha256=1HTaiVaBiKeyXIy21IYGXlPw0TipOyw9FPOJDfyLI94,4409
|
19
19
|
vision_agent/utils/type_defs.py,sha256=QeQRRIlklZMWzxROcCn5ELxP89nYdXGydy1rAiSpZZw,1384
|
20
20
|
vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
|
21
|
-
vision_agent-0.2.
|
22
|
-
vision_agent-0.2.
|
23
|
-
vision_agent-0.2.
|
24
|
-
vision_agent-0.2.
|
21
|
+
vision_agent-0.2.83.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
22
|
+
vision_agent-0.2.83.dist-info/METADATA,sha256=SPanaWYdrDs5gulJR79jVU3pk6alRN9nu7RpwqEJzDU,9433
|
23
|
+
vision_agent-0.2.83.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
24
|
+
vision_agent-0.2.83.dist-info/RECORD,,
|
File without changes
|
File without changes
|