vision-agent 0.2.81__tar.gz → 0.2.83__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {vision_agent-0.2.81 → vision_agent-0.2.83}/PKG-INFO +1 -1
  2. {vision_agent-0.2.81 → vision_agent-0.2.83}/pyproject.toml +1 -1
  3. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/agent/vision_agent.py +90 -85
  4. {vision_agent-0.2.81 → vision_agent-0.2.83}/LICENSE +0 -0
  5. {vision_agent-0.2.81 → vision_agent-0.2.83}/README.md +0 -0
  6. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/__init__.py +0 -0
  7. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/agent/__init__.py +0 -0
  8. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/agent/agent.py +0 -0
  9. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/agent/vision_agent_prompts.py +0 -0
  10. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/fonts/__init__.py +0 -0
  11. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  12. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/lmm/__init__.py +0 -0
  13. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/lmm/lmm.py +0 -0
  14. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/tools/__init__.py +0 -0
  15. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/tools/prompts.py +0 -0
  16. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/tools/tool_utils.py +0 -0
  17. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/tools/tools.py +0 -0
  18. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/utils/__init__.py +0 -0
  19. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/utils/exceptions.py +0 -0
  20. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/utils/execute.py +0 -0
  21. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/utils/image_utils.py +0 -0
  22. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/utils/sim.py +0 -0
  23. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/utils/type_defs.py +0 -0
  24. {vision_agent-0.2.81 → vision_agent-0.2.83}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.81
3
+ Version: 0.2.83
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.81"
7
+ version = "0.2.83"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -176,6 +176,7 @@ def pick_plan(
176
176
  model: LMM,
177
177
  code_interpreter: CodeInterpreter,
178
178
  verbosity: int = 0,
179
+ max_retries: int = 3,
179
180
  ) -> Tuple[str, str]:
180
181
  chat = copy.deepcopy(chat)
181
182
  if chat[-1]["role"] != "user":
@@ -192,13 +193,13 @@ def pick_plan(
192
193
  if len(tool_output.logs.stdout) > 0:
193
194
  tool_output_str = tool_output.logs.stdout[0]
194
195
 
195
- if verbosity >= 1:
196
+ if verbosity == 2:
196
197
  _print_code("Initial code and tests:", code)
197
198
  _LOGGER.info(f"Initial code execution result:\n{tool_output.text()}")
198
199
 
199
200
  # retry if the tool output is empty or code fails
200
- count = 1
201
- while (not tool_output.success or tool_output_str == "") and count < 3:
201
+ count = 0
202
+ while (not tool_output.success or tool_output_str == "") and count < max_retries:
202
203
  prompt = TEST_PLANS.format(
203
204
  docstring=tool_info,
204
205
  plans=plan_str,
@@ -214,12 +215,15 @@ def pick_plan(
214
215
  if len(tool_output.logs.stdout) > 0:
215
216
  tool_output_str = tool_output.logs.stdout[0]
216
217
 
217
- if verbosity == 1:
218
+ if verbosity == 2:
218
219
  _print_code("Code and test after attempted fix:", code)
219
220
  _LOGGER.info(f"Code execution result after attempte {count}")
220
221
 
221
222
  count += 1
222
223
 
224
+ if verbosity >= 1:
225
+ _print_code("Final code:", code)
226
+
223
227
  user_req = chat[-1]["content"]
224
228
  context = USER_REQ.format(user_request=user_req)
225
229
  # because the tool picker model gets the image as well, we have to be careful with
@@ -408,7 +412,7 @@ def debug_code(
408
412
  FIX_BUG.format(
409
413
  code=code,
410
414
  tests=test,
411
- result="\n".join(result.text().splitlines()[-50:]),
415
+ result="\n".join(result.text().splitlines()[-100:]),
412
416
  feedback=format_memory(working_memory + new_working_memory),
413
417
  )
414
418
  )
@@ -655,7 +659,15 @@ class VisionAgent(Agent):
655
659
  int_chat = cast(
656
660
  List[Message],
657
661
  [
658
- {"role": c["role"], "content": c["content"], "media": c["media"]}
662
+ (
663
+ {
664
+ "role": c["role"],
665
+ "content": c["content"],
666
+ "media": c["media"],
667
+ }
668
+ if "media" in c
669
+ else {"role": c["role"], "content": c["content"]}
670
+ )
659
671
  for c in chat
660
672
  ],
661
673
  )
@@ -665,92 +677,85 @@ class VisionAgent(Agent):
665
677
  working_memory: List[Dict[str, str]] = []
666
678
  results = {"code": "", "test": "", "plan": []}
667
679
  plan = []
668
- success = False
669
- retries = 0
670
-
671
- while not success and retries < self.max_retries:
672
- self.log_progress(
673
- {
674
- "type": "plans",
675
- "status": "started",
676
- }
677
- )
678
- plans = write_plans(
679
- int_chat,
680
- T.TOOL_DESCRIPTIONS,
681
- format_memory(working_memory),
682
- self.planner,
683
- )
684
680
 
685
- if self.verbosity >= 1:
686
- for p in plans:
687
- _LOGGER.info(
688
- f"\n{tabulate(tabular_data=plans[p], headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
689
- )
690
-
691
- tool_infos = retrieve_tools(
692
- plans,
693
- self.tool_recommender,
694
- self.log_progress,
695
- self.verbosity,
696
- )
697
- best_plan, tool_output_str = pick_plan(
698
- int_chat,
699
- plans,
700
- tool_infos["all"],
701
- self.coder,
702
- code_interpreter,
703
- verbosity=self.verbosity,
704
- )
681
+ self.log_progress(
682
+ {
683
+ "type": "plans",
684
+ "status": "started",
685
+ }
686
+ )
687
+ plans = write_plans(
688
+ int_chat,
689
+ T.TOOL_DESCRIPTIONS,
690
+ format_memory(working_memory),
691
+ self.planner,
692
+ )
705
693
 
706
- if best_plan in plans and best_plan in tool_infos:
707
- plan_i = plans[best_plan]
708
- tool_info = tool_infos[best_plan]
709
- else:
710
- if self.verbosity >= 1:
711
- _LOGGER.warning(
712
- f"Best plan {best_plan} not found in plans or tool_infos. Using the first plan and tool info."
713
- )
714
- k = list(plans.keys())[0]
715
- plan_i = plans[k]
716
- tool_info = tool_infos[k]
717
-
718
- self.log_progress(
719
- {
720
- "type": "plans",
721
- "status": "completed",
722
- "payload": plan_i,
723
- }
724
- )
725
- if self.verbosity >= 1:
694
+ if self.verbosity >= 1:
695
+ for p in plans:
726
696
  _LOGGER.info(
727
- f"Picked best plan:\n{tabulate(tabular_data=plan_i, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
697
+ f"\n{tabulate(tabular_data=plans[p], headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
728
698
  )
729
699
 
730
- results = write_and_test_code(
731
- chat=[
732
- {"role": c["role"], "content": c["content"]} for c in int_chat
733
- ],
734
- plan="\n-" + "\n-".join([e["instructions"] for e in plan_i]),
735
- tool_info=tool_info,
736
- tool_output=tool_output_str,
737
- tool_utils=T.UTILITIES_DOCSTRING,
738
- working_memory=working_memory,
739
- coder=self.coder,
740
- tester=self.tester,
741
- debugger=self.debugger,
742
- code_interpreter=code_interpreter,
743
- log_progress=self.log_progress,
744
- verbosity=self.verbosity,
745
- media=media_list,
700
+ tool_infos = retrieve_tools(
701
+ plans,
702
+ self.tool_recommender,
703
+ self.log_progress,
704
+ self.verbosity,
705
+ )
706
+ best_plan, tool_output_str = pick_plan(
707
+ int_chat,
708
+ plans,
709
+ tool_infos["all"],
710
+ self.coder,
711
+ code_interpreter,
712
+ verbosity=self.verbosity,
713
+ )
714
+
715
+ if best_plan in plans and best_plan in tool_infos:
716
+ plan_i = plans[best_plan]
717
+ tool_info = tool_infos[best_plan]
718
+ else:
719
+ if self.verbosity >= 1:
720
+ _LOGGER.warning(
721
+ f"Best plan {best_plan} not found in plans or tool_infos. Using the first plan and tool info."
722
+ )
723
+ k = list(plans.keys())[0]
724
+ plan_i = plans[k]
725
+ tool_info = tool_infos[k]
726
+
727
+ self.log_progress(
728
+ {
729
+ "type": "plans",
730
+ "status": "completed",
731
+ "payload": plan_i,
732
+ }
733
+ )
734
+ if self.verbosity >= 1:
735
+ _LOGGER.info(
736
+ f"Picked best plan:\n{tabulate(tabular_data=plan_i, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
746
737
  )
747
- success = cast(bool, results["success"])
748
- code = cast(str, results["code"])
749
- test = cast(str, results["test"])
750
- working_memory.extend(results["working_memory"]) # type: ignore
751
- plan.append({"code": code, "test": test, "plan": plan_i})
752
738
 
753
- retries += 1
739
+ results = write_and_test_code(
740
+ chat=[{"role": c["role"], "content": c["content"]} for c in int_chat],
741
+ plan="\n-" + "\n-".join([e["instructions"] for e in plan_i]),
742
+ tool_info=tool_info,
743
+ tool_output=tool_output_str,
744
+ tool_utils=T.UTILITIES_DOCSTRING,
745
+ working_memory=working_memory,
746
+ coder=self.coder,
747
+ tester=self.tester,
748
+ debugger=self.debugger,
749
+ code_interpreter=code_interpreter,
750
+ log_progress=self.log_progress,
751
+ verbosity=self.verbosity,
752
+ media=media_list,
753
+ )
754
+ success = cast(bool, results["success"])
755
+ code = cast(str, results["code"])
756
+ test = cast(str, results["test"])
757
+ working_memory.extend(results["working_memory"]) # type: ignore
758
+ plan.append({"code": code, "test": test, "plan": plan_i})
754
759
 
755
760
  execution_result = cast(Execution, results["test_result"])
756
761
  self.log_progress(
File without changes
File without changes