vision-agent 0.2.82__tar.gz → 0.2.83__tar.gz

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (24)
  1. {vision_agent-0.2.82 → vision_agent-0.2.83}/PKG-INFO +1 -1
  2. {vision_agent-0.2.82 → vision_agent-0.2.83}/pyproject.toml +1 -1
  3. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/agent/vision_agent.py +81 -84
  4. {vision_agent-0.2.82 → vision_agent-0.2.83}/LICENSE +0 -0
  5. {vision_agent-0.2.82 → vision_agent-0.2.83}/README.md +0 -0
  6. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/__init__.py +0 -0
  7. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/agent/__init__.py +0 -0
  8. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/agent/agent.py +0 -0
  9. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/agent/vision_agent_prompts.py +0 -0
  10. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/fonts/__init__.py +0 -0
  11. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  12. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/lmm/__init__.py +0 -0
  13. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/lmm/lmm.py +0 -0
  14. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/tools/__init__.py +0 -0
  15. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/tools/prompts.py +0 -0
  16. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/tools/tool_utils.py +0 -0
  17. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/tools/tools.py +0 -0
  18. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/utils/__init__.py +0 -0
  19. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/utils/exceptions.py +0 -0
  20. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/utils/execute.py +0 -0
  21. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/utils/image_utils.py +0 -0
  22. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/utils/sim.py +0 -0
  23. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/utils/type_defs.py +0 -0
  24. {vision_agent-0.2.82 → vision_agent-0.2.83}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.82
3
+ Version: 0.2.83
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.82"
7
+ version = "0.2.83"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -176,6 +176,7 @@ def pick_plan(
176
176
  model: LMM,
177
177
  code_interpreter: CodeInterpreter,
178
178
  verbosity: int = 0,
179
+ max_retries: int = 3,
179
180
  ) -> Tuple[str, str]:
180
181
  chat = copy.deepcopy(chat)
181
182
  if chat[-1]["role"] != "user":
@@ -192,13 +193,13 @@ def pick_plan(
192
193
  if len(tool_output.logs.stdout) > 0:
193
194
  tool_output_str = tool_output.logs.stdout[0]
194
195
 
195
- if verbosity >= 1:
196
+ if verbosity == 2:
196
197
  _print_code("Initial code and tests:", code)
197
198
  _LOGGER.info(f"Initial code execution result:\n{tool_output.text()}")
198
199
 
199
200
  # retry if the tool output is empty or code fails
200
- count = 1
201
- while (not tool_output.success or tool_output_str == "") and count < 3:
201
+ count = 0
202
+ while (not tool_output.success or tool_output_str == "") and count < max_retries:
202
203
  prompt = TEST_PLANS.format(
203
204
  docstring=tool_info,
204
205
  plans=plan_str,
@@ -214,12 +215,15 @@ def pick_plan(
214
215
  if len(tool_output.logs.stdout) > 0:
215
216
  tool_output_str = tool_output.logs.stdout[0]
216
217
 
217
- if verbosity == 1:
218
+ if verbosity == 2:
218
219
  _print_code("Code and test after attempted fix:", code)
219
220
  _LOGGER.info(f"Code execution result after attempte {count}")
220
221
 
221
222
  count += 1
222
223
 
224
+ if verbosity >= 1:
225
+ _print_code("Final code:", code)
226
+
223
227
  user_req = chat[-1]["content"]
224
228
  context = USER_REQ.format(user_request=user_req)
225
229
  # because the tool picker model gets the image as well, we have to be careful with
@@ -408,7 +412,7 @@ def debug_code(
408
412
  FIX_BUG.format(
409
413
  code=code,
410
414
  tests=test,
411
- result="\n".join(result.text().splitlines()[-50:]),
415
+ result="\n".join(result.text().splitlines()[-100:]),
412
416
  feedback=format_memory(working_memory + new_working_memory),
413
417
  )
414
418
  )
@@ -673,92 +677,85 @@ class VisionAgent(Agent):
673
677
  working_memory: List[Dict[str, str]] = []
674
678
  results = {"code": "", "test": "", "plan": []}
675
679
  plan = []
676
- success = False
677
- retries = 0
678
-
679
- while not success and retries < self.max_retries:
680
- self.log_progress(
681
- {
682
- "type": "plans",
683
- "status": "started",
684
- }
685
- )
686
- plans = write_plans(
687
- int_chat,
688
- T.TOOL_DESCRIPTIONS,
689
- format_memory(working_memory),
690
- self.planner,
691
- )
692
680
 
693
- if self.verbosity >= 1:
694
- for p in plans:
695
- _LOGGER.info(
696
- f"\n{tabulate(tabular_data=plans[p], headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
697
- )
698
-
699
- tool_infos = retrieve_tools(
700
- plans,
701
- self.tool_recommender,
702
- self.log_progress,
703
- self.verbosity,
704
- )
705
- best_plan, tool_output_str = pick_plan(
706
- int_chat,
707
- plans,
708
- tool_infos["all"],
709
- self.coder,
710
- code_interpreter,
711
- verbosity=self.verbosity,
712
- )
681
+ self.log_progress(
682
+ {
683
+ "type": "plans",
684
+ "status": "started",
685
+ }
686
+ )
687
+ plans = write_plans(
688
+ int_chat,
689
+ T.TOOL_DESCRIPTIONS,
690
+ format_memory(working_memory),
691
+ self.planner,
692
+ )
713
693
 
714
- if best_plan in plans and best_plan in tool_infos:
715
- plan_i = plans[best_plan]
716
- tool_info = tool_infos[best_plan]
717
- else:
718
- if self.verbosity >= 1:
719
- _LOGGER.warning(
720
- f"Best plan {best_plan} not found in plans or tool_infos. Using the first plan and tool info."
721
- )
722
- k = list(plans.keys())[0]
723
- plan_i = plans[k]
724
- tool_info = tool_infos[k]
725
-
726
- self.log_progress(
727
- {
728
- "type": "plans",
729
- "status": "completed",
730
- "payload": plan_i,
731
- }
732
- )
733
- if self.verbosity >= 1:
694
+ if self.verbosity >= 1:
695
+ for p in plans:
734
696
  _LOGGER.info(
735
- f"Picked best plan:\n{tabulate(tabular_data=plan_i, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
697
+ f"\n{tabulate(tabular_data=plans[p], headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
736
698
  )
737
699
 
738
- results = write_and_test_code(
739
- chat=[
740
- {"role": c["role"], "content": c["content"]} for c in int_chat
741
- ],
742
- plan="\n-" + "\n-".join([e["instructions"] for e in plan_i]),
743
- tool_info=tool_info,
744
- tool_output=tool_output_str,
745
- tool_utils=T.UTILITIES_DOCSTRING,
746
- working_memory=working_memory,
747
- coder=self.coder,
748
- tester=self.tester,
749
- debugger=self.debugger,
750
- code_interpreter=code_interpreter,
751
- log_progress=self.log_progress,
752
- verbosity=self.verbosity,
753
- media=media_list,
700
+ tool_infos = retrieve_tools(
701
+ plans,
702
+ self.tool_recommender,
703
+ self.log_progress,
704
+ self.verbosity,
705
+ )
706
+ best_plan, tool_output_str = pick_plan(
707
+ int_chat,
708
+ plans,
709
+ tool_infos["all"],
710
+ self.coder,
711
+ code_interpreter,
712
+ verbosity=self.verbosity,
713
+ )
714
+
715
+ if best_plan in plans and best_plan in tool_infos:
716
+ plan_i = plans[best_plan]
717
+ tool_info = tool_infos[best_plan]
718
+ else:
719
+ if self.verbosity >= 1:
720
+ _LOGGER.warning(
721
+ f"Best plan {best_plan} not found in plans or tool_infos. Using the first plan and tool info."
722
+ )
723
+ k = list(plans.keys())[0]
724
+ plan_i = plans[k]
725
+ tool_info = tool_infos[k]
726
+
727
+ self.log_progress(
728
+ {
729
+ "type": "plans",
730
+ "status": "completed",
731
+ "payload": plan_i,
732
+ }
733
+ )
734
+ if self.verbosity >= 1:
735
+ _LOGGER.info(
736
+ f"Picked best plan:\n{tabulate(tabular_data=plan_i, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
754
737
  )
755
- success = cast(bool, results["success"])
756
- code = cast(str, results["code"])
757
- test = cast(str, results["test"])
758
- working_memory.extend(results["working_memory"]) # type: ignore
759
- plan.append({"code": code, "test": test, "plan": plan_i})
760
738
 
761
- retries += 1
739
+ results = write_and_test_code(
740
+ chat=[{"role": c["role"], "content": c["content"]} for c in int_chat],
741
+ plan="\n-" + "\n-".join([e["instructions"] for e in plan_i]),
742
+ tool_info=tool_info,
743
+ tool_output=tool_output_str,
744
+ tool_utils=T.UTILITIES_DOCSTRING,
745
+ working_memory=working_memory,
746
+ coder=self.coder,
747
+ tester=self.tester,
748
+ debugger=self.debugger,
749
+ code_interpreter=code_interpreter,
750
+ log_progress=self.log_progress,
751
+ verbosity=self.verbosity,
752
+ media=media_list,
753
+ )
754
+ success = cast(bool, results["success"])
755
+ code = cast(str, results["code"])
756
+ test = cast(str, results["test"])
757
+ working_memory.extend(results["working_memory"]) # type: ignore
758
+ plan.append({"code": code, "test": test, "plan": plan_i})
762
759
 
763
760
  execution_result = cast(Execution, results["test_result"])
764
761
  self.log_progress(
File without changes
File without changes