PyPI - vision-agent - Versions diffs - 0.2.84__tar.gz → 0.2.86__tar.gz - Mend

vision-agent 0.2.84__tar.gz → 0.2.86__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

{vision_agent-0.2.84 → vision_agent-0.2.86}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.84
+Version: 0.2.86
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.84 → vision_agent-0.2.86}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.84"
+version = "0.2.86"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"

{vision_agent-0.2.84 → vision_agent-0.2.86}/vision_agent/agent/vision_agent.py RENAMED Viewed

@@ -172,19 +172,25 @@ def write_plans(
 def pick_plan(
     chat: List[Message],
     plans: Dict[str, Any],
-    tool_info: str,
+    tool_infos: Dict[str, str],
     model: LMM,
     code_interpreter: CodeInterpreter,
+    test_multi_plan: bool,
     verbosity: int = 0,
     max_retries: int = 3,
-) -> Tuple[str, str]:
+) -> Tuple[Any, str, str]:
+    if not test_multi_plan:
+        k = list(plans.keys())[0]
+        return plans[k], tool_infos[k], ""
+    all_tool_info = tool_infos["all"]
     chat = copy.deepcopy(chat)
     if chat[-1]["role"] != "user":
         raise ValueError("Last chat message must be from the user.")
     plan_str = format_plans(plans)
     prompt = TEST_PLANS.format(
-        docstring=tool_info, plans=plan_str, previous_attempts=""
+        docstring=all_tool_info, plans=plan_str, previous_attempts=""
     )
     code = extract_code(model(prompt))
@@ -201,7 +207,7 @@ def pick_plan(
     count = 0
     while (not tool_output.success or tool_output_str == "") and count < max_retries:
         prompt = TEST_PLANS.format(
-            docstring=tool_info,
+            docstring=all_tool_info,
             plans=plan_str,
             previous_attempts=PREVIOUS_FAILED.format(
                 code=code, error=tool_output.text()
@@ -237,7 +243,17 @@ def pick_plan(
     best_plan = extract_json(model(chat))
     if verbosity >= 1:
         _LOGGER.info(f"Best plan:\n{best_plan}")
-    return best_plan["best_plan"], tool_output_str
+    plan = best_plan["best_plan"]
+    if plan in plans and plan in tool_infos:
+        return plans[plan], tool_infos[plan], tool_output_str
+    else:
+        if verbosity >= 1:
+            _LOGGER.warning(
+                f"Best plan {plan} not found in plans or tool_infos. Using the first plan and tool info."
+            )
+        k = list(plans.keys())[0]
+        return plans[k], tool_infos[k], tool_output_str
 @traceable
@@ -524,6 +540,13 @@ def retrieve_tools(
         )
     all_tools = "\n\n".join(set(tool_info))
     tool_lists_unique["all"] = all_tools
+    log_progress(
+        {
+            "type": "tools",
+            "status": "completed",
+            "payload": tool_lists[list(plans.keys())[0]],
+        }
+    )
     return tool_lists_unique
@@ -622,6 +645,7 @@ class VisionAgent(Agent):
     def chat_with_workflow(
         self,
         chat: List[Message],
+        test_multi_plan: bool = True,
         display_visualization: bool = False,
     ) -> Dict[str, Any]:
         """Chat with Vision Agent and return intermediate information regarding the task.
@@ -691,7 +715,15 @@ class VisionAgent(Agent):
                 self.planner,
             )
-            if self.verbosity >= 1:
+            self.log_progress(
+                {
+                    "type": "plans",
+                    "status": "completed",
+                    "payload": plans[list(plans.keys())[0]],
+                }
+            )
+            if self.verbosity >= 1 and test_multi_plan:
                 for p in plans:
                     _LOGGER.info(
                         f"\n{tabulate(tabular_data=plans[p], headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
@@ -703,43 +735,26 @@ class VisionAgent(Agent):
                 self.log_progress,
                 self.verbosity,
             )
-            best_plan, tool_output_str = pick_plan(
+            best_plan, best_tool_info, tool_output_str = pick_plan(
                 int_chat,
                 plans,
-                tool_infos["all"],
+                tool_infos,
                 self.coder,
                 code_interpreter,
+                test_multi_plan,
                 verbosity=self.verbosity,
             )
-            if best_plan in plans and best_plan in tool_infos:
-                plan_i = plans[best_plan]
-                tool_info = tool_infos[best_plan]
-            else:
-                if self.verbosity >= 1:
-                    _LOGGER.warning(
-                        f"Best plan {best_plan} not found in plans or tool_infos. Using the first plan and tool info."
-                    )
-                k = list(plans.keys())[0]
-                plan_i = plans[k]
-                tool_info = tool_infos[k]
-            self.log_progress(
-                {
-                    "type": "plans",
-                    "status": "completed",
-                    "payload": plan_i,
-                }
-            )
             if self.verbosity >= 1:
                 _LOGGER.info(
-                    f"Picked best plan:\n{tabulate(tabular_data=plan_i, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
+                    f"Picked best plan:\n{tabulate(tabular_data=best_plan, headers='keys', tablefmt='mixed_grid', maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"
                 )
             results = write_and_test_code(
                 chat=[{"role": c["role"], "content": c["content"]} for c in int_chat],
-                plan="\n-" + "\n-".join([e["instructions"] for e in plan_i]),
-                tool_info=tool_info,
+                plan="\n-" + "\n-".join([e["instructions"] for e in best_plan]),
+                tool_info=best_tool_info,
                 tool_output=tool_output_str,
                 tool_utils=T.UTILITIES_DOCSTRING,
                 working_memory=working_memory,
@@ -755,7 +770,7 @@ class VisionAgent(Agent):
             code = cast(str, results["code"])
             test = cast(str, results["test"])
             working_memory.extend(results["working_memory"])  # type: ignore
-            plan.append({"code": code, "test": test, "plan": plan_i})
+            plan.append({"code": code, "test": test, "plan": best_plan})
             execution_result = cast(Execution, results["test_result"])
             self.log_progress(