PyPI - vision-agent - Versions diffs - 0.2.143__tar.gz → 0.2.145__tar.gz - Mend

vision-agent 0.2.143.tar.gz → 0.2.145.tar.gz

Files changed (33) hide show

{vision_agent-0.2.143 → vision_agent-0.2.145}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.143
+Version: 0.2.145
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.143 → vision_agent-0.2.145}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.143"
+version = "0.2.145"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"

{vision_agent-0.2.143 → vision_agent-0.2.145}/vision_agent/agent/vision_agent.py RENAMED Viewed

@@ -229,7 +229,7 @@ class VisionAgent(Agent):
         ) as code_interpreter:
             orig_chat = copy.deepcopy(chat)
             int_chat = copy.deepcopy(chat)
-            last_user_message_content = chat[-1].get("content")
+            last_user_message = chat[-1]
             media_list = []
             for chat_i in int_chat:
                 if "media" in chat_i:
@@ -278,32 +278,9 @@ class VisionAgent(Agent):
             orig_chat.append({"role": "observation", "content": artifacts_loaded})
             self.streaming_message({"role": "observation", "content": artifacts_loaded})
-            if int_chat[-1]["role"] == "user":
-                last_user_message_content = cast(str, int_chat[-1].get("content", ""))
-                user_code_action = parse_execution(last_user_message_content, False)
-                if user_code_action is not None:
-                    user_result, user_obs = run_code_action(
-                        user_code_action, code_interpreter, str(remote_artifacts_path)
-                    )
-                    if self.verbosity >= 1:
-                        _LOGGER.info(user_obs)
-                    int_chat.append({"role": "observation", "content": user_obs})
-                    orig_chat.append(
-                        {
-                            "role": "observation",
-                            "content": user_obs,
-                            "execution": user_result,
-                        }
-                    )
-                    self.streaming_message(
-                        {
-                            "role": "observation",
-                            "content": user_obs,
-                            "execution": user_result,
-                            "finished": True,
-                        }
-                    )
-                    finished = True
+            finished = self.execute_user_code_action(
+                last_user_message, code_interpreter, remote_artifacts_path
+            )
             while not finished and iterations < self.max_iterations:
                 response = run_conversation(self.agent, int_chat)
@@ -315,11 +292,6 @@ class VisionAgent(Agent):
                 # sometimes it gets stuck in a loop, so we force it to exit
                 if last_response == response:
                     response["let_user_respond"] = True
-                    self.streaming_message(
-                        {"role": "assistant", "error": "Stuck in loop"}
-                    )
-                else:
-                    self.streaming_message({"role": "assistant", "content": response})
                 finished = response["let_user_respond"]
@@ -327,6 +299,28 @@ class VisionAgent(Agent):
                     response["response"], test_multi_plan, customized_tool_names
                 )
+                if last_response == response:
+                    self.streaming_message(
+                        {
+                            "role": "assistant",
+                            "content": "{}",
+                            "error": {
+                                "name": "Error when running conversation agent",
+                                "value": "Agent is stuck in conversation loop, exited",
+                                "traceback_raw": [],
+                            },
+                            "finished": finished and code_action is None,
+                        }
+                    )
+                else:
+                    self.streaming_message(
+                        {
+                            "role": "assistant",
+                            "content": response,
+                            "finished": finished and code_action is None,
+                        }
+                    )
                 if code_action is not None:
                     result, obs = run_code_action(
                         code_action, code_interpreter, str(remote_artifacts_path)
@@ -353,6 +347,7 @@ class VisionAgent(Agent):
                             "role": "observation",
                             "content": obs,
                             "execution": result,
+                            "finished": finished,
                         }
                     )
@@ -367,6 +362,34 @@ class VisionAgent(Agent):
             artifacts.save()
         return orig_chat, artifacts
+    def execute_user_code_action(
+        self,
+        last_user_message: Message,
+        code_interpreter: CodeInterpreter,
+        remote_artifacts_path: Path,
+    ) -> bool:
+        if last_user_message["role"] != "user":
+            return False
+        user_code_action = parse_execution(
+            cast(str, last_user_message.get("content", "")), False
+        )
+        if user_code_action is not None:
+            user_result, user_obs = run_code_action(
+                user_code_action, code_interpreter, str(remote_artifacts_path)
+            )
+            if self.verbosity >= 1:
+                _LOGGER.info(user_obs)
+            self.streaming_message(
+                {
+                    "role": "observation",
+                    "content": user_obs,
+                    "execution": user_result,
+                    "finished": True,
+                }
+            )
+            return True
+        return False
     def streaming_message(self, message: Dict[str, Any]) -> None:
         if self.callback_message:
             self.callback_message(message)

{vision_agent-0.2.143 → vision_agent-0.2.145}/vision_agent/tools/meta_tools.py RENAMED Viewed

@@ -425,6 +425,7 @@ def edit_vision_code(
     agent = va.agent.VisionAgentCoder()
     if name not in artifacts:
+        print(f"[Artifact {name} does not exist]")
         return f"[Artifact {name} does not exist]"
     code = artifacts[name]

{vision_agent-0.2.143 → vision_agent-0.2.145}/vision_agent/tools/tools.py RENAMED Viewed

@@ -1181,7 +1181,12 @@ def florence2_phrase_grounding(
             fine_tuning=FineTuning(job_id=UUID(fine_tune_id)),
         )
         data = data_obj.model_dump(by_alias=True)
-        detections = send_inference_request(data, "tools", v2=False)
+        detections = send_inference_request(
+            data,
+            "tools",
+            v2=False,
+            metadata_payload={"function_name": "florence2_phrase_grounding"},
+        )
     else:
         data = {
             "image": image_b64,