PyPI - vision-agent - Versions diffs - 0.2.131__py3-none-any.whl → 0.2.132__py3-none-any.whl - Mend

vision-agent 0.2.131__py3-none-any.whl → 0.2.132__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -3,7 +3,7 @@ import logging
 import os
 import tempfile
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union, cast
+from typing import Any, Dict, List, Optional, Tuple, Union, cast, Callable
 from vision_agent.agent import Agent
 from vision_agent.agent.agent_utils import extract_json
@@ -13,7 +13,7 @@ from vision_agent.agent.vision_agent_prompts import (
     VA_CODE,
 )
 from vision_agent.lmm import LMM, Message, OpenAILMM
-from vision_agent.tools import META_TOOL_DOCSTRING
+from vision_agent.tools import META_TOOL_DOCSTRING, save_image, load_image
 from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
 from vision_agent.utils import CodeInterpreterFactory
 from vision_agent.utils.execute import CodeInterpreter, Execution
@@ -123,6 +123,7 @@ class VisionAgent(Agent):
         verbosity: int = 0,
         local_artifacts_path: Optional[Union[str, Path]] = None,
         code_sandbox_runtime: Optional[str] = None,
+        callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
     ) -> None:
         """Initialize the VisionAgent.
@@ -141,6 +142,7 @@ class VisionAgent(Agent):
         self.max_iterations = 100
         self.verbosity = verbosity
         self.code_sandbox_runtime = code_sandbox_runtime
+        self.callback_message = callback_message
         if self.verbosity >= 1:
             _LOGGER.setLevel(logging.INFO)
         self.local_artifacts_path = cast(
@@ -220,7 +222,14 @@ class VisionAgent(Agent):
             for chat_i in int_chat:
                 if "media" in chat_i:
                     for media in chat_i["media"]:
-                        media = cast(str, media)
+                        if type(media) is str and media.startswith(("http", "https")):
+                            # TODO: Ideally we should not call VA.tools here, we should come to revisit how to better support remote image later
+                            file_path = Path(media).name
+                            ndarray = load_image(media)
+                            save_image(ndarray, file_path)
+                            media = file_path
+                        else:
+                            media = cast(str, media)
                         artifacts.artifacts[Path(media).name] = open(media, "rb").read()
                         media_remote_path = (
@@ -262,6 +271,7 @@ class VisionAgent(Agent):
             artifacts_loaded = artifacts.show()
             int_chat.append({"role": "observation", "content": artifacts_loaded})
             orig_chat.append({"role": "observation", "content": artifacts_loaded})
+            self.streaming_message({"role": "observation", "content": artifacts_loaded})
             while not finished and iterations < self.max_iterations:
                 response = run_conversation(self.agent, int_chat)
@@ -274,6 +284,8 @@ class VisionAgent(Agent):
                 if last_response == response:
                     response["let_user_respond"] = True
+                self.streaming_message({"role": "assistant", "content": response})
                 if response["let_user_respond"]:
                     break
@@ -293,6 +305,13 @@ class VisionAgent(Agent):
                     orig_chat.append(
                         {"role": "observation", "content": obs, "execution": result}
                     )
+                    self.streaming_message(
+                        {
+                            "role": "observation",
+                            "content": obs,
+                            "execution": result,
+                        }
+                    )
                 iterations += 1
                 last_response = response
@@ -305,5 +324,9 @@ class VisionAgent(Agent):
             artifacts.save()
         return orig_chat, artifacts
+    def streaming_message(self, message: Dict[str, Any]) -> None:
+        if self.callback_message:
+            self.callback_message(message)
     def log_progress(self, data: Dict[str, Any]) -> None:
         pass

{vision_agent-0.2.131.dist-info → vision_agent-0.2.132.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.131
+Version: 0.2.132
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.131.dist-info → vision_agent-0.2.132.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
 vision_agent/agent/__init__.py,sha256=FRwiux1FGvGccetyUCtY46KP01fQteqorm-JtFepovI,176
 vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
 vision_agent/agent/agent_utils.py,sha256=22LiPhkJlS5mVeo2dIi259pc2NgA7PGHRpcbnrtKo78,1930
-vision_agent/agent/vision_agent.py,sha256=7Xa_TjjbUXhlPmKpmXGCAIdT0-PJzRL2rFaACszTXX0,12001
+vision_agent/agent/vision_agent.py,sha256=nfxdY5W5UME7JhwFcsB3j2-L5zsYZzJWdlS2R8U_9lE,13224
 vision_agent/agent/vision_agent_coder.py,sha256=_2QQd_nTGojkk2ZOiMevVCY6-eUA9q1QdCWH7-Noq4w,34237
 vision_agent/agent/vision_agent_coder_prompts.py,sha256=nj4iRRSAWYHjKqyUSp12aTCV1D5iUVCHeezVXoozS4M,12687
 vision_agent/agent/vision_agent_prompts.py,sha256=-fXiIIb48duXVljWYcJ0Y4ZzfNnRFi3C5cKdF4SdDo8,10075
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=zTTOJFOieMzwIquTFnW7T6ssx9o6XfoZ0Unqyk7
 vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=GmJqu_3WhBMEwP4HToMMp8EwgftliHSpv5nd-QEDOcs,4528
-vision_agent-0.2.131.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.131.dist-info/METADATA,sha256=LCIVXm-Le9Uw6Vp-XMvmmkhMRPRJJlBZmJPF28Bn6Hs,12295
-vision_agent-0.2.131.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.131.dist-info/RECORD,,
+vision_agent-0.2.132.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.132.dist-info/METADATA,sha256=s0AXiV6qjDjTUrzFqHL-50QJ6r7sxlJrwkSKNIGgklc,12295
+vision_agent-0.2.132.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.132.dist-info/RECORD,,

{vision_agent-0.2.131.dist-info → vision_agent-0.2.132.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.131.dist-info → vision_agent-0.2.132.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.131__py3-none-any.whl → 0.2.132__py3-none-any.whl

vision-agent 0.2.131__py3-none-any.whl → 0.2.132__py3-none-any.whl