PyPI - vision-agent - Versions diffs - 0.2.158__py3-none-any.whl → 0.2.160__py3-none-any.whl - Mend

vision-agent 0.2.158__py3-none-any.whl → 0.2.160__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -149,6 +149,32 @@ def execute_user_code_action(
     return user_result, user_obs
+def add_step_descriptions(response: Dict[str, str]) -> Dict[str, str]:
+    response = copy.deepcopy(response)
+    if "response" in response:
+        resp_str = response["response"]
+        if "<execute_python>" in resp_str:
+            # only include descriptions for these, the rest will just have executing
+            # code
+            description_map = {
+                "open_code_artifact": "Reading file.",
+                "create_code_artifact": "Creating file.",
+                "edit_code_artifact": "Editing file.",
+                "generate_vision_code": "Generating vision code.",
+                "edit_vision_code": "Editing vision code.",
+            }
+            description = ""
+            for k, v in description_map.items():
+                if k in resp_str:
+                    description += v + " "
+            if description == "":
+                description = "Executing code."
+            resp_str = resp_str[resp_str.find("<execute_python>") :]
+            resp_str = description + resp_str
+        response["response"] = resp_str
+    return response
 class VisionAgent(Agent):
     """Vision Agent is an agent that can chat with the user and call tools or other
     agents to generate code for it. Vision Agent uses python code to execute actions
@@ -335,8 +361,18 @@ class VisionAgent(Agent):
                 response = run_conversation(self.agent, int_chat)
                 if self.verbosity >= 1:
                     _LOGGER.info(response)
-                int_chat.append({"role": "assistant", "content": str(response)})
-                orig_chat.append({"role": "assistant", "content": str(response)})
+                int_chat.append(
+                    {
+                        "role": "assistant",
+                        "content": str(add_step_descriptions(response)),
+                    }
+                )
+                orig_chat.append(
+                    {
+                        "role": "assistant",
+                        "content": str(add_step_descriptions(response)),
+                    }
+                )
                 # sometimes it gets stuck in a loop, so we force it to exit
                 if last_response == response:
@@ -382,6 +418,16 @@ class VisionAgent(Agent):
                     obs_chat_elt: Message = {"role": "observation", "content": obs}
                     if media_obs and result.success:
+                        # for view_media_artifact, we need to ensure the media is loaded
+                        # locally so the conversation agent can actually see it
+                        code_interpreter.download_file(
+                            str(remote_artifacts_path.name),
+                            str(self.local_artifacts_path),
+                        )
+                        artifacts.load(
+                            self.local_artifacts_path,
+                            Path(self.local_artifacts_path).parent,
+                        )
                         obs_chat_elt["media"] = [
                             Path(self.local_artifacts_path).parent / media_ob
                             for media_ob in media_obs
@@ -407,8 +453,9 @@ class VisionAgent(Agent):
             code_interpreter.download_file(
                 str(remote_artifacts_path.name), str(self.local_artifacts_path)
             )
-            artifacts.load(self.local_artifacts_path)
-            artifacts.save()
+            artifacts.load(
+                self.local_artifacts_path, Path(self.local_artifacts_path).parent
+            )
         return orig_chat, artifacts
     def streaming_message(self, message: Dict[str, Any]) -> None:

vision_agent/tools/meta_tools.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import base64
 import difflib
 import json
 import os
@@ -92,19 +93,26 @@ class Artifacts:
         self.code_sandbox_runtime = None
-    def load(self, file_path: Union[str, Path]) -> None:
-        """Loads are artifacts into the remote environment. If an artifact value is None
-        it will skip loading it.
+    def load(
+        self,
+        artifacts_path: Union[str, Path],
+        load_to: Optional[Union[str, Path]] = None,
+    ) -> None:
+        """Loads are artifacts into the load_to path. If load_to is None, it will load
+        into remote_save_path. If an artifact value is None it will skip loading it.
         Parameters:
-            file_path (Union[str, Path]): The file path to load the artifacts from
+            artifacts_path (Union[str, Path]): The file path to load the artifacts from
         """
-        with open(file_path, "rb") as f:
+        with open(artifacts_path, "rb") as f:
             self.artifacts = pkl.load(f)
+        load_to = self.remote_save_path.parent if load_to is None else Path(load_to)
         for k, v in self.artifacts.items():
             if v is not None:
                 mode = "w" if isinstance(v, str) else "wb"
-                with open(self.remote_save_path.parent / k, mode) as f:
+                with open(load_to / k, mode) as f:
                     f.write(v)
     def show(self, uploaded_file_path: Optional[Union[str, Path]] = None) -> str:
@@ -503,6 +511,19 @@ def write_media_artifact(
         return f"[Invalid media type {type(media)}]"
     artifacts[name] = media_bytes
     print(f"[Media {name} saved]")
+    display(
+        {
+            MimeType.APPLICATION_ARTIFACT: json.dumps(
+                {
+                    "name": name,
+                    "action": "create",
+                    "content": base64.b64encode(media_bytes).decode("utf-8"),
+                    "contentType": "media_output",
+                }
+            )
+        },
+        raw=True,
+    )
     return f"[Media {name} saved]"

vision_agent/tools/tools.py CHANGED Viewed

@@ -700,6 +700,7 @@ def countgd_counting(
             {'score': 0.98, 'label': 'flower', 'bbox': [0.44, 0.24, 0.49, 0.58},
         ]
     """
+    image_size = image.shape[:2]
     buffer_bytes = numpy_to_bytes(image)
     files = [("image", buffer_bytes)]
     prompt = prompt.replace(", ", " .")
@@ -712,7 +713,7 @@ def countgd_counting(
     bboxes_formatted = [
         ODResponseData(
             label=bbox["label"],
-            bbox=list(map(lambda x: round(x, 2), bbox["bounding_box"])),
+            bbox=normalize_bbox(bbox["bounding_box"], image_size),
             score=round(bbox["score"], 2),
         )
         for bbox in bboxes_per_frame
@@ -757,6 +758,7 @@ def countgd_example_based_counting(
             {'score': 0.98, 'label': 'object', 'bounding_box': [0.44, 0.24, 0.49, 0.58},
         ]
     """
+    image_size = image.shape[:2]
     buffer_bytes = numpy_to_bytes(image)
     files = [("image", buffer_bytes)]
     visual_prompts = [
@@ -771,7 +773,7 @@ def countgd_example_based_counting(
     bboxes_formatted = [
         ODResponseData(
             label=bbox["label"],
-            bbox=list(map(lambda x: round(x, 2), bbox["bounding_box"])),
+            bbox=normalize_bbox(bbox["bounding_box"], image_size),
             score=round(bbox["score"], 2),
         )
         for bbox in bboxes_per_frame

{vision_agent-0.2.158.dist-info → vision_agent-0.2.160.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.158
+Version: 0.2.160
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.158.dist-info → vision_agent-0.2.160.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
 vision_agent/agent/__init__.py,sha256=NF2LABqHixLvbsOIO-fe-VKZ7awvShLtcT0oQT4eWtI,235
 vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
 vision_agent/agent/agent_utils.py,sha256=eIpLz2NunEqEsBBrECJaD34-2uY0bsFNnW-XKfqqohs,2518
-vision_agent/agent/vision_agent.py,sha256=GAoTxGrWrJGk-4rC-e1BvjMzv0UuIVv45rGVW3kmLJk,18463
+vision_agent/agent/vision_agent.py,sha256=etqyLMZHJJz_A6tkonoYGlYvFvEW0uUHs5D1gsYwkSs,20412
 vision_agent/agent/vision_agent_coder.py,sha256=2ZoGikn2nakGDfs20XRshZjQUyvbw6l47UhExJAYkqI,38515
 vision_agent/agent/vision_agent_coder_prompts.py,sha256=BmbTMhth4v1qLexuoSeyo47QQ0kPQvL1pLbCJHMsWDw,18910
 vision_agent/agent/vision_agent_prompts.py,sha256=LZ9Bnx7ZFkqbNOMqwfdiWZU4niND9Z1ArcFHNSn_jzA,11187
@@ -15,10 +15,10 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
 vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
 vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
 vision_agent/tools/__init__.py,sha256=PLVbfTMjKxQlHIRWnq9b785W9a52AXQS_tOa0tkQ0ZY,2420
-vision_agent/tools/meta_tools.py,sha256=Xu5h92YRfsbvW_iivTnOhlNAPOc2z7CShjOz8KLI4KA,25212
+vision_agent/tools/meta_tools.py,sha256=TG4vXN7W2M3rOJhzVsCrDAhbctd3RJcuOmbAeXsk2Sw,25798
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
-vision_agent/tools/tools.py,sha256=aP4GCeuGJDMQAIajflgKPVMjrs7ecdEuNiA9GDnV-Pk,78470
+vision_agent/tools/tools.py,sha256=vS1yCk3Fza9eYOTHPFwwroo_ULdw2ztMQMb81x1U5f8,78524
 vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
 vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
 vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
 vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=xbMEoRk13l4fHeQlbvMQhLCn8RNndYmsDhUf01TUeR8,4781
-vision_agent-0.2.158.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.158.dist-info/METADATA,sha256=VGAG_jcVVy8RRa_H6KuxCUIkrATVfyw_WBOceGhVgN4,17753
-vision_agent-0.2.158.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.158.dist-info/RECORD,,
+vision_agent-0.2.160.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.160.dist-info/METADATA,sha256=yfWiwd_dUVN2AQOkCSDM9A41NPqACMoj8NaVgOHOmNQ,17753
+vision_agent-0.2.160.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.160.dist-info/RECORD,,

{vision_agent-0.2.158.dist-info → vision_agent-0.2.160.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.158.dist-info → vision_agent-0.2.160.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.158__py3-none-any.whl → 0.2.160__py3-none-any.whl

vision-agent 0.2.158__py3-none-any.whl → 0.2.160__py3-none-any.whl