PyPI - vision-agent - Versions diffs - 0.2.157__py3-none-any.whl → 0.2.159__py3-none-any.whl - Mend

vision-agent 0.2.157__py3-none-any.whl → 0.2.159__py3-none-any.whl

Files changed (7) hide show

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -149,6 +149,32 @@ def execute_user_code_action(
     return user_result, user_obs
+def add_step_descriptions(response: Dict[str, str]) -> Dict[str, str]:
+    response = copy.deepcopy(response)
+    if "response" in response:
+        resp_str = response["response"]
+        if "<execute_python>" in resp_str:
+            # only include descriptions for these, the rest will just have executing
+            # code
+            description_map = {
+                "open_code_artifact": "Reading file.",
+                "create_code_artifact": "Creating file.",
+                "edit_code_artifact": "Editing file.",
+                "generate_vision_code": "Generating vision code.",
+                "edit_vision_code": "Editing vision code.",
+            }
+            description = ""
+            for k, v in description_map.items():
+                if k in resp_str:
+                    description += v + " "
+            if description == "":
+                description = "Executing code."
+            resp_str = resp_str[resp_str.find("<execute_python>") :]
+            resp_str = description + resp_str
+        response["response"] = resp_str
+    return response
 class VisionAgent(Agent):
     """Vision Agent is an agent that can chat with the user and call tools or other
     agents to generate code for it. Vision Agent uses python code to execute actions
@@ -335,8 +361,18 @@ class VisionAgent(Agent):
                 response = run_conversation(self.agent, int_chat)
                 if self.verbosity >= 1:
                     _LOGGER.info(response)
-                int_chat.append({"role": "assistant", "content": str(response)})
-                orig_chat.append({"role": "assistant", "content": str(response)})
+                int_chat.append(
+                    {
+                        "role": "assistant",
+                        "content": str(add_step_descriptions(response)),
+                    }
+                )
+                orig_chat.append(
+                    {
+                        "role": "assistant",
+                        "content": str(add_step_descriptions(response)),
+                    }
+                )
                 # sometimes it gets stuck in a loop, so we force it to exit
                 if last_response == response:
@@ -382,8 +418,18 @@ class VisionAgent(Agent):
                     obs_chat_elt: Message = {"role": "observation", "content": obs}
                     if media_obs and result.success:
+                        # for view_media_artifact, we need to ensure the media is loaded
+                        # locally so the conversation agent can actually see it
+                        code_interpreter.download_file(
+                            str(remote_artifacts_path.name),
+                            str(self.local_artifacts_path),
+                        )
+                        artifacts.load(
+                            self.local_artifacts_path,
+                            Path(self.local_artifacts_path).parent,
+                        )
                         obs_chat_elt["media"] = [
-                            Path(code_interpreter.remote_path) / media_ob
+                            Path(self.local_artifacts_path).parent / media_ob
                             for media_ob in media_obs
                         ]
@@ -407,6 +453,9 @@ class VisionAgent(Agent):
             code_interpreter.download_file(
                 str(remote_artifacts_path.name), str(self.local_artifacts_path)
             )
+            artifacts.load(
+                self.local_artifacts_path, Path(self.local_artifacts_path).parent
+            )
         return orig_chat, artifacts
     def streaming_message(self, message: Dict[str, Any]) -> None:

vision_agent/tools/meta_tools.py CHANGED Viewed

@@ -92,19 +92,26 @@ class Artifacts:
         self.code_sandbox_runtime = None
-    def load(self, file_path: Union[str, Path]) -> None:
-        """Loads are artifacts into the remote environment. If an artifact value is None
-        it will skip loading it.
+    def load(
+        self,
+        artifacts_path: Union[str, Path],
+        load_to: Optional[Union[str, Path]] = None,
+    ) -> None:
+        """Loads are artifacts into the load_to path. If load_to is None, it will load
+        into remote_save_path. If an artifact value is None it will skip loading it.
         Parameters:
-            file_path (Union[str, Path]): The file path to load the artifacts from
+            artifacts_path (Union[str, Path]): The file path to load the artifacts from
         """
-        with open(file_path, "rb") as f:
+        with open(artifacts_path, "rb") as f:
             self.artifacts = pkl.load(f)
+        load_to = self.remote_save_path.parent if load_to is None else Path(load_to)
         for k, v in self.artifacts.items():
             if v is not None:
                 mode = "w" if isinstance(v, str) else "wb"
-                with open(self.remote_save_path.parent / k, mode) as f:
+                with open(load_to / k, mode) as f:
                     f.write(v)
     def show(self, uploaded_file_path: Optional[Union[str, Path]] = None) -> str:

vision_agent/tools/tools.py CHANGED Viewed

@@ -700,6 +700,7 @@ def countgd_counting(
             {'score': 0.98, 'label': 'flower', 'bbox': [0.44, 0.24, 0.49, 0.58},
         ]
     """
+    image_size = image.shape[:2]
     buffer_bytes = numpy_to_bytes(image)
     files = [("image", buffer_bytes)]
     prompt = prompt.replace(", ", " .")
@@ -712,7 +713,7 @@ def countgd_counting(
     bboxes_formatted = [
         ODResponseData(
             label=bbox["label"],
-            bbox=list(map(lambda x: round(x, 2), bbox["bounding_box"])),
+            bbox=normalize_bbox(bbox["bounding_box"], image_size),
             score=round(bbox["score"], 2),
         )
         for bbox in bboxes_per_frame
@@ -757,6 +758,7 @@ def countgd_example_based_counting(
             {'score': 0.98, 'label': 'object', 'bounding_box': [0.44, 0.24, 0.49, 0.58},
         ]
     """
+    image_size = image.shape[:2]
     buffer_bytes = numpy_to_bytes(image)
     files = [("image", buffer_bytes)]
     visual_prompts = [
@@ -771,7 +773,7 @@ def countgd_example_based_counting(
     bboxes_formatted = [
         ODResponseData(
             label=bbox["label"],
-            bbox=list(map(lambda x: round(x, 2), bbox["bounding_box"])),
+            bbox=normalize_bbox(bbox["bounding_box"], image_size),
             score=round(bbox["score"], 2),
         )
         for bbox in bboxes_per_frame

{vision_agent-0.2.157.dist-info → vision_agent-0.2.159.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.157
+Version: 0.2.159
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.157.dist-info → vision_agent-0.2.159.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
 vision_agent/agent/__init__.py,sha256=NF2LABqHixLvbsOIO-fe-VKZ7awvShLtcT0oQT4eWtI,235
 vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
 vision_agent/agent/agent_utils.py,sha256=eIpLz2NunEqEsBBrECJaD34-2uY0bsFNnW-XKfqqohs,2518
-vision_agent/agent/vision_agent.py,sha256=wrfAWGLcJMJ62ATFLl0E0-2xszi9HQ4Amp82B7-_Ihw,18376
+vision_agent/agent/vision_agent.py,sha256=etqyLMZHJJz_A6tkonoYGlYvFvEW0uUHs5D1gsYwkSs,20412
 vision_agent/agent/vision_agent_coder.py,sha256=2ZoGikn2nakGDfs20XRshZjQUyvbw6l47UhExJAYkqI,38515
 vision_agent/agent/vision_agent_coder_prompts.py,sha256=BmbTMhth4v1qLexuoSeyo47QQ0kPQvL1pLbCJHMsWDw,18910
 vision_agent/agent/vision_agent_prompts.py,sha256=LZ9Bnx7ZFkqbNOMqwfdiWZU4niND9Z1ArcFHNSn_jzA,11187
@@ -15,10 +15,10 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
 vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
 vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
 vision_agent/tools/__init__.py,sha256=PLVbfTMjKxQlHIRWnq9b785W9a52AXQS_tOa0tkQ0ZY,2420
-vision_agent/tools/meta_tools.py,sha256=Xu5h92YRfsbvW_iivTnOhlNAPOc2z7CShjOz8KLI4KA,25212
+vision_agent/tools/meta_tools.py,sha256=VKvrGgd_uvB8nEGTfouz8ij9MKJJh9G5bOg4mVMSrqY,25418
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
-vision_agent/tools/tools.py,sha256=aP4GCeuGJDMQAIajflgKPVMjrs7ecdEuNiA9GDnV-Pk,78470
+vision_agent/tools/tools.py,sha256=vS1yCk3Fza9eYOTHPFwwroo_ULdw2ztMQMb81x1U5f8,78524
 vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
 vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
 vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
 vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=xbMEoRk13l4fHeQlbvMQhLCn8RNndYmsDhUf01TUeR8,4781
-vision_agent-0.2.157.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.157.dist-info/METADATA,sha256=Lry2FA0K935e2HwJWMAQYCIbUK1P4OMn831xikvO-Rg,17753
-vision_agent-0.2.157.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.157.dist-info/RECORD,,
+vision_agent-0.2.159.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.159.dist-info/METADATA,sha256=iKyw9w-VOAaZ2EqPJmRozZ8J8QP0DR87gog3HeJ3mcc,17753
+vision_agent-0.2.159.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.159.dist-info/RECORD,,

{vision_agent-0.2.157.dist-info → vision_agent-0.2.159.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.157.dist-info → vision_agent-0.2.159.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.157__py3-none-any.whl → 0.2.159__py3-none-any.whl

vision-agent 0.2.157__py3-none-any.whl → 0.2.159__py3-none-any.whl