vision-agent 0.2.157__py3-none-any.whl → 0.2.159__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +52 -3
- vision_agent/tools/meta_tools.py +13 -6
- vision_agent/tools/tools.py +4 -2
- {vision_agent-0.2.157.dist-info → vision_agent-0.2.159.dist-info}/METADATA +1 -1
- {vision_agent-0.2.157.dist-info → vision_agent-0.2.159.dist-info}/RECORD +7 -7
- {vision_agent-0.2.157.dist-info → vision_agent-0.2.159.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.157.dist-info → vision_agent-0.2.159.dist-info}/WHEEL +0 -0
| @@ -149,6 +149,32 @@ def execute_user_code_action( | |
| 149 149 | 
             
                return user_result, user_obs
         | 
| 150 150 |  | 
| 151 151 |  | 
| 152 | 
            +
            def add_step_descriptions(response: Dict[str, str]) -> Dict[str, str]:
         | 
| 153 | 
            +
                response = copy.deepcopy(response)
         | 
| 154 | 
            +
                if "response" in response:
         | 
| 155 | 
            +
                    resp_str = response["response"]
         | 
| 156 | 
            +
                    if "<execute_python>" in resp_str:
         | 
| 157 | 
            +
                        # only include descriptions for these, the rest will just have executing
         | 
| 158 | 
            +
                        # code
         | 
| 159 | 
            +
                        description_map = {
         | 
| 160 | 
            +
                            "open_code_artifact": "Reading file.",
         | 
| 161 | 
            +
                            "create_code_artifact": "Creating file.",
         | 
| 162 | 
            +
                            "edit_code_artifact": "Editing file.",
         | 
| 163 | 
            +
                            "generate_vision_code": "Generating vision code.",
         | 
| 164 | 
            +
                            "edit_vision_code": "Editing vision code.",
         | 
| 165 | 
            +
                        }
         | 
| 166 | 
            +
                        description = ""
         | 
| 167 | 
            +
                        for k, v in description_map.items():
         | 
| 168 | 
            +
                            if k in resp_str:
         | 
| 169 | 
            +
                                description += v + " "
         | 
| 170 | 
            +
                        if description == "":
         | 
| 171 | 
            +
                            description = "Executing code."
         | 
| 172 | 
            +
                        resp_str = resp_str[resp_str.find("<execute_python>") :]
         | 
| 173 | 
            +
                        resp_str = description + resp_str
         | 
| 174 | 
            +
                    response["response"] = resp_str
         | 
| 175 | 
            +
                return response
         | 
| 176 | 
            +
             | 
| 177 | 
            +
             | 
| 152 178 | 
             
            class VisionAgent(Agent):
         | 
| 153 179 | 
             
                """Vision Agent is an agent that can chat with the user and call tools or other
         | 
| 154 180 | 
             
                agents to generate code for it. Vision Agent uses python code to execute actions
         | 
| @@ -335,8 +361,18 @@ class VisionAgent(Agent): | |
| 335 361 | 
             
                            response = run_conversation(self.agent, int_chat)
         | 
| 336 362 | 
             
                            if self.verbosity >= 1:
         | 
| 337 363 | 
             
                                _LOGGER.info(response)
         | 
| 338 | 
            -
                            int_chat.append( | 
| 339 | 
            -
             | 
| 364 | 
            +
                            int_chat.append(
         | 
| 365 | 
            +
                                {
         | 
| 366 | 
            +
                                    "role": "assistant",
         | 
| 367 | 
            +
                                    "content": str(add_step_descriptions(response)),
         | 
| 368 | 
            +
                                }
         | 
| 369 | 
            +
                            )
         | 
| 370 | 
            +
                            orig_chat.append(
         | 
| 371 | 
            +
                                {
         | 
| 372 | 
            +
                                    "role": "assistant",
         | 
| 373 | 
            +
                                    "content": str(add_step_descriptions(response)),
         | 
| 374 | 
            +
                                }
         | 
| 375 | 
            +
                            )
         | 
| 340 376 |  | 
| 341 377 | 
             
                            # sometimes it gets stuck in a loop, so we force it to exit
         | 
| 342 378 | 
             
                            if last_response == response:
         | 
| @@ -382,8 +418,18 @@ class VisionAgent(Agent): | |
| 382 418 |  | 
| 383 419 | 
             
                                obs_chat_elt: Message = {"role": "observation", "content": obs}
         | 
| 384 420 | 
             
                                if media_obs and result.success:
         | 
| 421 | 
            +
                                    # for view_media_artifact, we need to ensure the media is loaded
         | 
| 422 | 
            +
                                    # locally so the conversation agent can actually see it
         | 
| 423 | 
            +
                                    code_interpreter.download_file(
         | 
| 424 | 
            +
                                        str(remote_artifacts_path.name),
         | 
| 425 | 
            +
                                        str(self.local_artifacts_path),
         | 
| 426 | 
            +
                                    )
         | 
| 427 | 
            +
                                    artifacts.load(
         | 
| 428 | 
            +
                                        self.local_artifacts_path,
         | 
| 429 | 
            +
                                        Path(self.local_artifacts_path).parent,
         | 
| 430 | 
            +
                                    )
         | 
| 385 431 | 
             
                                    obs_chat_elt["media"] = [
         | 
| 386 | 
            -
                                        Path( | 
| 432 | 
            +
                                        Path(self.local_artifacts_path).parent / media_ob
         | 
| 387 433 | 
             
                                        for media_ob in media_obs
         | 
| 388 434 | 
             
                                    ]
         | 
| 389 435 |  | 
| @@ -407,6 +453,9 @@ class VisionAgent(Agent): | |
| 407 453 | 
             
                        code_interpreter.download_file(
         | 
| 408 454 | 
             
                            str(remote_artifacts_path.name), str(self.local_artifacts_path)
         | 
| 409 455 | 
             
                        )
         | 
| 456 | 
            +
                        artifacts.load(
         | 
| 457 | 
            +
                            self.local_artifacts_path, Path(self.local_artifacts_path).parent
         | 
| 458 | 
            +
                        )
         | 
| 410 459 | 
             
                    return orig_chat, artifacts
         | 
| 411 460 |  | 
| 412 461 | 
             
                def streaming_message(self, message: Dict[str, Any]) -> None:
         | 
    
        vision_agent/tools/meta_tools.py
    CHANGED
    
    | @@ -92,19 +92,26 @@ class Artifacts: | |
| 92 92 |  | 
| 93 93 | 
             
                    self.code_sandbox_runtime = None
         | 
| 94 94 |  | 
| 95 | 
            -
                def load( | 
| 96 | 
            -
                     | 
| 97 | 
            -
                     | 
| 95 | 
            +
                def load(
         | 
| 96 | 
            +
                    self,
         | 
| 97 | 
            +
                    artifacts_path: Union[str, Path],
         | 
| 98 | 
            +
                    load_to: Optional[Union[str, Path]] = None,
         | 
| 99 | 
            +
                ) -> None:
         | 
| 100 | 
            +
                    """Loads all artifacts into the load_to path. If load_to is None, it will load
         | 
| 101 | 
            +
                    into the parent of remote_save_path. If an artifact value is None it will skip loading it.
         | 
| 98 102 |  | 
| 99 103 | 
             
                    Parameters:
         | 
| 100 | 
            -
                         | 
| 104 | 
            +
                        artifacts_path (Union[str, Path]): The file path to load the artifacts from
         | 
| 101 105 | 
             
                    """
         | 
| 102 | 
            -
                    with open( | 
| 106 | 
            +
                    with open(artifacts_path, "rb") as f:
         | 
| 103 107 | 
             
                        self.artifacts = pkl.load(f)
         | 
| 108 | 
            +
             | 
| 109 | 
            +
                    load_to = self.remote_save_path.parent if load_to is None else Path(load_to)
         | 
| 110 | 
            +
             | 
| 104 111 | 
             
                    for k, v in self.artifacts.items():
         | 
| 105 112 | 
             
                        if v is not None:
         | 
| 106 113 | 
             
                            mode = "w" if isinstance(v, str) else "wb"
         | 
| 107 | 
            -
                            with open( | 
| 114 | 
            +
                            with open(load_to / k, mode) as f:
         | 
| 108 115 | 
             
                                f.write(v)
         | 
| 109 116 |  | 
| 110 117 | 
             
                def show(self, uploaded_file_path: Optional[Union[str, Path]] = None) -> str:
         | 
    
        vision_agent/tools/tools.py
    CHANGED
    
    | @@ -700,6 +700,7 @@ def countgd_counting( | |
| 700 700 | 
             
                        {'score': 0.98, 'label': 'flower', 'bbox': [0.44, 0.24, 0.49, 0.58]},
         | 
| 701 701 | 
             
                    ]
         | 
| 702 702 | 
             
                """
         | 
| 703 | 
            +
                image_size = image.shape[:2]
         | 
| 703 704 | 
             
                buffer_bytes = numpy_to_bytes(image)
         | 
| 704 705 | 
             
                files = [("image", buffer_bytes)]
         | 
| 705 706 | 
             
                prompt = prompt.replace(", ", " .")
         | 
| @@ -712,7 +713,7 @@ def countgd_counting( | |
| 712 713 | 
             
                bboxes_formatted = [
         | 
| 713 714 | 
             
                    ODResponseData(
         | 
| 714 715 | 
             
                        label=bbox["label"],
         | 
| 715 | 
            -
                        bbox= | 
| 716 | 
            +
                        bbox=normalize_bbox(bbox["bounding_box"], image_size),
         | 
| 716 717 | 
             
                        score=round(bbox["score"], 2),
         | 
| 717 718 | 
             
                    )
         | 
| 718 719 | 
             
                    for bbox in bboxes_per_frame
         | 
| @@ -757,6 +758,7 @@ def countgd_example_based_counting( | |
| 757 758 | 
             
                        {'score': 0.98, 'label': 'object', 'bounding_box': [0.44, 0.24, 0.49, 0.58]},
         | 
| 758 759 | 
             
                    ]
         | 
| 759 760 | 
             
                """
         | 
| 761 | 
            +
                image_size = image.shape[:2]
         | 
| 760 762 | 
             
                buffer_bytes = numpy_to_bytes(image)
         | 
| 761 763 | 
             
                files = [("image", buffer_bytes)]
         | 
| 762 764 | 
             
                visual_prompts = [
         | 
| @@ -771,7 +773,7 @@ def countgd_example_based_counting( | |
| 771 773 | 
             
                bboxes_formatted = [
         | 
| 772 774 | 
             
                    ODResponseData(
         | 
| 773 775 | 
             
                        label=bbox["label"],
         | 
| 774 | 
            -
                        bbox= | 
| 776 | 
            +
                        bbox=normalize_bbox(bbox["bounding_box"], image_size),
         | 
| 775 777 | 
             
                        score=round(bbox["score"], 2),
         | 
| 776 778 | 
             
                    )
         | 
| 777 779 | 
             
                    for bbox in bboxes_per_frame
         | 
| @@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57 | |
| 2 2 | 
             
            vision_agent/agent/__init__.py,sha256=NF2LABqHixLvbsOIO-fe-VKZ7awvShLtcT0oQT4eWtI,235
         | 
| 3 3 | 
             
            vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
         | 
| 4 4 | 
             
            vision_agent/agent/agent_utils.py,sha256=eIpLz2NunEqEsBBrECJaD34-2uY0bsFNnW-XKfqqohs,2518
         | 
| 5 | 
            -
            vision_agent/agent/vision_agent.py,sha256= | 
| 5 | 
            +
            vision_agent/agent/vision_agent.py,sha256=etqyLMZHJJz_A6tkonoYGlYvFvEW0uUHs5D1gsYwkSs,20412
         | 
| 6 6 | 
             
            vision_agent/agent/vision_agent_coder.py,sha256=2ZoGikn2nakGDfs20XRshZjQUyvbw6l47UhExJAYkqI,38515
         | 
| 7 7 | 
             
            vision_agent/agent/vision_agent_coder_prompts.py,sha256=BmbTMhth4v1qLexuoSeyo47QQ0kPQvL1pLbCJHMsWDw,18910
         | 
| 8 8 | 
             
            vision_agent/agent/vision_agent_prompts.py,sha256=LZ9Bnx7ZFkqbNOMqwfdiWZU4niND9Z1ArcFHNSn_jzA,11187
         | 
| @@ -15,10 +15,10 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg, | |
| 15 15 | 
             
            vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
         | 
| 16 16 | 
             
            vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
         | 
| 17 17 | 
             
            vision_agent/tools/__init__.py,sha256=PLVbfTMjKxQlHIRWnq9b785W9a52AXQS_tOa0tkQ0ZY,2420
         | 
| 18 | 
            -
            vision_agent/tools/meta_tools.py,sha256= | 
| 18 | 
            +
            vision_agent/tools/meta_tools.py,sha256=VKvrGgd_uvB8nEGTfouz8ij9MKJJh9G5bOg4mVMSrqY,25418
         | 
| 19 19 | 
             
            vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
         | 
| 20 20 | 
             
            vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
         | 
| 21 | 
            -
            vision_agent/tools/tools.py,sha256= | 
| 21 | 
            +
            vision_agent/tools/tools.py,sha256=vS1yCk3Fza9eYOTHPFwwroo_ULdw2ztMQMb81x1U5f8,78524
         | 
| 22 22 | 
             
            vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
         | 
| 23 23 | 
             
            vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
         | 
| 24 24 | 
             
            vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
         | 
| @@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd | |
| 27 27 | 
             
            vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
         | 
| 28 28 | 
             
            vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
         | 
| 29 29 | 
             
            vision_agent/utils/video.py,sha256=xbMEoRk13l4fHeQlbvMQhLCn8RNndYmsDhUf01TUeR8,4781
         | 
| 30 | 
            -
            vision_agent-0.2. | 
| 31 | 
            -
            vision_agent-0.2. | 
| 32 | 
            -
            vision_agent-0.2. | 
| 33 | 
            -
            vision_agent-0.2. | 
| 30 | 
            +
            vision_agent-0.2.159.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
         | 
| 31 | 
            +
            vision_agent-0.2.159.dist-info/METADATA,sha256=iKyw9w-VOAaZ2EqPJmRozZ8J8QP0DR87gog3HeJ3mcc,17753
         | 
| 32 | 
            +
            vision_agent-0.2.159.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
         | 
| 33 | 
            +
            vision_agent-0.2.159.dist-info/RECORD,,
         | 
| 
            File without changes
         | 
| 
            File without changes
         |