PyPI - vision-agent - Versions diffs - 0.2.200__py3-none-any.whl → 0.2.201__py3-none-any.whl - Mend

vision-agent 0.2.200__py3-none-any.whl → 0.2.201__py3-none-any.whl

Files changed (8) hide show

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -36,14 +36,10 @@ class BoilerplateCode:
     pre_code = [
         "from typing import *",
         "from vision_agent.utils.execute import CodeInterpreter",
-        "from vision_agent.tools.meta_tools import Artifacts, open_code_artifact, create_code_artifact, edit_code_artifact, get_tool_descriptions, generate_vision_code, edit_vision_code, view_media_artifact, object_detection_fine_tuning, use_object_detection_fine_tuning, list_artifacts, capture_files_into_artifacts",
-        "artifacts = Artifacts('{remote_path}', '{remote_path}')",
-        "artifacts.load('{remote_path}')",
-    ]
-    post_code = [
-        "capture_files_into_artifacts(artifacts)",
-        "artifacts.save()",
+        "from vision_agent.tools.meta_tools import Artifacts, open_code_artifact, create_code_artifact, edit_code_artifact, get_tool_descriptions, generate_vision_code, edit_vision_code, view_media_artifact, object_detection_fine_tuning, use_object_detection_fine_tuning, list_artifacts",
+        "artifacts = Artifacts('{cwd}')",
     ]
+    post_code: List[str] = []
     @staticmethod
     def add_boilerplate(code: str, **format: Any) -> str:
@@ -149,9 +145,7 @@ def execute_code_action(
     code_interpreter: CodeInterpreter,
 ) -> Tuple[Execution, str]:
     result = code_interpreter.exec_isolation(
-        BoilerplateCode.add_boilerplate(
-            code, remote_path=str(artifacts.remote_save_path)
-        )
+        BoilerplateCode.add_boilerplate(code, cwd=str(artifacts.cwd))
     )
     obs = str(result.logs)
@@ -212,19 +206,6 @@ def add_step_descriptions(response: Dict[str, Any]) -> Dict[str, Any]:
     return response
-def setup_artifacts() -> Artifacts:
-    # this is setting remote artifacts path
-    sandbox = os.environ.get("CODE_SANDBOX_RUNTIME", None)
-    if sandbox is None or sandbox == "local":
-        remote = WORKSPACE / "artifacts.pkl"
-    elif sandbox == "e2b":
-        remote = Path("/home/user/artifacts.pkl")
-    else:
-        raise ValueError(f"Unknown code sandbox runtime {sandbox}")
-    artifacts = Artifacts(remote, Path(os.getcwd()) / "artifacts.pkl")
-    return artifacts
 def new_format_to_old_format(new_format: Dict[str, Any]) -> Dict[str, Any]:
     thoughts = new_format["thinking"] if new_format["thinking"] is not None else ""
     response = new_format["response"] if new_format["response"] is not None else ""
@@ -297,9 +278,10 @@ class VisionAgent(Agent):
     def __init__(
         self,
         agent: Optional[LMM] = None,
+        cwd: Optional[Union[Path, str]] = None,
         verbosity: int = 0,
         callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
-        code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
+        code_sandbox_runtime: Optional[str] = None,
     ) -> None:
         """Initialize the VisionAgent.
@@ -317,9 +299,10 @@ class VisionAgent(Agent):
         self.agent = AnthropicLMM(temperature=0.0) if agent is None else agent
         self.max_iterations = 12
+        self.cwd = Path(cwd) if cwd is not None else Path.cwd()
         self.verbosity = verbosity
-        self.code_interpreter = code_interpreter
         self.callback_message = callback_message
+        self.code_sandbox_runtime = code_sandbox_runtime
         if self.verbosity >= 1:
             _LOGGER.setLevel(logging.INFO)
@@ -397,40 +380,21 @@ class VisionAgent(Agent):
             raise ValueError("chat cannot be empty")
         if not artifacts:
-            artifacts = setup_artifacts()
-        # NOTE: each chat should have a dedicated code interpreter instance to avoid concurrency issues
-        code_interpreter = (
-            self.code_interpreter
-            if self.code_interpreter is not None
-            and not isinstance(self.code_interpreter, str)
-            else CodeInterpreterFactory.new_instance(
-                code_sandbox_runtime=self.code_interpreter,
-                remote_path=artifacts.remote_save_path.parent,
-            )
-        )
+            artifacts = Artifacts(self.cwd)
-        if code_interpreter.remote_path != artifacts.remote_save_path.parent:
-            raise ValueError(
-                f"Code interpreter remote path {code_interpreter.remote_path} does not match artifacts remote path {artifacts.remote_save_path.parent}"
-            )
-        with code_interpreter:
+        with CodeInterpreterFactory.new_instance(
+            code_sandbox_runtime=self.code_sandbox_runtime,
+            remote_path=self.cwd,
+        ) as code_interpreter:
             orig_chat = copy.deepcopy(chat)
             int_chat = copy.deepcopy(chat)
             last_user_message = chat[-1]
-            media_list = []
             for chat_i in int_chat:
                 if "media" in chat_i:
                     for media in chat_i["media"]:
                         media = cast(str, media)
-                        artifacts.artifacts[Path(media).name] = open(media, "rb").read()
-                        media_remote_path = (
-                            Path(artifacts.remote_save_path.parent) / Path(media).name
-                        )
+                        media_remote_path = Path(artifacts.cwd) / Path(media).name
                         chat_i["content"] += f" Media name {media_remote_path}"  # type: ignore
-                        media_list.append(media_remote_path)
             int_chat = cast(
                 List[Message],
@@ -452,15 +416,10 @@ class VisionAgent(Agent):
             iterations = 0
             last_response = None
-            # Save the current state of artifacts, will include any images the user
-            # passed in.
-            artifacts.save()
             # Upload artifacts to remote location and show where they are going
             # to be loaded to. The actual loading happens in BoilerplateCode as
             # part of the pre_code.
-            code_interpreter.upload_file(artifacts.local_save_path)
-            artifacts_loaded = artifacts.show(artifacts.remote_save_path.parent)
+            artifacts_loaded = artifacts.show()
             int_chat.append({"role": "observation", "content": artifacts_loaded})
             orig_chat.append({"role": "observation", "content": artifacts_loaded})
             self.streaming_message({"role": "observation", "content": artifacts_loaded})
@@ -487,10 +446,6 @@ class VisionAgent(Agent):
                 )
             while not finished and iterations < self.max_iterations:
-                # ensure we upload the artifacts before each turn, so any local
-                # modifications we made to it will be reflected in the remote
-                code_interpreter.upload_file(artifacts.local_save_path)
                 response = run_conversation(self.agent, int_chat)
                 if self.verbosity >= 1:
                     _LOGGER.info(response)
@@ -555,11 +510,8 @@ class VisionAgent(Agent):
                     obs_chat_elt: Message = {"role": "observation", "content": obs}
                     media_obs = check_and_load_image(code_action)
                     if media_obs and result.success:
-                        # media paths will be under the local_save_path when we download
-                        # them after each turn
                         obs_chat_elt["media"] = [
-                            artifacts.local_save_path.parent / media_ob
-                            for media_ob in media_obs
+                            artifacts.cwd / media_ob for media_ob in media_obs
                         ]
                     if self.verbosity >= 1:
@@ -581,15 +533,6 @@ class VisionAgent(Agent):
                 iterations += 1
                 last_response = response
-                # after each turn, download the artifacts locally
-                code_interpreter.download_file(
-                    str(artifacts.remote_save_path.name),
-                    str(artifacts.local_save_path),
-                )
-                artifacts.load(
-                    artifacts.local_save_path, artifacts.local_save_path.parent
-                )
         return orig_chat, artifacts
     def streaming_message(self, message: Dict[str, Any]) -> None:
@@ -604,9 +547,9 @@ class OpenAIVisionAgent(VisionAgent):
     def __init__(
         self,
         agent: Optional[LMM] = None,
+        cwd: Optional[Union[Path, str]] = None,
         verbosity: int = 0,
         callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
-        code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
     ) -> None:
         """Initialize the VisionAgent using OpenAI LMMs.
@@ -625,9 +568,9 @@ class OpenAIVisionAgent(VisionAgent):
         agent = OpenAILMM(temperature=0.0, json_mode=True) if agent is None else agent
         super().__init__(
             agent,
+            cwd,
             verbosity,
             callback_message,
-            code_interpreter,
         )
@@ -635,9 +578,9 @@ class AnthropicVisionAgent(VisionAgent):
     def __init__(
         self,
         agent: Optional[LMM] = None,
+        cwd: Optional[Union[Path, str]] = None,
         verbosity: int = 0,
         callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
-        code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
     ) -> None:
         """Initialize the VisionAgent using Anthropic LMMs.
@@ -656,7 +599,7 @@ class AnthropicVisionAgent(VisionAgent):
         agent = AnthropicLMM(temperature=0.0) if agent is None else agent
         super().__init__(
             agent,
+            cwd,
             verbosity,
             callback_message,
-            code_interpreter,
         )

vision_agent/agent/vision_agent_coder.py CHANGED Viewed

@@ -450,12 +450,6 @@ class VisionAgentCoder(Agent):
             for chat_i in chat:
                 if "media" in chat_i:
                     for media in chat_i["media"]:
-                        media = (
-                            media
-                            if type(media) is str
-                            and media.startswith(("http", "https"))
-                            else code_interpreter.upload_file(cast(str, media))
-                        )
                         chat_i["content"] += f" Media name {media}"  # type: ignore
                         media_list.append(str(media))

vision_agent/agent/vision_agent_planner.py CHANGED Viewed

@@ -391,12 +391,6 @@ class VisionAgentPlanner(Agent):
             for chat_i in chat:
                 if "media" in chat_i:
                     for media in chat_i["media"]:
-                        media = (
-                            media
-                            if type(media) is str
-                            and media.startswith(("http", "https"))
-                            else code_interpreter.upload_file(cast(str, media))
-                        )
                         chat_i["content"] += f" Media name {media}"  # type: ignore
                         media_list.append(str(media))

vision_agent/tools/meta_tools.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import difflib
 import json
 import os
-import pickle as pkl
 import re
 import subprocess
 import tempfile
@@ -73,95 +72,41 @@ class Artifacts:
     need to be in sync with the remote environment the VisionAgent is running in.
     """
-    def __init__(
-        self, remote_save_path: Union[str, Path], local_save_path: Union[str, Path]
-    ) -> None:
+    def __init__(self, cwd: Union[str, Path]) -> None:
         """Initializes the Artifacts object with it's remote and local save paths.
         Parameters:
-            remote_save_path (Union[str, Path]): The path to save the artifacts in the
-                remote environment. For example "/home/user/artifacts.pkl".
-            local_save_path (Union[str, Path]): The path to save the artifacts in the
-                local environment. For example "/Users/my_user/workspace/artifacts.pkl".
+            cwd (Union[str, Path]): The path to save all the chat related files. For example "/home/user/chat_abc/".
         """
-        self.remote_save_path = Path(remote_save_path)
-        self.local_save_path = Path(local_save_path)
-        self.artifacts: Dict[str, Any] = {}
+        self.cwd = Path(cwd)
-        self.code_sandbox_runtime = None
-    def load(
-        self,
-        artifacts_path: Union[str, Path],
-        load_to_dir: Optional[Union[str, Path]] = None,
-    ) -> None:
-        """Loads are artifacts into the load_to_dir directory. If load_to_dir is None,
-        it will load into remote_save_path directory. If an artifact value is None it
-        will skip loading it.
-        Parameters:
-            artifacts_path (Union[str, Path]): The file path to load the artifacts from.
-                If you are in the remote environment this would be remote_save_path, if
-                you are in the local environment this would be local_save_path.
-            load_to_dir (Optional[Union[str, Path]): The directory to load the artifacts
-                into. If None, it will load into remote_save_path directory.
-        """
-        with open(artifacts_path, "rb") as f:
-            self.artifacts = pkl.load(f)
-        load_to_dir = (
-            self.remote_save_path.parent if load_to_dir is None else Path(load_to_dir)
-        )
-        for k, v in self.artifacts.items():
-            if v is not None:
-                mode = "w" if isinstance(v, str) else "wb"
-                with open(load_to_dir / k, mode) as f:
-                    f.write(v)
-    def show(self, uploaded_file_dir: Optional[Union[str, Path]] = None) -> str:
-        """Prints out the artifacts and the directory they have been loaded to. If you
-        pass in upload_file_dir, it will show the artifacts have been loaded to the
-        upload_file_dir directory. If you don't pass in upload_file_dir, it will show
-        the artifacts have been loaded to the remote_save_path directory.
-        Parameters:
-            uploaded_file_dir (Optional[Union[str, Path]): The directory the artifacts
-                have been loaded to.
-        """
-        loaded_path = (
-            Path(uploaded_file_dir)
-            if uploaded_file_dir is not None
-            else self.remote_save_path.parent
-        )
+    def show(self) -> str:
+        """Prints out all the files in the curret working directory"""
         output_str = "[Artifacts loaded]\n"
-        for k in self.artifacts.keys():
-            output_str += (
-                f"Artifact name: {k}, loaded to path: {str(loaded_path / k)}\n"
-            )
+        for k in self:
+            output_str += f"Artifact name: {k}, loaded to path: {str(self.cwd / k)}\n"
         output_str += "[End of artifacts]\n"
         print(output_str)
         return output_str
-    def save(self, local_path: Optional[Union[str, Path]] = None) -> None:
-        """Saves the artifacts to the local_save_path directory. If local_path is None,
-        it will save to the local_save_path directory.
-        """
-        save_path = Path(local_path) if local_path is not None else self.local_save_path
-        with open(save_path, "wb") as f:
-            pkl.dump(self.artifacts, f)
     def __iter__(self) -> Any:
-        return iter(self.artifacts)
+        return iter(os.listdir(self.cwd))
     def __getitem__(self, name: str) -> Any:
-        return self.artifacts[name]
+        file_path = self.cwd / name
+        if file_path.exists():
+            with open(file_path, "r") as file:
+                return file.read()
+        else:
+            raise KeyError(f"File '{name}' not found in artifacts")
     def __setitem__(self, name: str, value: Any) -> None:
-        self.artifacts[name] = value
+        file_path = self.cwd / name
+        with open(file_path, "w") as file:
+            file.write(value)
     def __contains__(self, name: str) -> bool:
-        return name in self.artifacts
+        return name in os.listdir(self.cwd)
 def filter_file(file_name: Union[str, Path]) -> Tuple[bool, bool]:
@@ -175,27 +120,6 @@ def filter_file(file_name: Union[str, Path]) -> Tuple[bool, bool]:
     ), file_name_p.suffix in [".png", ".jpeg", ".jpg", ".mp4"]
-def capture_files_into_artifacts(artifacts: Artifacts) -> None:
-    """This function is used to capture all files in the current directory into an
-    artifact object. This is useful if you want to capture all files in the current
-    directory and use them in a different environment where you don't have access to
-    the file system.
-    Parameters:
-        artifact (Artifacts): The artifact object to save the files to.
-    """
-    for file in Path(".").glob("**/*"):
-        usable_file, is_media = filter_file(file)
-        mode = "rb" if is_media else "r"
-        if usable_file:
-            file_name = file.name
-            if file_name.startswith(str(Path(artifacts.remote_save_path).parents)):
-                idx = len(Path(artifacts.remote_save_path).parents)
-                file_name = file_name[idx:]
-            with open(file, mode) as f:
-                artifacts[file_name] = f.read()
 # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent

{vision_agent-0.2.200.dist-info → vision_agent-0.2.201.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.200
+Version: 0.2.201
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.200.dist-info → vision_agent-0.2.201.dist-info}/RECORD RENAMED Viewed

@@ -5,12 +5,12 @@ vision_agent/agent/__init__.py,sha256=M8CffavdIh8Zh-skznLHIaQkYGCGK7vk4dq1FaVkbs
 vision_agent/agent/agent.py,sha256=sf8JcA3LNy_4GaS_gQb2Q-PXkl4dBuGh-7raI9KAtZo,1470
 vision_agent/agent/agent_utils.py,sha256=NmrqjhSb6fpnrB8XGWtaywZjr9n89otusOZpcbWLf9k,13534
 vision_agent/agent/types.py,sha256=aAd_ez1-NQh04k27cmywyOV2uA_vWWYE-Ok7zq_JoAk,1532
-vision_agent/agent/vision_agent.py,sha256=rr1P9iTbr7OsjgMYWCeIxQYI4cLwPWia3NIMJNi-9Yo,26110
-vision_agent/agent/vision_agent_coder.py,sha256=waCmw_NTgsy9G-UqlRZFhsFJJVuWVrjxVnShe4Xp_lI,27743
+vision_agent/agent/vision_agent.py,sha256=I75bEU-os9Lf9OSICKfvQ_H_ftg-zOwgTwWnu41oIdo,23555
+vision_agent/agent/vision_agent_coder.py,sha256=ANwUuCO4JpTYJs4s6ynSRFcdjZFUVuSoSfcqp8ZQDDQ,27451
 vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
 vision_agent/agent/vision_agent_coder_prompts_v2.py,sha256=9v5HwbNidSzYUEFl6ZMniWWOmyLITM_moWLtKVaTen8,4845
 vision_agent/agent/vision_agent_coder_v2.py,sha256=SVIJC0N5TBgq9z-F99UebLimRuQuAe_HHvTFupBzVfo,14715
-vision_agent/agent/vision_agent_planner.py,sha256=F_5opnc0XmQmNH40rs2T7DFrai4CC6aDYe02Z8e93AM,18875
+vision_agent/agent/vision_agent_planner.py,sha256=KWMA7XemcSmc_jn-MwdWz9wnKDtj-sYQ9tINi70_OoU,18583
 vision_agent/agent/vision_agent_planner_prompts.py,sha256=Y3jz9HRf8fz9NLUseN7cTgZqewP0RazxR7vw1sPhcn0,6691
 vision_agent/agent/vision_agent_planner_prompts_v2.py,sha256=Tzon3h5iZdHJglesk8GVS-2myNf5-fhf7HUbkpZWHQk,33143
 vision_agent/agent/vision_agent_planner_v2.py,sha256=mxQxD_B8sKYharh8e7W0uc1tN11YCztyLowc83seScc,17023
@@ -26,7 +26,7 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
 vision_agent/lmm/lmm.py,sha256=x_nIyDNDZwq4-pfjnJTmcyyJZ2_B7TjkA5jZp88YVO8,17103
 vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
 vision_agent/tools/__init__.py,sha256=xuNt5e4syQH28Vr6EdjLmO9ni9i00yav9yqcPMUx1oo,2878
-vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB9Kg,32074
+vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
 vision_agent/tools/planner_tools.py,sha256=FROahw_6Taqvytv6pOjCHUEypOfjsi_f8Vo1c5vz6Mw,8823
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=GDGOmBCo4UfYz-DJ-olREJHPsqs5mzHu0YXiAnpNE8E,10179
@@ -39,7 +39,7 @@ vision_agent/utils/image_utils.py,sha256=rRWcxKggPXIRXIY_XT9rZt30ECDRq8zq7FDeXRD
 vision_agent/utils/sim.py,sha256=NZc9QGD6BTY5O29NVbHH7oxDePL_QMnylT1lYcDUn1Y,7437
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=tRcGp4vEnaDycigL1hBO9k0FBPtDH35fCQciVr9GqYI,6013
-vision_agent-0.2.200.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.200.dist-info/METADATA,sha256=goRTW73tD79-UlJiy4cL0twnVYm9iSjU9f5HsC4A1ZI,19026
-vision_agent-0.2.200.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.200.dist-info/RECORD,,
+vision_agent-0.2.201.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.201.dist-info/METADATA,sha256=Vbdn9gqa9uz0RTRV9SMvNgPQbqLGmgQJKUtuEe1buI0,19026
+vision_agent-0.2.201.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.201.dist-info/RECORD,,

{vision_agent-0.2.200.dist-info → vision_agent-0.2.201.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.200.dist-info → vision_agent-0.2.201.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.200__py3-none-any.whl → 0.2.201__py3-none-any.whl

vision-agent 0.2.200__py3-none-any.whl → 0.2.201__py3-none-any.whl