PyPI - vision-agent - Versions diffs - 0.2.173__py3-none-any.whl → 0.2.175__py3-none-any.whl - Mend

vision-agent 0.2.173-py3-none-any.whl → 0.2.175-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

vision_agent/agent/agent_utils.py +3 -1
vision_agent/agent/vision_agent.py +63 -68
vision_agent/agent/vision_agent_coder.py +101 -31
vision_agent/agent/vision_agent_prompts.py +75 -14
vision_agent/tools/meta_tools.py +141 -111
vision_agent/utils/execute.py +2 -2
{vision_agent-0.2.173.dist-info → vision_agent-0.2.175.dist-info}/METADATA +2 -2
{vision_agent-0.2.173.dist-info → vision_agent-0.2.175.dist-info}/RECORD +10 -10
{vision_agent-0.2.173.dist-info → vision_agent-0.2.175.dist-info}/LICENSE +0 -0
{vision_agent-0.2.173.dist-info → vision_agent-0.2.175.dist-info}/WHEEL +0 -0

vision_agent/agent/agent_utils.py CHANGED Viewed

@@ -72,7 +72,9 @@ def extract_json(json_str: str) -> Dict[str, Any]:
         if json_dict is None:
             error_msg = f"Could not extract JSON from the given str: {json_orig}"
             _LOGGER.exception(error_msg)
-            raise ValueError(error_msg)
+            raise json.JSONDecodeError(
+                msg="Could not extract JSON", doc=json_orig, pos=0
+            )
         return json_dict

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -2,7 +2,6 @@ import copy
 import json
 import logging
 import os
-import tempfile
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
@@ -12,6 +11,7 @@ from vision_agent.agent.vision_agent_prompts import (
     EXAMPLES_CODE1,
     EXAMPLES_CODE2,
     EXAMPLES_CODE3,
+    EXAMPLES_CODE3_EXTRA2,
     VA_CODE,
 )
 from vision_agent.lmm import LMM, AnthropicLMM, Message, OpenAILMM
@@ -19,7 +19,6 @@ from vision_agent.tools.meta_tools import (
     META_TOOL_DOCSTRING,
     Artifacts,
     check_and_load_image,
-    extract_and_save_files_to_artifacts,
     use_extra_vision_agent_args,
 )
 from vision_agent.utils import CodeInterpreterFactory
@@ -37,11 +36,12 @@ class BoilerplateCode:
     pre_code = [
         "from typing import *",
         "from vision_agent.utils.execute import CodeInterpreter",
-        "from vision_agent.tools.meta_tools import Artifacts, open_code_artifact, create_code_artifact, edit_code_artifact, get_tool_descriptions, generate_vision_code, edit_vision_code, view_media_artifact, object_detection_fine_tuning, use_object_detection_fine_tuning",
-        "artifacts = Artifacts('{remote_path}')",
+        "from vision_agent.tools.meta_tools import Artifacts, open_code_artifact, create_code_artifact, edit_code_artifact, get_tool_descriptions, generate_vision_code, edit_vision_code, view_media_artifact, object_detection_fine_tuning, use_object_detection_fine_tuning, list_artifacts, capture_files_into_artifacts",
+        "artifacts = Artifacts('{remote_path}', '{remote_path}')",
         "artifacts.load('{remote_path}')",
     ]
     post_code = [
+        "capture_files_into_artifacts(artifacts)",
         "artifacts.save()",
     ]
@@ -97,8 +97,9 @@ def _clean_response(response: str) -> str:
 def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
     chat = copy.deepcopy(chat)
+    # only add 10 most recent messages in the chat to not go over token limit
     conversation = ""
-    for chat_i in chat:
+    for chat_i in chat[-10:]:
         if chat_i["role"] == "user":
             conversation += f"USER: {chat_i['content']}\n\n"
         elif chat_i["role"] == "observation":
@@ -110,7 +111,7 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
     prompt = VA_CODE.format(
         documentation=META_TOOL_DOCSTRING,
-        examples=f"{EXAMPLES_CODE1}\n{EXAMPLES_CODE2}\n{EXAMPLES_CODE3}",
+        examples=f"{EXAMPLES_CODE1}\n{EXAMPLES_CODE2}\n{EXAMPLES_CODE3}\n{EXAMPLES_CODE3_EXTRA2}",
         conversation=conversation,
     )
     message: Message = {"role": "user", "content": prompt}
@@ -120,7 +121,9 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
         and "media" in chat[-1]
         and len(chat[-1]["media"]) > 0  # type: ignore
     ):
-        message["media"] = chat[-1]["media"]
+        media_obs = [media for media in chat[-1]["media"] if Path(media).exists()]  # type: ignore
+        if len(media_obs) > 0:
+            message["media"] = media_obs  # type: ignore
     conv_resp = cast(str, orch([message], stream=False))
     # clean the response first, if we are executing code, do not resond or end
@@ -144,16 +147,16 @@ def execute_code_action(
     artifacts: Artifacts,
     code: str,
     code_interpreter: CodeInterpreter,
-    artifact_remote_path: str,
 ) -> Tuple[Execution, str]:
     result = code_interpreter.exec_isolation(
-        BoilerplateCode.add_boilerplate(code, remote_path=artifact_remote_path)
+        BoilerplateCode.add_boilerplate(
+            code, remote_path=str(artifacts.remote_save_path)
+        )
     )
     obs = str(result.logs)
     if result.error:
         obs += f"\n{result.error}"
-    extract_and_save_files_to_artifacts(artifacts, code, obs)
     return result, obs
@@ -161,7 +164,6 @@ def execute_user_code_action(
     artifacts: Artifacts,
     last_user_message: Message,
     code_interpreter: CodeInterpreter,
-    artifact_remote_path: str,
 ) -> Tuple[Optional[Execution], Optional[str]]:
     user_result = None
     user_obs = None
@@ -178,11 +180,10 @@ def execute_user_code_action(
     if user_code_action is not None:
         user_code_action = use_extra_vision_agent_args(user_code_action, False)
         user_result, user_obs = execute_code_action(
-            artifacts, user_code_action, code_interpreter, artifact_remote_path
+            artifacts, user_code_action, code_interpreter
         )
         if user_result.error:
             user_obs += f"\n{user_result.error}"
-        extract_and_save_files_to_artifacts(artifacts, user_code_action, user_obs)
     return user_result, user_obs
@@ -231,9 +232,18 @@ def old_format_to_new_format(old_format_str: str) -> str:
     except json.JSONDecodeError:
         return old_format_str
-    thinking = old_format["thoughts"] if old_format["thoughts"].strip() != "" else None
-    let_user_respond = old_format["let_user_respond"]
-    if "<execute_python>" in old_format["response"]:
+    if "thoughts" in old_format:
+        thinking = (
+            old_format["thoughts"] if old_format["thoughts"].strip() != "" else None
+        )
+    else:
+        thinking = None
+    let_user_respond = (
+        old_format["let_user_respond"] if "let_user_respond" in old_format else True
+    )
+    if "response" in old_format and "<execute_python>" in old_format["response"]:
         execute_python = extract_tag(old_format["response"], "execute_python")
         response = (
             old_format["response"]
@@ -244,7 +254,7 @@ def old_format_to_new_format(old_format_str: str) -> str:
         )
     else:
         execute_python = None
-        response = old_format["response"]
+        response = old_format["response"] if "response" in old_format else None
     return json.dumps(
         {
@@ -275,7 +285,6 @@ class VisionAgent(Agent):
         self,
         agent: Optional[LMM] = None,
         verbosity: int = 0,
-        local_artifacts_path: Optional[Union[str, Path]] = None,
         callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
         code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
     ) -> None:
@@ -285,8 +294,6 @@ class VisionAgent(Agent):
             agent (Optional[LMM]): The agent to use for conversation and orchestration
                 of other agents.
             verbosity (int): The verbosity level of the agent.
-            local_artifacts_path (Optional[Union[str, Path]]): The path to the local
-                artifacts file.
             callback_message (Optional[Callable[[Dict[str, Any]], None]]): Callback
                 function to send intermediate update messages.
             code_interpreter (Optional[Union[str, CodeInterpreter]]): For string values
@@ -302,14 +309,6 @@ class VisionAgent(Agent):
         self.callback_message = callback_message
         if self.verbosity >= 1:
             _LOGGER.setLevel(logging.INFO)
-        self.local_artifacts_path = cast(
-            str,
-            (
-                Path(local_artifacts_path)
-                if local_artifacts_path is not None
-                else Path(tempfile.NamedTemporaryFile(delete=False).name)
-            ),
-        )
     def __call__(
         self,
@@ -386,7 +385,7 @@ class VisionAgent(Agent):
         if not artifacts:
             # this is setting remote artifacts path
-            artifacts = Artifacts(WORKSPACE / "artifacts.pkl")
+            artifacts = Artifacts("", "")
         # NOTE: each chat should have a dedicated code interpreter instance to avoid concurrency issues
         code_interpreter = (
@@ -395,8 +394,15 @@ class VisionAgent(Agent):
             and not isinstance(self.code_interpreter, str)
             else CodeInterpreterFactory.new_instance(
                 code_sandbox_runtime=self.code_interpreter,
+                remote_path=artifacts.remote_save_path.parent,
             )
         )
+        if code_interpreter.remote_path != artifacts.remote_save_path.parent:
+            raise ValueError(
+                f"Code interpreter remote path {code_interpreter.remote_path} does not match {artifacts.remote_save_path.parent}"
+            )
         with code_interpreter:
             orig_chat = copy.deepcopy(chat)
             int_chat = copy.deepcopy(chat)
@@ -436,15 +442,13 @@ class VisionAgent(Agent):
             # Save the current state of artifacts, will include any images the user
             # passed in.
-            artifacts.save(self.local_artifacts_path)
+            artifacts.save()
             # Upload artifacts to remote location and show where they are going
             # to be loaded to. The actual loading happens in BoilerplateCode as
             # part of the pre_code.
-            remote_artifacts_path = code_interpreter.upload_file(
-                self.local_artifacts_path
-            )
-            artifacts_loaded = artifacts.show(code_interpreter.remote_path)
+            code_interpreter.upload_file(artifacts.local_save_path)
+            artifacts_loaded = artifacts.show(artifacts.remote_save_path.parent)
             int_chat.append({"role": "observation", "content": artifacts_loaded})
             orig_chat.append({"role": "observation", "content": artifacts_loaded})
             self.streaming_message({"role": "observation", "content": artifacts_loaded})
@@ -453,7 +457,6 @@ class VisionAgent(Agent):
                 artifacts,
                 last_user_message,
                 code_interpreter,
-                str(remote_artifacts_path),
             )
             finished = user_result is not None and user_obs is not None
             if user_result is not None and user_obs is not None:
@@ -472,7 +475,16 @@ class VisionAgent(Agent):
                 )
             while not finished and iterations < self.max_iterations:
+                # ensure we upload the artifacts before each turn, so any local
+                # modifications we made to it will be reflected in the remote
+                code_interpreter.upload_file(artifacts.local_save_path)
                 response = run_conversation(self.agent, int_chat)
+                code_action = use_extra_vision_agent_args(
+                    response.get("execute_python", None),
+                    test_multi_plan,
+                    custom_tool_names,
+                )
                 if self.verbosity >= 1:
                     _LOGGER.info(response)
                 int_chat.append(
@@ -532,31 +544,20 @@ class VisionAgent(Agent):
                         artifacts,
                         code_action,
                         code_interpreter,
-                        str(remote_artifacts_path),
                     )
-                    media_obs = check_and_load_image(code_action)
-                    if self.verbosity >= 1:
-                        _LOGGER.info(obs)
                     obs_chat_elt: Message = {"role": "observation", "content": obs}
+                    media_obs = check_and_load_image(code_action)
                     if media_obs and result.success:
-                        # for view_media_artifact, we need to ensure the media is loaded
-                        # locally so the conversation agent can actually see it
-                        code_interpreter.download_file(
-                            str(remote_artifacts_path.name),
-                            str(self.local_artifacts_path),
-                        )
-                        artifacts.load(
-                            self.local_artifacts_path,
-                            Path(self.local_artifacts_path).parent,
-                        )
+                        # media paths will be under the local_save_path when we download
+                        # them after each turn
                         obs_chat_elt["media"] = [
-                            Path(self.local_artifacts_path).parent / media_ob
+                            artifacts.local_save_path.parent / media_ob
                             for media_ob in media_obs
                         ]
+                    if self.verbosity >= 1:
+                        _LOGGER.info(obs)
                     # don't add execution results to internal chat
                     int_chat.append(obs_chat_elt)
                     obs_chat_elt["execution"] = result
@@ -573,13 +574,15 @@ class VisionAgent(Agent):
                 iterations += 1
                 last_response = response
-            # after running the agent, download the artifacts locally
-            code_interpreter.download_file(
-                str(remote_artifacts_path.name), str(self.local_artifacts_path)
-            )
-            artifacts.load(
-                self.local_artifacts_path, Path(self.local_artifacts_path).parent
-            )
+                # after each turn, download the artifacts locally
+                code_interpreter.download_file(
+                    str(artifacts.remote_save_path.name),
+                    str(artifacts.local_save_path),
+                )
+                artifacts.load(
+                    artifacts.local_save_path, artifacts.local_save_path.parent
+                )
         return orig_chat, artifacts
     def streaming_message(self, message: Dict[str, Any]) -> None:
@@ -595,7 +598,6 @@ class OpenAIVisionAgent(VisionAgent):
         self,
         agent: Optional[LMM] = None,
         verbosity: int = 0,
-        local_artifacts_path: Optional[Union[str, Path]] = None,
         callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
         code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
     ) -> None:
@@ -605,8 +607,6 @@ class OpenAIVisionAgent(VisionAgent):
             agent (Optional[LMM]): The agent to use for conversation and orchestration
                 of other agents.
             verbosity (int): The verbosity level of the agent.
-            local_artifacts_path (Optional[Union[str, Path]]): The path to the local
-                artifacts file.
             callback_message (Optional[Callable[[Dict[str, Any]], None]]): Callback
                 function to send intermediate update messages.
             code_interpreter (Optional[Union[str, CodeInterpreter]]): For string values
@@ -619,7 +619,6 @@ class OpenAIVisionAgent(VisionAgent):
         super().__init__(
             agent,
             verbosity,
-            local_artifacts_path,
             callback_message,
             code_interpreter,
         )
@@ -630,7 +629,6 @@ class AnthropicVisionAgent(VisionAgent):
         self,
         agent: Optional[LMM] = None,
         verbosity: int = 0,
-        local_artifacts_path: Optional[Union[str, Path]] = None,
         callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
         code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
     ) -> None:
@@ -640,8 +638,6 @@ class AnthropicVisionAgent(VisionAgent):
             agent (Optional[LMM]): The agent to use for conversation and orchestration
                 of other agents.
             verbosity (int): The verbosity level of the agent.
-            local_artifacts_path (Optional[Union[str, Path]]): The path to the local
-                artifacts file.
             callback_message (Optional[Callable[[Dict[str, Any]], None]]): Callback
                 function to send intermediate update messages.
             code_interpreter (Optional[Union[str, CodeInterpreter]]): For string values
@@ -654,7 +650,6 @@ class AnthropicVisionAgent(VisionAgent):
         super().__init__(
             agent,
             verbosity,
-            local_artifacts_path,
             callback_message,
             code_interpreter,
         )

vision_agent/agent/vision_agent_coder.py CHANGED Viewed

@@ -5,7 +5,7 @@ import sys
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Sequence, Union, cast
-from redbaron import RedBaron  # type: ignore
+import libcst as cst
 from tabulate import tabulate
 import vision_agent.tools as T
@@ -49,42 +49,112 @@ WORKSPACE = Path(os.getenv("WORKSPACE", ""))
 _LOGGER = logging.getLogger(__name__)
-def strip_function_calls(code: str, exclusions: Optional[List[str]] = None) -> str:
+def strip_function_calls(  # noqa: C901
+    code: str, exclusions: Optional[List[str]] = None
+) -> str:
     """This will strip out all code that calls functions except for functions included
     in exclusions.
     """
     if exclusions is None:
         exclusions = []
-    red = RedBaron(code)
-    nodes_to_remove = []
-    for node in red:
-        if node.type == "def":
-            continue
-        elif node.type == "import" or node.type == "from_import":
-            continue
-        elif node.type == "call":
-            if node.value and node.value[0].value in exclusions:
-                continue
-            nodes_to_remove.append(node)
-        elif node.type == "atomtrailers":
-            if node[0].value in exclusions:
-                continue
-            nodes_to_remove.append(node)
-        elif node.type == "assignment":
-            if node.value.type == "call" or node.value.type == "atomtrailers":
-                func_name = node.value[0].value
-                if func_name in exclusions:
-                    continue
-                nodes_to_remove.append(node)
-        elif node.type == "endl":
-            continue
-        else:
-            nodes_to_remove.append(node)
-    for node in nodes_to_remove:
-        node.parent.remove(node)
-    cleaned_code = red.dumps().strip()
-    return cleaned_code if isinstance(cleaned_code, str) else code
+    def check_and_remove_node(node: cst.CSTNode, exclusions: List[str]) -> cst.CSTNode:
+        if hasattr(node, "value") and isinstance(node.value, cst.Call):
+            if (
+                isinstance(node.value.func, cst.Name)
+                and node.value.func.value in exclusions
+            ):
+                return node
+            return cst.RemoveFromParent()  # type: ignore
+        return node
+    class StripFunctionCallsTransformer(cst.CSTTransformer):
+        def __init__(self, exclusions: List[str]):
+            # Store exclusions to skip removing certain function calls
+            self.exclusions = exclusions
+            self.in_function_or_class = False
+        def visit_FunctionDef(self, node: cst.FunctionDef) -> Optional[bool]:
+            self.in_function_or_class = True
+            return True
+        def leave_FunctionDef(
+            self, original_node: cst.FunctionDef, updated_node: cst.FunctionDef
+        ) -> cst.BaseStatement:
+            self.in_function_or_class = False
+            return updated_node
+        def visit_ClassDef(self, node: cst.ClassDef) -> Optional[bool]:
+            self.in_function_or_class = True
+            return True
+        def leave_ClassDef(
+            self, node: cst.ClassDef, updated_node: cst.ClassDef
+        ) -> cst.BaseStatement:
+            self.in_function_or_class = False
+            return updated_node
+        def leave_Expr(
+            self, original_node: cst.Expr, updated_node: cst.Expr
+        ) -> cst.Expr:
+            if not self.in_function_or_class:
+                return cast(
+                    cst.Expr, check_and_remove_node(updated_node, self.exclusions)
+                )
+            return updated_node
+        def leave_Assign(
+            self, original_node: cst.Assign, updated_node: cst.Assign
+        ) -> cst.Assign:
+            if not self.in_function_or_class:
+                return cast(
+                    cst.Assign, check_and_remove_node(updated_node, self.exclusions)
+                )
+            return updated_node
+        def leave_If(self, original_node: cst.If, updated_node: cst.If) -> cst.If:
+            if not self.in_function_or_class:
+                return cast(
+                    cst.If, check_and_remove_node(updated_node, self.exclusions)
+                )
+            return updated_node
+        def leave_For(self, original_node: cst.For, updated_node: cst.For) -> cst.For:
+            if not self.in_function_or_class:
+                return cast(
+                    cst.For, check_and_remove_node(updated_node, self.exclusions)
+                )
+            return updated_node
+        def leave_While(
+            self, original_node: cst.While, updated_node: cst.While
+        ) -> cst.While:
+            if not self.in_function_or_class:
+                return cast(
+                    cst.While, check_and_remove_node(updated_node, self.exclusions)
+                )
+            return updated_node
+        def leave_With(
+            self, original_node: cst.With, updated_node: cst.With
+        ) -> cst.With:
+            if not self.in_function_or_class:
+                return cast(
+                    cst.With, check_and_remove_node(updated_node, self.exclusions)
+                )
+            return updated_node
+        def leave_Try(self, original_node: cst.Try, updated_node: cst.Try) -> cst.Try:
+            if not self.in_function_or_class:
+                return cast(
+                    cst.Try, check_and_remove_node(updated_node, self.exclusions)
+                )
+            return updated_node
+    tree = cst.parse_module(code)
+    transformer = StripFunctionCallsTransformer(exclusions)
+    modified_tree = tree.visit(transformer)
+    return modified_tree.code
 def write_code(

vision_agent/agent/vision_agent_prompts.py CHANGED Viewed

@@ -1,7 +1,7 @@
 VA_CODE = """
 **Role**: You are a helpful agent that assists users with writing code.
-**Taks**: As a conversational agent, you are required to understand the user's request and provide a helpful response. Use a Chain-of-Thought approach to break down the problem, create a plan, and then provide a response. Ensure that your response is clear, concise, and helpful. You can use an interactive Python (Jupyter Notebook) environment, executing code with <execution_python>. You are given access to an `artifacts` object which contains files shared between you and the user. `artifacts` will be automatically saved everytime you execute python code.
+**Taks**: As a conversational agent, you are required to understand the user's request and provide a helpful response. Use a Chain-of-Thought approach to break down the problem, create a plan, and then provide a response. Ensure that your response is clear, concise, and helpful. You can use an interactive Python (Jupyter Notebook) environment, executing code with <execute_python>. You are given access to an `artifacts` object which contains files shared between you and the user. `artifacts` will be saved only AFTER you execute python code with `save_imgae` or `save_video`. The user can see all `artifacts`.
 <execute_python>
 print("Hello World!")
@@ -26,10 +26,11 @@ Here is the current conversation so far:
 **Instructions**:
 1. **Understand and Clarify**: Make sure you understand the task, ask clarifying questions if the task is not clear.
-2. **Code Generation**: Only use code provided in the Documentation in your <execute_python> tags. Only use `edit_vision_code` to modify code written by `generate_vision_code`.
+2. **Code Generation**: Only use code provided in the Documentation in your <execute_python> tags. Only use `edit_vision_code` to modify code written by `generate_vision_code`. DO NOT run `edit_vision_code` or `edit_code_artifact` more than 2 times in a row and instead ask the user for help.
 3. **Execute**: Do only what the user asked you to do and no more. If you need to ask the user a question or show your results to the user, set <let_user_respond> to `true`.
-4. **Response**: Keep your responses short and concise. Provide the user only with the information they need to continue the conversation.
-5. **Output**: You can only respond with <thinking>, <response>, <execute_python>, and <let_user_respond> tags.
+4. **Artifacts**: Files are only saved in `artifacts` after <execute_python>, do not try to access artifacts until you observe that they are loaded.
+5. **Response**: Keep your responses short and concise. Provide the user only with the information they need to continue the conversation.
+6. **Output**: You can only respond with <thinking>, <response>, <execute_python>, and <let_user_respond> tags.
 <thinking>Your thoughts here...</thinking>
 <response>Your response to the user here...</response>
@@ -53,7 +54,7 @@ generate_vision_code(artifacts, 'dog_detector.py', 'Can you write code to detect
 <let_user_respond>false</let_user_respond>
 OBSERVATION:
-[Artifact dog_detector.py]
+[Artifact dog_detector.py (5 lines total)]
 0|from vision_agent.tools import load_image, owl_v2
 1|def detect_dogs(image_path: str):
 2|    image = load_image(image_path)
@@ -94,7 +95,7 @@ edit_vision_code(artifacts, 'dog_detector.py', ['Can you write code to detect do
 <let_user_respond>false</let_user_respond>
 OBSERVATION:
-[Artifact dog_detector.py]
+[Artifact dog_detector.py (5 lines total)]
 0|from vision_agent.tools import load_image, owl_v2
 1|def detect_dogs(image_path: str):
 2|    image = load_image(image_path)
@@ -142,7 +143,7 @@ AGENT: <thinking>The image shows a cat and a dog sitting on the couch, I will te
 EXAMPLES_CODE3 = """
-USER: Can you create a function to count workers with helmets, return the count and save a visaulization of the bounding boxes?
+USER: Can you create a function to count workers with helmets, return the count and a visaulization of the bounding boxes?
 OBSERVATION:
 [Artifacts loaded]
@@ -166,9 +167,9 @@ generate_vision_code(artifacts, 'code.py', 'Can you create a function to count w
 <let_user_respond>false</let_user_respond>
 OBSERVATION:
-[Artifact code.py]
+[Artifact code.py (17 lines total)]
 0|from vision_agent.tools import load_image, florence2_phrase_grounding, closest_box_distance, overlay_bounding_boxes, save_image
-1|def count_workers_with_helmets(image_path: str, output_path: str):
+1|def count_workers_with_helmets(image_path: str):
 2|    image = load_image(image_path)
 3|    detections = florence2_phrase_grounding("worker, helmet", image)
 4|    workers = [d for d in detections if d['label'] == 'worker']
@@ -182,15 +183,16 @@ OBSERVATION:
 11|                break
 11|        if person_has_helmet:
 13|            count += 1
-14|    overlay_bounding_boxes(image, detections)
-15|    save_image(output_path, image)
-16|    return count
+14|    viz = overlay_bounding_boxes(image, detections)
+16|    return count, viz
 [End of artifact]
-AGENT: <thinking>I have generated the code to count the workers with helmets in the image, I must now run the code and print the output.</thinking>
+AGENT: <thinking>I have generated the code to count the workers with helmets in the image, I must now run the code, print the counts and save the visualization.</thinking>
 <execute_python>
 from code import count_workers_with_helmets
-print(count_workers_with_helmets('/path/to/images/workers.png', 'workers_viz.png'))
+count, viz = count_workers_with_helmets('/path/to/images/workers.png')
+save_image(viz, 'workers_viz.png')
+print(count)
 </execute_python>
 <let_user_respond>false</let_user_respond>
@@ -201,7 +203,9 @@ OBSERVATION:
 AGENT: <thinking>Two workers with helmets are detected, I will show this to the user and ask them if the result looks good.</thinking>
 <response>The code to detect workers with helmets is saved in code.py and the visualization under 'workers_viz.png'.</response>
 <let_user_respond>true</let_user_respond>
+"""
+EXAMPLES_CODE3_EXTRA1 = """
 USER: The detections are slightly off. Can you fine tune florence2 using these labels? "[{'image_path': 'image1.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}, {'image_path': 'image2.png': 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}]"
 AGENT: <thinking>Because the user has supplied me with labels I can call object_detection_fine_tuning on their behalf to fine tune the model</thinking>
@@ -249,3 +253,60 @@ AGENT: <thinking>Three workers with helmets are detected, I will show this to th
 <response>I have updated the code using the fine tuned florence2_phrase_grounding model to count the workers wearing helmets in code.py and saved the visualization under 'workers_viz.png'.</response>
 <let_user_respond>true</let_user_respond>
 """
+EXAMPLES_CODE3_EXTRA2 = """
+USER: Can you write a streamlit app that allows me to upload an image and count the workers with helmets?
+AGENT: <thinking>I will use edit_code_artifact to generate a streamlit app that allows the user to upload an image and count the workers with helmets.</thinking>
+<execute_python>
+CODE = '''
+import streamlit as st
+from code imoprt count_workers_with_helmets
+import os
+def main():
+    st.title("Worker with Helmet Counter")
+    uploaded_file = st.file_uploader("Choose image...", type=["jpeg", "jpg", "png"])
+    if uploaded_file is not None:
+        with open("temp_image.jpg", "wb") as f:
+            f.write(uploaded_file.read())
+        count = count_workers_with_helmets("temp_image.jpg", "temp_image_viz.jpg")
+        st.image("temp_image_viz.jpg")
+        st.write(f"Number of workers with helmets: {count}")
+        os.remove("temp_image.jpg")
+        os.remove("temp_image_viz.jpg")
+if __name__ == "__main__":
+    main()
+'''
+edit_code_artifact(artifacts, 'streamlit_app.py', CODE, 0, 0)
+</execute_python>
+OBSERVATION:
+[Artifact streamlit_app.py (19 lines total)]
+0|import streamlit as st
+1|from code imoprt count_workers_with_helmets
+2|import os
+3|
+4|def main():
+5|    st.title("Worker with Helmet Counter")
+6|    uploaded_file = st.file_uploader("Choose image...", type=["jpeg", "jpg", "png"])
+7|    if uploaded_file is not None:
+8|        with open("temp_image.jpg", "wb") as f:
+9|            f.write(uploaded_file.read())
+10|
+11|        count = count_workers_with_helmets("temp_image.jpg", "temp_image_viz.jpg")
+12|        st.image("temp_image_viz.jpg")
+13|        st.write(f"Number of workers with helmets: {count}")
+14|        os.remove("temp_image.jpg")
+15|        os.remove("temp_image_viz.jpg")
+16|
+17|if __name__ == "__main__":
+18|    main()
+[End of artifact]
+AGENT: <thinking>I have generated the streamlit app that allows the user to upload an image and count the workers</thinking>
+<response>The streamlit app is saved in streamlit_app.py, you can run the app by running `streamlit run streamlit_app.py`.</response>
+<let_user_respond>true</let_user_respond>
+"""

vision_agent/tools/meta_tools.py CHANGED Viewed

@@ -6,15 +6,13 @@ import re
 import subprocess
 import tempfile
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
+import libcst as cst
 from IPython.display import display
-from redbaron import RedBaron  # type: ignore
 import vision_agent as va
-from vision_agent.agent.agent_utils import extract_json
 from vision_agent.clients.landing_public_api import LandingPublicAPI
-from vision_agent.lmm import AnthropicLMM
 from vision_agent.lmm.types import Message
 from vision_agent.tools.tool_utils import get_tool_documentation
 from vision_agent.tools.tools import TOOL_DESCRIPTIONS
@@ -26,7 +24,6 @@ CURRENT_FILE = None
 CURRENT_LINE = 0
 DEFAULT_WINDOW_SIZE = 100
 ZMQ_PORT = os.environ.get("ZMQ_PORT", None)
-VERBOSITY = os.environ.get("VERBOSITY", 0)
 def report_progress_callback(port: int, inp: Dict[str, Any]) -> None:
@@ -38,16 +35,6 @@ def report_progress_callback(port: int, inp: Dict[str, Any]) -> None:
     socket.send_json(inp)
-def filter_file(file_name: Union[str, Path]) -> bool:
-    file_name_p = Path(file_name)
-    return (
-        file_name_p.is_file()
-        and "__pycache__" not in str(file_name_p)
-        and file_name_p.suffix in [".py", ".txt"]
-        and not file_name_p.name.startswith(".")
-    )
 def redisplay_results(execution: Execution) -> None:
     """This function is used to add previous execution results to the current output.
     This is handy if you are inside a notebook environment, call it notebook1, and you
@@ -86,8 +73,11 @@ class Artifacts:
     need to be in sync with the remote environment the VisionAgent is running in.
     """
-    def __init__(self, remote_save_path: Union[str, Path]) -> None:
+    def __init__(
+        self, remote_save_path: Union[str, Path], local_save_path: Union[str, Path]
+    ) -> None:
         self.remote_save_path = Path(remote_save_path)
+        self.local_save_path = Path(local_save_path)
         self.artifacts: Dict[str, Any] = {}
         self.code_sandbox_runtime = None
@@ -131,9 +121,7 @@ class Artifacts:
         return output_str
     def save(self, local_path: Optional[Union[str, Path]] = None) -> None:
-        save_path = (
-            Path(local_path) if local_path is not None else self.remote_save_path
-        )
+        save_path = Path(local_path) if local_path is not None else self.local_save_path
         with open(save_path, "wb") as f:
             pkl.dump(self.artifacts, f)
@@ -150,6 +138,38 @@ class Artifacts:
         return name in self.artifacts
+def filter_file(file_name: Union[str, Path]) -> Tuple[bool, bool]:
+    file_name_p = Path(file_name)
+    return (
+        file_name_p.is_file()
+        and "__pycache__" not in str(file_name_p)
+        and not file_name_p.name.startswith(".")
+        and file_name_p.suffix
+        in [".png", ".jpeg", ".jpg", ".mp4", ".txt", ".json", ".csv"]
+    ), file_name_p.suffix in [".png", ".jpeg", ".jpg", ".mp4"]
+def capture_files_into_artifacts(artifacts: Artifacts) -> None:
+    """This function is used to capture all files in the current directory into an
+    artifact object. This is useful if you want to capture all files in the current
+    directory and use them in a different environment where you don't have access to
+    the file system.
+    Parameters:
+        artifacts (Artifacts): The artifacts object to save the files to.
+    """
+    for file in Path(".").glob("**/*"):
+        usable_file, is_media = filter_file(file)
+        mode = "rb" if is_media else "r"
+        if usable_file:
+            file_name = file.name
+            if file_name.startswith(str(Path(artifacts.remote_save_path).parents)):
+                idx = len(Path(artifacts.remote_save_path).parents)
+                file_name = file_name[idx:]
+            with open(file, mode) as f:
+                artifacts[file_name] = f.read()
 # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
@@ -174,9 +194,9 @@ def view_lines(
         f"[Artifact: {name} ({total_lines} lines total)]\n"
         + format_lines(lines[start:end], start)
         + (
-            "[End of artifact]"
+            "\n[End of artifact]"
             if end == len(lines)
-            else f"[{len(lines) - end} more lines]"
+            else f"\n[{len(lines) - end} more lines]"
         )
     )
@@ -256,8 +276,10 @@ def edit_code_artifact(
     Parameters:
         artifacts (Artifacts): The artifacts object to edit the artifact from.
         name (str): The name of the artifact to edit.
-        start (int): The line number to start the edit.
-        end (int): The line number to end the edit.
+        start (int): The line number to start the edit, can be in [-1, total_lines]
+            where -1 represents the end of the file.
+        end (int): The line number to end the edit, can be in [-1, total_lines] where
+            -1 represents the end of the file.
         content (str): The content to insert.
     """
     # just make the artifact if it doesn't exist instead of forcing agent to call
@@ -266,17 +288,21 @@ def edit_code_artifact(
         artifacts[name] = ""
     total_lines = len(artifacts[name].splitlines())
+    if start == -1:
+        start = total_lines
+    if end == -1:
+        end = total_lines
     if start < 0 or end < 0 or start > end or end > total_lines:
         print("[Invalid line range]")
         return "[Invalid line range]"
-    if start == end:
-        end += 1
     new_content_lines = content.splitlines(keepends=True)
     new_content_lines = [
         line if line.endswith("\n") else line + "\n" for line in new_content_lines
     ]
     lines = artifacts[name].splitlines(keepends=True)
+    lines = [line if line.endswith("\n") else line + "\n" for line in lines]
     edited_lines = lines[:start] + new_content_lines + lines[end:]
     cur_line = start + len(content.split("\n")) // 2
@@ -371,14 +397,16 @@ def generate_vision_plan(
         [End Plan Context]
     """
+    # verbosity is set to 0 to avoid adding extra content to the VisionAgent conversation
     if ZMQ_PORT is not None:
         agent = va.agent.VisionAgentPlanner(
             report_progress_callback=lambda inp: report_progress_callback(
                 int(ZMQ_PORT), inp
-            )
+            ),
+            verbosity=0,
         )
     else:
-        agent = va.agent.VisionAgentPlanner()
+        agent = va.agent.VisionAgentPlanner(verbosity=0)
     fixed_chat: List[Message] = [{"role": "user", "content": chat, "media": media}]
     response = agent.generate_plan(
@@ -435,14 +463,16 @@ def generate_vision_code(
             dogs = owl_v2("dog", image)
             return dogs
     """
+    # verbosity is set to 0 to avoid adding extra content to the VisionAgent conversation
     if ZMQ_PORT is not None:
         agent = va.agent.VisionAgentCoder(
             report_progress_callback=lambda inp: report_progress_callback(
                 int(ZMQ_PORT), inp
-            )
+            ),
+            verbosity=0,
         )
     else:
-        agent = va.agent.VisionAgentCoder(verbosity=int(VERBOSITY))
+        agent = va.agent.VisionAgentCoder(verbosity=0)
     fixed_chat: List[Message] = [{"role": "user", "content": chat, "media": media}]
     response = agent.generate_code(
@@ -506,7 +536,8 @@ def edit_vision_code(
             return dogs
     """
-    agent = va.agent.VisionAgentCoder(verbosity=int(VERBOSITY))
+    # verbosity is set to 0 to avoid adding extra content to the VisionAgent conversation
+    agent = va.agent.VisionAgentCoder(verbosity=0)
     if name not in artifacts:
         print(f"[Artifact {name} does not exist]")
         return f"[Artifact {name} does not exist]"
@@ -570,8 +601,9 @@ def check_and_load_image(code: str) -> List[str]:
 def view_media_artifact(artifacts: Artifacts, name: str) -> str:
-    """Allows you to view the media artifact with the given name. This does not show
-    the media to the user, the user can already see all media saved in the artifacts.
+    """Allows only the agent to view the media artifact with the given name. DO NOT use
+    this to show media to the user, the user can already see all media saved in the
+    artifacts.
     Parameters:
         artifacts (Artifacts): The artifacts object to show the image from.
@@ -648,10 +680,10 @@ def get_diff_with_prompts(name: str, before: str, after: str) -> str:
 def use_extra_vision_agent_args(
-    code: str,
+    code: Optional[str],
     test_multi_plan: bool = True,
     custom_tool_names: Optional[List[str]] = None,
-) -> str:
+) -> Optional[str]:
     """This is for forcing arguments passed by the user to VisionAgent into the
     VisionAgentCoder call.
@@ -663,21 +695,81 @@ def use_extra_vision_agent_args(
     Returns:
         str: The edited code.
     """
-    red = RedBaron(code)
-    for node in red:
-        # seems to always be atomtrailers not call type
-        if node.type == "atomtrailers":
-            if node.name.value == "generate_vision_code":
-                node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
-            if (
-                node.name.value == "generate_vision_code"
-                or node.name.value == "edit_vision_code"
-            ):
-                if custom_tool_names is not None:
-                    node.value[1].value.append(f"custom_tool_names={custom_tool_names}")
-    cleaned_code = red.dumps().strip()
-    return cleaned_code if isinstance(cleaned_code, str) else code
+    if code is None:
+        return None
+    class VisionAgentTransformer(cst.CSTTransformer):
+        def __init__(
+            self, test_multi_plan: bool, custom_tool_names: Optional[List[str]]
+        ):
+            self.test_multi_plan = test_multi_plan
+            self.custom_tool_names = custom_tool_names
+        def leave_Call(
+            self, original_node: cst.Call, updated_node: cst.Call
+        ) -> cst.Call:
+            # Check if the function being called is generate_vision_code or edit_vision_code
+            if isinstance(updated_node.func, cst.Name) and updated_node.func.value in [
+                "generate_vision_code",
+                "edit_vision_code",
+            ]:
+                # Add test_multi_plan argument to generate_vision_code calls
+                if updated_node.func.value == "generate_vision_code":
+                    new_arg = cst.Arg(
+                        keyword=cst.Name("test_multi_plan"),
+                        value=cst.Name(str(self.test_multi_plan)),
+                        equal=cst.AssignEqual(
+                            whitespace_before=cst.SimpleWhitespace(""),
+                            whitespace_after=cst.SimpleWhitespace(""),
+                        ),
+                    )
+                    updated_node = updated_node.with_changes(
+                        args=[*updated_node.args, new_arg]
+                    )
+                # Add custom_tool_names if provided
+                if self.custom_tool_names is not None:
+                    list_arg = []
+                    for i, tool_name in enumerate(self.custom_tool_names):
+                        if i < len(self.custom_tool_names) - 1:
+                            list_arg.append(
+                                cst._nodes.expression.Element(
+                                    value=cst.SimpleString(value=f'"{tool_name}"'),
+                                    comma=cst.Comma(
+                                        whitespace_before=cst.SimpleWhitespace(""),
+                                        whitespace_after=cst.SimpleWhitespace(" "),
+                                    ),
+                                )
+                            )
+                        else:
+                            list_arg.append(
+                                cst._nodes.expression.Element(
+                                    value=cst.SimpleString(value=f'"{tool_name}"'),
+                                )
+                            )
+                    new_arg = cst.Arg(
+                        keyword=cst.Name("custom_tool_names"),
+                        value=cst.List(list_arg),
+                        equal=cst.AssignEqual(
+                            whitespace_before=cst.SimpleWhitespace(""),
+                            whitespace_after=cst.SimpleWhitespace(""),
+                        ),
+                    )
+                    updated_node = updated_node.with_changes(
+                        args=[*updated_node.args, new_arg]
+                    )
+            return updated_node
+    # Parse the input code into a CST node
+    tree = cst.parse_module(code)
+    # Apply the transformer to modify the CST
+    transformer = VisionAgentTransformer(test_multi_plan, custom_tool_names)
+    modified_tree = tree.visit(transformer)
+    # Return the modified code as a string
+    return modified_tree.code
 def use_object_detection_fine_tuning(
@@ -757,74 +849,12 @@ def use_object_detection_fine_tuning(
     return diff
-def extract_and_save_files_to_artifacts(
-    artifacts: Artifacts, code: str, obs: str
-) -> None:
-    """Extracts and saves files used in the code to the artifacts object.
-    Parameters:
-        artifacts (Artifacts): The artifacts object to save the files to.
-        code (str): The code to extract the files from.
-    """
-    try:
-        response = extract_json(
-            AnthropicLMM()(  # type: ignore
-                f"""You are a helpful AI assistant. Your job is to look at a snippet of code and the output of running that code and return the file paths that are being saved in the file. Below is the code snippet:
-```python
-{code}
-```
-```output
-{obs}
-```
-Return the file paths in the following JSON format:
-{{"file_paths": ["/path/to/image1.jpg", "/other/path/to/data.json"]}}"""
-            )
-        )
-    except json.JSONDecodeError:
-        return
-    text_file_ext = [
-        ".txt",
-        ".md",
-        "rtf",
-        ".html",
-        ".htm",
-        "xml",
-        ".json",
-        ".csv",
-        ".tsv",
-        ".yaml",
-        ".yml",
-        ".toml",
-        ".conf",
-        ".env" ".ini",
-        ".log",
-        ".py",
-        ".java",
-        ".js",
-        ".cpp",
-        ".c" ".sql",
-        ".sh",
-    ]
-    if "file_paths" in response and isinstance(response["file_paths"], list):
-        for file_path in response["file_paths"]:
-            read_mode = "r" if Path(file_path).suffix in text_file_ext else "rb"
-            if Path(file_path).is_file():
-                with open(file_path, read_mode) as f:
-                    artifacts[Path(file_path).name] = f.read()
 META_TOOL_DOCSTRING = get_tool_documentation(
     [
         get_tool_descriptions,
         open_code_artifact,
         create_code_artifact,
         edit_code_artifact,
-        generate_vision_plan,
         generate_vision_code,
         edit_vision_code,
         view_media_artifact,

vision_agent/utils/execute.py CHANGED Viewed

@@ -575,6 +575,7 @@ class LocalCodeInterpreter(CodeInterpreter):
         super().__init__(timeout=timeout)
         self.nb = nbformat.v4.new_notebook()
         # Set the notebook execution path to the remote path
+        self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)
         self.resources = {"metadata": {"path": str(self.remote_path)}}
         self.nb_client = NotebookClient(
             self.nb,
@@ -591,7 +592,6 @@ Timeout: {self.timeout}"""
         )
         sleep(1)
         self._new_kernel()
-        self.remote_path = Path(remote_path if remote_path is not None else WORKSPACE)
     def _new_kernel(self) -> None:
         if self.nb_client.kc is None or not run_sync(self.nb_client.kc.is_alive)():  # type: ignore
@@ -659,7 +659,7 @@ Timeout: {self.timeout}"""
     def download_file(
         self, remote_file_path: Union[str, Path], local_file_path: Union[str, Path]
     ) -> Path:
-        with open(self.remote_path / remote_file_path, "rb") as f:
+        with open(self.remote_path / Path(remote_file_path).name, "rb") as f:
             contents = f.read()
         with open(local_file_path, "wb") as f:
             f.write(contents)

{vision_agent-0.2.173.dist-info → vision_agent-0.2.175.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.173
+Version: 0.2.175
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -16,6 +16,7 @@ Requires-Dist: e2b-code-interpreter (==0.0.11a37)
 Requires-Dist: flake8 (>=7.0.0,<8.0.0)
 Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
 Requires-Dist: langsmith (>=0.1.58,<0.2.0)
+Requires-Dist: libcst (>=1.5.0,<2.0.0)
 Requires-Dist: nbclient (>=0.10.0,<0.11.0)
 Requires-Dist: nbformat (>=5.10.4,<6.0.0)
 Requires-Dist: numpy (>=1.21.0,<2.0.0)
@@ -27,7 +28,6 @@ Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
 Requires-Dist: pydantic (==2.7.4)
 Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
 Requires-Dist: pytube (==15.0.0)
-Requires-Dist: redbaron (>=0.9.2,<0.10.0)
 Requires-Dist: requests (>=2.0.0,<3.0.0)
 Requires-Dist: rich (>=13.7.1,<14.0.0)
 Requires-Dist: scipy (>=1.13.0,<1.14.0)

{vision_agent-0.2.173.dist-info → vision_agent-0.2.175.dist-info}/RECORD RENAMED Viewed

@@ -1,13 +1,13 @@
 vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
 vision_agent/agent/__init__.py,sha256=RRMPhH8mgm_pCtEKiVFSjJyDi4lCr4F7k05AhK01xlM,436
 vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
-vision_agent/agent/agent_utils.py,sha256=eSgg8CwWylX_erLTqTg2pVhEEgVkMLRrQfYRyJzI3so,5443
-vision_agent/agent/vision_agent.py,sha256=lEda43d-Ri68FIjmn-MPIgLs8_jMpyDVXslusQedhWA,26222
-vision_agent/agent/vision_agent_coder.py,sha256=aVkl0b9LKvy-auuHGYSag-ixYnue0iRQqD1PYLPBR-s,29312
+vision_agent/agent/agent_utils.py,sha256=WYJF11PfKXlRMPnogGz3s7c2TlWoxoGzuLiIptVYE1s,5524
+vision_agent/agent/vision_agent.py,sha256=YfSYp9UeML-f67mn1TvjwyXDB_CxaaG_4mzNC1py5xU,25882
+vision_agent/agent/vision_agent_coder.py,sha256=3Q1VWrN-BNUoSD4OAqKazvXkP2c04PXDYu2Z1f5dQb0,31960
 vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
 vision_agent/agent/vision_agent_planner.py,sha256=mjmnXG9CvYf_ZA7ZJ3ri4H-2U_Km55gF1sZYRSOlxpY,19027
 vision_agent/agent/vision_agent_planner_prompts.py,sha256=JDARUzko2HZdxkBtcy6wuP9DCCmbqhK_gnVgrjr6l1k,6691
-vision_agent/agent/vision_agent_prompts.py,sha256=_xAITNDKcS45tqhEax5i6vDQa4V39f9n55iRGk2R6RM,11218
+vision_agent/agent/vision_agent_prompts.py,sha256=4329ll0kqCznRALIMl-rlKWGjN92p3bcRrz8R-cO744,13748
 vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
 vision_agent/clients/landing_public_api.py,sha256=lU2ev6E8NICmR8DMUljuGcVFy5VNJQ4WQkWC8WnnJEc,1503
@@ -17,19 +17,19 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
 vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
 vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
 vision_agent/tools/__init__.py,sha256=u-vS5iORB4ccvxoAjbtpvhTALDhXGilcATIq1_eZhKo,2332
-vision_agent/tools/meta_tools.py,sha256=zcqp3POpGcS4cfceMih0AD1c6MwS_PFLLA6EjTXhonM,28013
+vision_agent/tools/meta_tools.py,sha256=v6e4pnWDIO46ZTWuk-1FkMszfmz2pj-N5wRP8_0WelM,30648
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
 vision_agent/tools/tools.py,sha256=iKsBZxJ5--xWK-mqgZ1jbX_bfGS5HmAp-VRZ69m9yPg,77921
 vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
 vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
 vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
-vision_agent/utils/execute.py,sha256=FqSOr5gtBeKB1g2hbV6-bhox6qItDQNn2o9efq1w6f4,28017
+vision_agent/utils/execute.py,sha256=2sIQn45llOENMyrKu3TPINVRLLbOvvZ6SVHFCB9MQUo,28028
 vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwdn6sk,11303
 vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=fOPR48-SuwMbE5eB5rc2F7lVo6k1mVHn26eEJ0QCslc,5602
-vision_agent-0.2.173.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.173.dist-info/METADATA,sha256=xSZ3aKwFKG81BoBIHXLKN2CSqGFmmzMnalcbLPYrV7w,18342
-vision_agent-0.2.173.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.173.dist-info/RECORD,,
+vision_agent-0.2.175.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.175.dist-info/METADATA,sha256=4qXZ_bRfFYb5fgTP4XcRG4bH9IcVhqE2akIQObcpMSo,18339
+vision_agent-0.2.175.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.175.dist-info/RECORD,,

{vision_agent-0.2.173.dist-info → vision_agent-0.2.175.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.173.dist-info → vision_agent-0.2.175.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.173__py3-none-any.whl → 0.2.175__py3-none-any.whl

vision-agent 0.2.173__py3-none-any.whl → 0.2.175__py3-none-any.whl