PyPI - vision-agent - Versions diffs - 1.0.5__py3-none-any.whl → 1.0.8__py3-none-any.whl - Mend

vision-agent 1.0.5__py3-none-any.whl → 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

vision_agent/agent/__init__.py +0 -16
vision_agent/tools/__init__.py +0 -6
vision_agent/tools/meta_tools.py +1 -492
vision_agent/utils/tools.py +3 -1
vision_agent-1.0.8.dist-info/METADATA +259 -0
{vision_agent-1.0.5.dist-info → vision_agent-1.0.8.dist-info}/RECORD +8 -14
{vision_agent-1.0.5.dist-info → vision_agent-1.0.8.dist-info}/WHEEL +1 -1
vision_agent/agent/vision_agent.py +0 -605
vision_agent/agent/vision_agent_coder.py +0 -742
vision_agent/agent/vision_agent_coder_prompts.py +0 -290
vision_agent/agent/vision_agent_planner.py +0 -564
vision_agent/agent/vision_agent_planner_prompts.py +0 -199
vision_agent/agent/vision_agent_prompts.py +0 -312
vision_agent-1.0.5.dist-info/METADATA +0 -179
{vision_agent-1.0.5.dist-info → vision_agent-1.0.8.dist-info}/LICENSE +0 -0

vision_agent/agent/__init__.py CHANGED Viewed

@@ -1,20 +1,4 @@
 from .agent import Agent, AgentCoder, AgentPlanner
-from .vision_agent import VisionAgent
-from .vision_agent_coder import (
-    AnthropicVisionAgentCoder,
-    AzureVisionAgentCoder,
-    OllamaVisionAgentCoder,
-    OpenAIVisionAgentCoder,
-    VisionAgentCoder,
-)
 from .vision_agent_coder_v2 import VisionAgentCoderV2
-from .vision_agent_planner import (
-    AnthropicVisionAgentPlanner,
-    AzureVisionAgentPlanner,
-    OllamaVisionAgentPlanner,
-    OpenAIVisionAgentPlanner,
-    PlanContext,
-    VisionAgentPlanner,
-)
 from .vision_agent_planner_v2 import VisionAgentPlannerV2
 from .vision_agent_v2 import VisionAgentV2

vision_agent/tools/__init__.py CHANGED Viewed

@@ -1,13 +1,7 @@
 from typing import Callable, List, Optional
 from .meta_tools import (
-    create_code_artifact,
-    edit_code_artifact,
-    edit_vision_code,
-    generate_vision_code,
     get_tool_descriptions,
-    list_artifacts,
-    open_code_artifact,
     view_media_artifact,
 )
 from .planner_tools import judge_od_results

vision_agent/tools/meta_tools.py CHANGED Viewed

@@ -1,17 +1,11 @@
 import difflib
-import json
 import os
 import re
-import subprocess
-import tempfile
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Tuple, Union
-import libcst as cst
 from IPython.display import display
-import vision_agent as va
-from vision_agent.models import Message
 from vision_agent.tools.tools import get_tools_descriptions as _get_tool_descriptions
 from vision_agent.utils.execute import Execution, MimeType
 from vision_agent.utils.tools_doc import get_tool_documentation
@@ -152,392 +146,6 @@ def view_lines(
     return return_str
-def open_code_artifact(
-    artifacts: Artifacts, name: str, line_num: int = 0, window_size: int = 100
-) -> str:
-    """Opens the provided code artifact. If `line_num` is provided, the window will be
-    moved to include that line. It only shows the first 100 lines by default! Max
-    `window_size` supported is 2000.
-    Parameters:
-        artifacts (Artifacts): The artifacts object to open the artifact from.
-        name (str): The name of the artifact to open.
-        line_num (int): The line number to move the window to.
-        window_size (int): The number of lines to show above and below the line.
-    """
-    if name not in artifacts:
-        return f"[Artifact {name} does not exist]"
-    total_lines = len(artifacts[name].splitlines())
-    window_size = min(window_size, 2000)
-    window_size = window_size // 2
-    if line_num - window_size < 0:
-        line_num = window_size
-    elif line_num >= total_lines:
-        line_num = total_lines - 1 - window_size
-    lines = artifacts[name].splitlines(keepends=True)
-    return view_lines(lines, line_num, window_size, name, total_lines)
-def create_code_artifact(artifacts: Artifacts, name: str) -> str:
-    """Creates a new code artifiact with the given name.
-    Parameters:
-        artifacts (Artifacts): The artifacts object to add the new artifact to.
-        name (str): The name of the new artifact.
-    """
-    if name in artifacts:
-        return_str = f"[Artifact {name} already exists]"
-    else:
-        artifacts[name] = ""
-        return_str = f"[Artifact {name} created]"
-    print(return_str)
-    display(
-        {
-            MimeType.APPLICATION_ARTIFACT: json.dumps(
-                {
-                    "name": name,
-                    "content": artifacts[name],
-                    "action": "create",
-                }
-            )
-        },
-        raw=True,
-    )
-    return return_str
-def edit_code_artifact(
-    artifacts: Artifacts, name: str, start: int, end: int, content: str
-) -> str:
-    """Edits the given code artifact with the provided content. The content will be
-    inserted between the `start` and `end` line numbers. If the `start` and `end` are
-    the same, the content will be inserted at the `start` line number. If the `end` is
-    greater than the total number of lines in the file, the content will be inserted at
-    the end of the file. If the `start` or `end` are negative, the function will return
-    an error message.
-    Parameters:
-        artifacts (Artifacts): The artifacts object to edit the artifact from.
-        name (str): The name of the artifact to edit.
-        start (int): The line number to start the edit, can be in [-1, total_lines]
-            where -1 represents the end of the file.
-        end (int): The line number to end the edit, can be in [-1, total_lines] where
-            -1 represents the end of the file.
-        content (str): The content to insert.
-    """
-    # just make the artifact if it doesn't exist instead of forcing agent to call
-    # create_artifact
-    if name not in artifacts:
-        artifacts[name] = ""
-    total_lines = len(artifacts[name].splitlines())
-    if start == -1:
-        start = total_lines
-    if end == -1:
-        end = total_lines
-    if start < 0 or end < 0 or start > end or end > total_lines:
-        print("[Invalid line range]")
-        return "[Invalid line range]"
-    new_content_lines = content.splitlines(keepends=True)
-    new_content_lines = [
-        line if line.endswith("\n") else line + "\n" for line in new_content_lines
-    ]
-    lines = artifacts[name].splitlines(keepends=True)
-    lines = [line if line.endswith("\n") else line + "\n" for line in lines]
-    edited_lines = lines[:start] + new_content_lines + lines[end:]
-    cur_line = start + len(content.split("\n")) // 2
-    with tempfile.NamedTemporaryFile(delete=True) as f:
-        with open(f.name, "w") as f:  # type: ignore
-            f.writelines(edited_lines)
-        process = subprocess.Popen(
-            [
-                "flake8",
-                "--isolated",
-                "--select=F821,F822,F831,E111,E112,E113,E999,E902",
-                f.name,
-            ],
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-        )
-        stdout, _ = process.communicate()
-        if stdout != "":
-            stdout = stdout.replace(f.name, name)
-            error_msg = "[Edit failed with the following status]\n" + stdout
-            original_view = view_lines(
-                lines,
-                start + ((end - start) // 2),
-                DEFAULT_WINDOW_SIZE,
-                name,
-                total_lines,
-                print_output=False,
-            )
-            total_lines_edit = sum(1 for _ in edited_lines)
-            edited_view = view_lines(
-                edited_lines,
-                cur_line,
-                DEFAULT_WINDOW_SIZE,
-                name,
-                total_lines_edit,
-                print_output=False,
-            )
-            error_msg += f"\n[This is how your edit would have looked like if applied]\n{edited_view}\n\n[This is the original code before your edit]\n{original_view}"
-            print(error_msg)
-            return error_msg
-    artifacts[name] = "".join(edited_lines)
-    display(
-        {
-            MimeType.APPLICATION_ARTIFACT: json.dumps(
-                {
-                    "name": name,
-                    "content": artifacts[name],
-                    "action": "edit",
-                }
-            )
-        },
-        raw=True,
-    )
-    return open_code_artifact(artifacts, name, cur_line)
-def generate_vision_plan(
-    artifacts: Artifacts,
-    name: str,
-    chat: str,
-    media: List[str],
-    test_multi_plan: bool = True,
-    custom_tool_names: Optional[List[str]] = None,
-) -> str:
-    """Generates a plan to solve vision based tasks.
-    Parameters:
-        artifacts (Artifacts): The artifacts object to save the plan to.
-        name (str): The name of the artifact to save the plan context to.
-        chat (str): The chat message from the user.
-        media (List[str]): The media files to use.
-        test_multi_plan (bool): Do not change this parameter.
-        custom_tool_names (Optional[List[str]]): Do not change this parameter.
-    Returns:
-        str: The generated plan.
-    Examples
-    --------
-        >>> generate_vision_plan(artifacts, "plan.json", "Can you detect the dogs in this image?", ["image.jpg"])
-        [Start Plan Context]
-        plan1: This is a plan to detect dogs in an image
-        -load image
-        -detect dogs
-        -return detections
-        [End Plan Context]
-    """
-    # verbosity is set to 0 to avoid adding extra content to the VisionAgent conversation
-    if ZMQ_PORT is not None:
-        agent = va.agent.VisionAgentPlanner(
-            report_progress_callback=lambda inp: report_progress_callback(
-                int(ZMQ_PORT), inp
-            ),
-            verbosity=0,
-        )
-    else:
-        agent = va.agent.VisionAgentPlanner(verbosity=0)
-    fixed_chat: List[Message] = [{"role": "user", "content": chat, "media": media}]
-    response = agent.generate_plan(
-        fixed_chat,
-        test_multi_plan=test_multi_plan,
-        custom_tool_names=custom_tool_names,
-    )
-    if response.test_results is not None:
-        redisplay_results(response.test_results)
-    response.test_results = None
-    artifacts[name] = response.model_dump_json()
-    output_str = f"[Start Plan Context, saved at {name}]"
-    for plan in response.plans.keys():
-        output_str += f"\n{plan}: {response.plans[plan]['thoughts'].strip()}\n"  # type: ignore
-        output_str += "    -" + "\n    -".join(
-            e.strip() for e in response.plans[plan]["instructions"]
-        )
-    output_str += f"\nbest plan: {response.best_plan}\n"
-    output_str += "thoughts: " + response.plan_thoughts.strip() + "\n"
-    output_str += "[End Plan Context]"
-    print(output_str)
-    return output_str
-def generate_vision_code(
-    artifacts: Artifacts,
-    name: str,
-    chat: str,
-    media: List[str],
-    test_multi_plan: bool = True,
-    custom_tool_names: Optional[List[str]] = None,
-) -> str:
-    """Generates python code to solve vision based tasks.
-    Parameters:
-        artifacts (Artifacts): The artifacts object to save the code to.
-        name (str): The name of the artifact to save the code to.
-        chat (str): The chat message from the user.
-        media (List[str]): The media files to use.
-        test_multi_plan (bool): Do not change this parameter.
-        custom_tool_names (Optional[List[str]]): Do not change this parameter.
-    Returns:
-        str: The generated code.
-    Examples
-    --------
-        >>> generate_vision_code(artifacts, "code.py", "Can you detect the dogs in this image?", ["image.jpg"])
-        from vision_agent.tools import load_image, owl_v2
-        def detect_dogs(image_path: str):
-            image = load_image(image_path)
-            dogs = owl_v2("dog", image)
-            return dogs
-    """
-    # verbosity is set to 0 to avoid adding extra content to the VisionAgent conversation
-    if ZMQ_PORT is not None:
-        agent = va.agent.VisionAgentCoder(
-            report_progress_callback=lambda inp: report_progress_callback(
-                int(ZMQ_PORT), inp
-            ),
-            verbosity=0,
-        )
-    else:
-        agent = va.agent.VisionAgentCoder(verbosity=0)
-    fixed_chat: List[Message] = [{"role": "user", "content": chat, "media": media}]
-    response = agent.generate_code(
-        fixed_chat,
-        test_multi_plan=test_multi_plan,
-        custom_tool_names=custom_tool_names,
-    )
-    redisplay_results(response["test_result"])
-    code = response["code"]
-    artifacts[name] = code
-    code_lines = code.splitlines(keepends=True)
-    total_lines = len(code_lines)
-    display(
-        {
-            MimeType.APPLICATION_ARTIFACT: json.dumps(
-                {
-                    "name": name,
-                    "content": code,
-                    "contentType": "vision_code",
-                    "action": "create",
-                }
-            )
-        },
-        raw=True,
-    )
-    return view_lines(code_lines, 0, total_lines, name, total_lines)
-def edit_vision_code(
-    artifacts: Artifacts,
-    name: str,
-    chat_history: List[str],
-    media: List[str],
-    custom_tool_names: Optional[List[str]] = None,
-) -> str:
-    """Edits python code to solve a vision based task.
-    Parameters:
-        artifacts (Artifacts): The artifacts object to save the code to.
-        name (str): The file path to the code.
-        chat_history (List[str]): The chat history to used to generate the code.
-        custom_tool_names (Optional[List[str]]): Do not change this parameter.
-    Returns:
-        str: The edited code.
-    Examples
-    --------
-        >>> edit_vision_code(
-        >>>     artifacts,
-        >>>     "code.py",
-        >>>     ["Can you detect the dogs in this image?", "Can you use a higher threshold?"],
-        >>>     ["dog.jpg"],
-        >>> )
-        from vision_agent.tools import load_image, owl_v2
-        def detect_dogs(image_path: str):
-            image = load_image(image_path)
-            dogs = owl_v2("dog", image, threshold=0.8)
-            return dogs
-    """
-    # verbosity is set to 0 to avoid adding extra content to the VisionAgent conversation
-    agent = va.agent.VisionAgentCoder(verbosity=0)
-    if name not in artifacts:
-        print(f"[Artifact {name} does not exist]")
-        return f"[Artifact {name} does not exist]"
-    code = artifacts[name]
-    # Append latest code to second to last message from assistant
-    fixed_chat_history: List[Message] = []
-    user_message = "Previous user requests:"
-    for i, chat in enumerate(chat_history):
-        if i < len(chat_history) - 1:
-            user_message += " " + chat
-        else:
-            fixed_chat_history.append(
-                {"role": "user", "content": user_message, "media": media}
-            )
-            fixed_chat_history.append({"role": "assistant", "content": code})
-            fixed_chat_history.append({"role": "user", "content": chat})
-    response = agent.generate_code(
-        fixed_chat_history,
-        test_multi_plan=False,
-        custom_tool_names=custom_tool_names,
-    )
-    redisplay_results(response["test_result"])
-    code = response["code"]
-    artifacts[name] = code
-    code_lines = code.splitlines(keepends=True)
-    total_lines = len(code_lines)
-    display(
-        {
-            MimeType.APPLICATION_ARTIFACT: json.dumps(
-                {
-                    "name": name,
-                    "content": code,
-                    "action": "edit",
-                }
-            )
-        },
-        raw=True,
-    )
-    return view_lines(code_lines, 0, total_lines, name, total_lines)
-def list_artifacts(artifacts: Artifacts) -> str:
-    """Lists all the artifacts that have been loaded into the artifacts object."""
-    output_str = artifacts.show()
-    print(output_str)
-    return output_str
 def check_and_load_image(code: str) -> List[str]:
     if not code.strip():
         return []
@@ -584,108 +192,9 @@ def get_diff_with_prompts(name: str, before: str, after: str) -> str:
     return f"[Artifact {name} edits]\n{diff}\n[End of edits]"
-def use_extra_vision_agent_args(
-    code: Optional[str],
-    test_multi_plan: bool = True,
-    custom_tool_names: Optional[List[str]] = None,
-) -> Optional[str]:
-    """This is for forcing arguments passed by the user to VisionAgent into the
-    VisionAgentCoder call.
-    Parameters:
-        code (str): The code to edit.
-        test_multi_plan (bool): Do not change this parameter.
-        custom_tool_names (Optional[List[str]]): Do not change this parameter.
-    Returns:
-        str: The edited code.
-    """
-    if code is None:
-        return None
-    class VisionAgentTransformer(cst.CSTTransformer):
-        def __init__(
-            self, test_multi_plan: bool, custom_tool_names: Optional[List[str]]
-        ):
-            self.test_multi_plan = test_multi_plan
-            self.custom_tool_names = custom_tool_names
-        def leave_Call(
-            self, original_node: cst.Call, updated_node: cst.Call
-        ) -> cst.Call:
-            # Check if the function being called is generate_vision_code or edit_vision_code
-            if isinstance(updated_node.func, cst.Name) and updated_node.func.value in [
-                "generate_vision_code",
-                "edit_vision_code",
-            ]:
-                # Add test_multi_plan argument to generate_vision_code calls
-                if updated_node.func.value == "generate_vision_code":
-                    new_arg = cst.Arg(
-                        keyword=cst.Name("test_multi_plan"),
-                        value=cst.Name(str(self.test_multi_plan)),
-                        equal=cst.AssignEqual(
-                            whitespace_before=cst.SimpleWhitespace(""),
-                            whitespace_after=cst.SimpleWhitespace(""),
-                        ),
-                    )
-                    updated_node = updated_node.with_changes(
-                        args=[*updated_node.args, new_arg]
-                    )
-                # Add custom_tool_names if provided
-                if self.custom_tool_names is not None:
-                    list_arg = []
-                    for i, tool_name in enumerate(self.custom_tool_names):
-                        if i < len(self.custom_tool_names) - 1:
-                            list_arg.append(
-                                cst._nodes.expression.Element(
-                                    value=cst.SimpleString(value=f'"{tool_name}"'),
-                                    comma=cst.Comma(
-                                        whitespace_before=cst.SimpleWhitespace(""),
-                                        whitespace_after=cst.SimpleWhitespace(" "),
-                                    ),
-                                )
-                            )
-                        else:
-                            list_arg.append(
-                                cst._nodes.expression.Element(
-                                    value=cst.SimpleString(value=f'"{tool_name}"'),
-                                )
-                            )
-                    new_arg = cst.Arg(
-                        keyword=cst.Name("custom_tool_names"),
-                        value=cst.List(list_arg),
-                        equal=cst.AssignEqual(
-                            whitespace_before=cst.SimpleWhitespace(""),
-                            whitespace_after=cst.SimpleWhitespace(""),
-                        ),
-                    )
-                    updated_node = updated_node.with_changes(
-                        args=[*updated_node.args, new_arg]
-                    )
-            return updated_node
-    # Parse the input code into a CST node
-    tree = cst.parse_module(code)
-    # Apply the transformer to modify the CST
-    transformer = VisionAgentTransformer(test_multi_plan, custom_tool_names)
-    modified_tree = tree.visit(transformer)
-    # Return the modified code as a string
-    return modified_tree.code
 META_TOOL_DOCSTRING = get_tool_documentation(
     [
         get_tool_descriptions,
-        open_code_artifact,
-        create_code_artifact,
-        edit_code_artifact,
-        generate_vision_code,
-        edit_vision_code,
         view_media_artifact,
-        list_artifacts,
     ]
 )

vision_agent/utils/tools.py CHANGED Viewed

@@ -27,7 +27,9 @@ def get_vision_agent_api_key() -> str:
     if vision_agent_api_key:
         return vision_agent_api_key
     else:
-        raise ValueError("VISION_AGENT_API_KEY not found in environment variables.")
+        raise ValueError(
+            "VISION_AGENT_API_KEY not found in environment variables, required for tool usage. You can get a free key from https://va.landing.ai/account/api-key"
+        )
 def should_report_tool_traces() -> bool:

vision-agent 1.0.5__py3-none-any.whl → 1.0.8__py3-none-any.whl

vision-agent 1.0.5__py3-none-any.whl → 1.0.8__py3-none-any.whl