PyPI - vision-agent - Versions diffs - 0.2.29__py3-none-any.whl → 0.2.31__py3-none-any.whl - Mend

vision-agent 0.2.29__py3-none-any.whl → 0.2.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

vision_agent/agent/__init__.py +2 -2
vision_agent/agent/agent.py +2 -2
vision_agent/agent/agent_coder.py +8 -8
vision_agent/agent/{vision_agent_v2.py → data_interpreter.py} +12 -12
vision_agent/agent/{vision_agent_v2_prompts.py → data_interpreter_prompts.py} +3 -3
vision_agent/agent/easytool.py +8 -8
vision_agent/agent/easytool_v2.py +778 -0
vision_agent/agent/easytool_v2_prompts.py +152 -0
vision_agent/agent/reflexion.py +8 -8
vision_agent/agent/vision_agent.py +360 -691
vision_agent/agent/vision_agent_prompts.py +231 -149
vision_agent/llm/llm.py +3 -4
vision_agent/lmm/lmm.py +6 -6
vision_agent/tools/__init__.py +21 -22
vision_agent/tools/easytool_tools.py +1242 -0
vision_agent/tools/tools.py +533 -1090
vision_agent-0.2.31.dist-info/METADATA +175 -0
vision_agent-0.2.31.dist-info/RECORD +36 -0
vision_agent/agent/vision_agent_v3.py +0 -386
vision_agent/agent/vision_agent_v3_prompts.py +0 -226
vision_agent/tools/tools_v2.py +0 -685
vision_agent-0.2.29.dist-info/METADATA +0 -226
vision_agent-0.2.29.dist-info/RECORD +0 -36
{vision_agent-0.2.29.dist-info → vision_agent-0.2.31.dist-info}/LICENSE +0 -0
{vision_agent-0.2.29.dist-info → vision_agent-0.2.31.dist-info}/WHEEL +0 -0

vision_agent/agent/__init__.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from .agent import Agent
 from .agent_coder import AgentCoder
+from .data_interpreter import DataInterpreter
 from .easytool import EasyTool
+from .easytool_v2 import EasyToolV2
 from .reflexion import Reflexion
 from .vision_agent import VisionAgent
-from .vision_agent_v2 import VisionAgentV2
-from .vision_agent_v3 import VisionAgentV3

vision_agent/agent/agent.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Dict, List, Optional, Union, Any
+from typing import Any, Dict, List, Optional, Union
 class Agent(ABC):
@@ -8,7 +8,7 @@ class Agent(ABC):
     def __call__(
         self,
         input: Union[List[Dict[str, str]], str],
-        image: Optional[Union[str, Path]] = None,
+        media: Optional[Union[str, Path]] = None,
     ) -> str:
         pass

vision_agent/agent/agent_coder.py CHANGED Viewed

@@ -3,7 +3,7 @@ import logging
 import os
 import sys
 from pathlib import Path
-from typing import Dict, List, Optional, Union, Any
+from typing import Any, Dict, List, Optional, Union
 from rich.console import Console
 from rich.syntax import Syntax
@@ -18,7 +18,7 @@ from vision_agent.agent.agent_coder_prompts import (
 )
 from vision_agent.llm import LLM, OpenAILLM
 from vision_agent.lmm import LMM, OpenAILMM
-from vision_agent.tools.tools_v2 import TOOL_DOCSTRING, UTILITIES_DOCSTRING
+from vision_agent.tools import TOOL_DOCSTRING, UTILITIES_DOCSTRING
 from vision_agent.utils import Execute
 IMPORT_HELPER = """
@@ -38,7 +38,7 @@ import numpy as np
 import string
 from typing import *
 from collections import *
-from vision_agent.tools.tools_v2 import *
+from vision_agent.tools import *
 """
 logging.basicConfig(stream=sys.stdout)
 _LOGGER = logging.getLogger(__name__)
@@ -150,20 +150,20 @@ class AgentCoder(Agent):
     def __call__(
         self,
         input: Union[List[Dict[str, str]], str],
-        image: Optional[Union[str, Path]] = None,
+        media: Optional[Union[str, Path]] = None,
     ) -> str:
         if isinstance(input, str):
             input = [{"role": "user", "content": input}]
-        return self.chat(input, image)
+        return self.chat(input, media)
     def chat(
         self,
         input: List[Dict[str, str]],
-        image: Optional[Union[str, Path]] = None,
+        media: Optional[Union[str, Path]] = None,
     ) -> str:
         question = input[0]["content"]
-        if image:
-            question += f" Input file path: {os.path.abspath(image)}"
+        if media:
+            question += f" Input file path: {os.path.abspath(media)}"
         code = ""
         feedback = ""

vision_agent/agent/{vision_agent_v2.py → data_interpreter.py} RENAMED Viewed

@@ -10,7 +10,7 @@ from rich.syntax import Syntax
 from tabulate import tabulate
 from vision_agent.agent import Agent
-from vision_agent.agent.vision_agent_v2_prompts import (
+from vision_agent.agent.data_interpreter_prompts import (
     CODE,
     CODE_SYS_MSG,
     DEBUG,
@@ -25,7 +25,7 @@ from vision_agent.agent.vision_agent_v2_prompts import (
     USER_REQ_SUBTASK_WM_CONTEXT,
 )
 from vision_agent.llm import LLM, OpenAILLM
-from vision_agent.tools.tools_v2 import TOOL_DESCRIPTIONS, TOOLS_DF
+from vision_agent.tools import TOOL_DESCRIPTIONS, TOOLS_DF
 from vision_agent.utils import Execute, Sim
 logging.basicConfig(level=logging.INFO)
@@ -331,11 +331,11 @@ def run_plan(
     return current_code, current_test, plan, working_memory
-class VisionAgentV2(Agent):
-    """Vision Agent is an AI agentic framework geared towards outputting Python code to
-    solve vision tasks. It is inspired by MetaGPT's Data Interpreter
-    https://arxiv.org/abs/2402.18679. Vision Agent has several key features to help it
-    generate code:
+class DataInterpreter(Agent):
+    """This version of Data Interpreter is an AI agentic framework geared towards
+    outputting Python code to solve vision tasks. It is inspired by MetaGPT's Data
+    Interpreter https://arxiv.org/abs/2402.18679. This version of Data Interpreter has
+    several key features to help it generate code:
     - A planner to generate a plan of tasks to solve a user requirement. The planner
     can output code tasks or test tasks, where test tasks are used to verify the code.
@@ -379,29 +379,29 @@ class VisionAgentV2(Agent):
     def __call__(
         self,
         input: Union[List[Dict[str, str]], str],
-        image: Optional[Union[str, Path]] = None,
+        media: Optional[Union[str, Path]] = None,
         plan: Optional[List[Dict[str, Any]]] = None,
     ) -> str:
         if isinstance(input, str):
             input = [{"role": "user", "content": input}]
-        results = self.chat_with_workflow(input, image, plan)
+        results = self.chat_with_workflow(input, media, plan)
         return results["code"]  # type: ignore
     @traceable
     def chat_with_workflow(
         self,
         chat: List[Dict[str, str]],
-        image: Optional[Union[str, Path]] = None,
+        media: Optional[Union[str, Path]] = None,
         plan: Optional[List[Dict[str, Any]]] = None,
     ) -> Dict[str, Any]:
         if len(chat) == 0:
             raise ValueError("Input cannot be empty.")
-        if image is not None:
+        if media is not None:
             # append file names to all user messages
             for chat_i in chat:
                 if chat_i["role"] == "user":
-                    chat_i["content"] += f" Image name {image}"
+                    chat_i["content"] += f" Image name {media}"
         working_code = ""
         if plan is not None:

vision_agent/agent/{vision_agent_v2_prompts.py → data_interpreter_prompts.py} RENAMED Viewed

@@ -74,15 +74,15 @@ CODE = """
 # Constraints
 - Write a function that accomplishes the 'Current Subtask'. You are supplied code from a previous task under 'Previous Code', do not delete or change previous code unless it contains a bug or it is necessary to complete the 'Current Subtask'.
-- Always prioritize using pre-defined tools or code for the same functionality from 'Tool Info' when working on 'Current Subtask'. You have access to all these tools through the `from vision_agent.tools.tools_v2 import *` import.
+- Always prioritize using pre-defined tools or code for the same functionality from 'Tool Info' when working on 'Current Subtask'. You have access to all these tools through the `from vision_agent.tools import *` import.
 - You may recieve previous trials and errors under 'Previous Task', this is code, output and reflections from previous tasks. You can use these to avoid running in to the same issues when writing your code.
-- Use the `save_json` function from `vision_agent.tools.tools_v2` to save your output as a json file.
+- Use the `save_json` function from `vision_agent.tools` to save your output as a json file.
 - Write clean, readable, and well-documented code.
 # Output
 While some concise thoughts are helpful, code is absolutely required. If possible, execute your defined functions in the code output. Output code in the following format:
 ```python
-from vision_agent.tools.tools_v2 imoprt *
+from vision_agent.tools imoprt *
 # your code goes here
 ```

vision_agent/agent/easytool.py CHANGED Viewed

@@ -6,7 +6,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 from vision_agent.llm import LLM, OpenAILLM
 from vision_agent.lmm import LMM
-from vision_agent.tools import TOOLS
+from vision_agent.tools.easytool_tools import TOOLS
 from .agent import Agent
 from .easytool_prompts import (
@@ -272,7 +272,7 @@ class EasyTool(Agent):
     def __call__(
         self,
         input: Union[List[Dict[str, str]], str],
-        image: Optional[Union[str, Path]] = None,
+        media: Optional[Union[str, Path]] = None,
     ) -> str:
         """Invoke the vision agent.
@@ -285,14 +285,14 @@ class EasyTool(Agent):
         """
         if isinstance(input, str):
             input = [{"role": "user", "content": input}]
-        return self.chat(input, image=image)
+        return self.chat(input, media=media)
     def chat_with_workflow(
-        self, chat: List[Dict[str, str]], image: Optional[Union[str, Path]] = None
+        self, chat: List[Dict[str, str]], media: Optional[Union[str, Path]] = None
     ) -> Tuple[str, List[Dict]]:
         question = chat[0]["content"]
-        if image:
-            question += f" Image name: {image}"
+        if media:
+            question += f" Image name: {media}"
         tasks = task_decompose(
             self.task_model,
             question,
@@ -340,7 +340,7 @@ class EasyTool(Agent):
         return answer_summarize(self.answer_model, question, answers), all_tool_results
     def chat(
-        self, chat: List[Dict[str, str]], image: Optional[Union[str, Path]] = None
+        self, chat: List[Dict[str, str]], media: Optional[Union[str, Path]] = None
     ) -> str:
-        answer, _ = self.chat_with_workflow(chat, image=image)
+        answer, _ = self.chat_with_workflow(chat, media=media)
         return answer

vision-agent 0.2.29__py3-none-any.whl → 0.2.31__py3-none-any.whl

vision-agent 0.2.29__py3-none-any.whl → 0.2.31__py3-none-any.whl