PyPI - vision-agent - Versions diffs - 0.2.160__py3-none-any.whl → 0.2.162__py3-none-any.whl - Mend

vision-agent 0.2.160__py3-none-any.whl → 0.2.162__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

vision_agent/agent/__init__.py +8 -0
vision_agent/agent/agent_utils.py +76 -2
vision_agent/agent/vision_agent.py +57 -17
vision_agent/agent/vision_agent_coder.py +163 -489
vision_agent/agent/vision_agent_coder_prompts.py +0 -203
vision_agent/agent/vision_agent_planner.py +553 -0
vision_agent/agent/vision_agent_planner_prompts.py +199 -0
vision_agent/tools/__init__.py +0 -1
vision_agent/tools/meta_tools.py +87 -5
{vision_agent-0.2.160.dist-info → vision_agent-0.2.162.dist-info}/METADATA +7 -7
{vision_agent-0.2.160.dist-info → vision_agent-0.2.162.dist-info}/RECORD +13 -11
{vision_agent-0.2.160.dist-info → vision_agent-0.2.162.dist-info}/LICENSE +0 -0
{vision_agent-0.2.160.dist-info → vision_agent-0.2.162.dist-info}/WHEEL +0 -0

vision_agent/agent/__init__.py CHANGED Viewed

@@ -7,3 +7,11 @@ from .vision_agent_coder import (
     OpenAIVisionAgentCoder,
     VisionAgentCoder,
 )
+from .vision_agent_planner import (
+    AnthropicVisionAgentPlanner,
+    AzureVisionAgentPlanner,
+    OllamaVisionAgentPlanner,
+    OpenAIVisionAgentPlanner,
+    PlanContext,
+    VisionAgentPlanner,
+)

vision_agent/agent/agent_utils.py CHANGED Viewed

@@ -2,10 +2,17 @@ import json
 import logging
 import re
 import sys
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
+from rich.console import Console
+from rich.style import Style
+from rich.syntax import Syntax
+import vision_agent.tools as T
 logging.basicConfig(stream=sys.stdout)
 _LOGGER = logging.getLogger(__name__)
+_CONSOLE = Console()
 def _extract_sub_json(json_str: str) -> Optional[Dict[str, Any]]:
@@ -41,11 +48,16 @@ def _strip_markdown_code(inp_str: str) -> str:
 def extract_json(json_str: str) -> Dict[str, Any]:
     json_str_mod = json_str.replace("\n", " ").strip()
-    json_str_mod = json_str_mod.replace("'", '"')
     json_str_mod = json_str_mod.replace(": True", ": true").replace(
         ": False", ": false"
     )
+    # sometimes the json is in single quotes
+    try:
+        return json.loads(json_str_mod.replace("'", '"'))  # type: ignore
+    except json.JSONDecodeError:
+        pass
     try:
         return json.loads(json_str_mod)  # type: ignore
     except json.JSONDecodeError:
@@ -83,3 +95,65 @@ def remove_installs_from_code(code: str) -> str:
     pattern = r"\n!pip install.*?(\n|\Z)\n"
     code = re.sub(pattern, "", code, flags=re.DOTALL)
     return code
+def format_memory(memory: List[Dict[str, str]]) -> str:
+    output_str = ""
+    for i, m in enumerate(memory):
+        output_str += f"### Feedback {i}:\n"
+        output_str += f"Code {i}:\n```python\n{m['code']}```\n\n"
+        output_str += f"Feedback {i}: {m['feedback']}\n\n"
+        if "edits" in m:
+            output_str += f"Edits {i}:\n{m['edits']}\n"
+        output_str += "\n"
+    return output_str
+def format_plans(plans: Dict[str, Any]) -> str:
+    plan_str = ""
+    for k, v in plans.items():
+        plan_str += "\n" + f"{k}: {v['thoughts']}\n"
+        plan_str += "    -" + "\n    -".join([e for e in v["instructions"]])
+    return plan_str
+class DefaultImports:
+    """Container for default imports used in the code execution."""
+    common_imports = [
+        "import os",
+        "import numpy as np",
+        "from vision_agent.tools import *",
+        "from typing import *",
+        "from pillow_heif import register_heif_opener",
+        "register_heif_opener()",
+    ]
+    @staticmethod
+    def to_code_string() -> str:
+        return "\n".join(DefaultImports.common_imports + T.__new_tools__)
+    @staticmethod
+    def prepend_imports(code: str) -> str:
+        """Run this method to prepend the default imports to the code.
+        NOTE: be sure to run this method after the custom tools have been registered.
+        """
+        return DefaultImports.to_code_string() + "\n\n" + code
+def print_code(title: str, code: str, test: Optional[str] = None) -> None:
+    _CONSOLE.print(title, style=Style(bgcolor="dark_orange3", bold=True))
+    _CONSOLE.print("=" * 30 + " Code " + "=" * 30)
+    _CONSOLE.print(
+        Syntax(
+            DefaultImports.prepend_imports(code),
+            "python",
+            theme="gruvbox-dark",
+            line_numbers=True,
+        )
+    )
+    if test:
+        _CONSOLE.print("=" * 30 + " Test " + "=" * 30)
+        _CONSOLE.print(Syntax(test, "python", theme="gruvbox-dark", line_numbers=True))

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -14,8 +14,8 @@ from vision_agent.agent.vision_agent_prompts import (
     VA_CODE,
 )
 from vision_agent.lmm import LMM, AnthropicLMM, Message, OpenAILMM
-from vision_agent.tools import META_TOOL_DOCSTRING
 from vision_agent.tools.meta_tools import (
+    META_TOOL_DOCSTRING,
     Artifacts,
     check_and_load_image,
     use_extra_vision_agent_args,
@@ -195,8 +195,8 @@ class VisionAgent(Agent):
         agent: Optional[LMM] = None,
         verbosity: int = 0,
         local_artifacts_path: Optional[Union[str, Path]] = None,
-        code_sandbox_runtime: Optional[str] = None,
         callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
+        code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
     ) -> None:
         """Initialize the VisionAgent.
@@ -206,13 +206,18 @@ class VisionAgent(Agent):
             verbosity (int): The verbosity level of the agent.
             local_artifacts_path (Optional[Union[str, Path]]): The path to the local
                 artifacts file.
-            code_sandbox_runtime (Optional[str]): The code sandbox runtime to use.
+            callback_message (Optional[Callable[[Dict[str, Any]], None]]): Callback
+                function to send intermediate update messages.
+            code_interpreter (Optional[Union[str, CodeInterpreter]]): For string values
+                it can be one of: None, "local" or "e2b". If None, it will read from
+                the environment variable "CODE_SANDBOX_RUNTIME". If a CodeInterpreter
+                object is provided it will use that.
         """
         self.agent = AnthropicLMM(temperature=0.0) if agent is None else agent
         self.max_iterations = 12
         self.verbosity = verbosity
-        self.code_sandbox_runtime = code_sandbox_runtime
+        self.code_interpreter = code_interpreter
         self.callback_message = callback_message
         if self.verbosity >= 1:
             _LOGGER.setLevel(logging.INFO)
@@ -230,7 +235,7 @@ class VisionAgent(Agent):
         input: Union[str, List[Message]],
         media: Optional[Union[str, Path]] = None,
         artifacts: Optional[Artifacts] = None,
-    ) -> List[Message]:
+    ) -> str:
         """Chat with VisionAgent and get the conversation response.
         Parameters:
@@ -247,10 +252,28 @@ class VisionAgent(Agent):
             input = [{"role": "user", "content": input}]
             if media is not None:
                 input[0]["media"] = [media]
-        results, _ = self.chat_with_code(input, artifacts)
-        return results
+        results, _ = self.chat_with_artifacts(input, artifacts)
+        return results[-1]["content"]  # type: ignore
-    def chat_with_code(
+    def chat(
+        self,
+        chat: List[Message],
+    ) -> List[Message]:
+        """Chat with VisionAgent, it will use code to execute actions to accomplish
+        its tasks.
+        Parameters:
+            chat (List[Message]): A conversation in the format of:
+                [{"role": "user", "content": "describe your task here..."}]
+                or if it contains media files, it should be in the format of:
+                [{"role": "user", "content": "describe your task here...", "media": ["image1.jpg", "image2.jpg"]}]
+        Returns:
+            List[Message]: The conversation response.
+        """
+        return self.chat_with_artifacts(chat)[0]
+    def chat_with_artifacts(
         self,
         chat: List[Message],
         artifacts: Optional[Artifacts] = None,
@@ -284,9 +307,16 @@ class VisionAgent(Agent):
             # this is setting remote artifacts path
             artifacts = Artifacts(WORKSPACE / "artifacts.pkl")
-        with CodeInterpreterFactory.new_instance(
-            code_sandbox_runtime=self.code_sandbox_runtime,
-        ) as code_interpreter:
+        # NOTE: each chat should have a dedicated code interpreter instance to avoid concurrency issues
+        code_interpreter = (
+            self.code_interpreter
+            if self.code_interpreter is not None
+            and not isinstance(self.code_interpreter, str)
+            else CodeInterpreterFactory.new_instance(
+                code_sandbox_runtime=self.code_interpreter,
+            )
+        )
+        with code_interpreter:
             orig_chat = copy.deepcopy(chat)
             int_chat = copy.deepcopy(chat)
             last_user_message = chat[-1]
@@ -472,8 +502,8 @@ class OpenAIVisionAgent(VisionAgent):
         agent: Optional[LMM] = None,
         verbosity: int = 0,
         local_artifacts_path: Optional[Union[str, Path]] = None,
-        code_sandbox_runtime: Optional[str] = None,
         callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
+        code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
     ) -> None:
         """Initialize the VisionAgent using OpenAI LMMs.
@@ -483,7 +513,12 @@ class OpenAIVisionAgent(VisionAgent):
             verbosity (int): The verbosity level of the agent.
             local_artifacts_path (Optional[Union[str, Path]]): The path to the local
                 artifacts file.
-            code_sandbox_runtime (Optional[str]): The code sandbox runtime to use.
+            callback_message (Optional[Callable[[Dict[str, Any]], None]]): Callback
+                function to send intermediate update messages.
+            code_interpreter (Optional[Union[str, CodeInterpreter]]): For string values
+                it can be one of: None, "local" or "e2b". If None, it will read from
+                the environment variable "CODE_SANDBOX_RUNTIME". If a CodeInterpreter
+                object is provided it will use that.
         """
         agent = OpenAILMM(temperature=0.0, json_mode=True) if agent is None else agent
@@ -491,8 +526,8 @@ class OpenAIVisionAgent(VisionAgent):
             agent,
             verbosity,
             local_artifacts_path,
-            code_sandbox_runtime,
             callback_message,
+            code_interpreter,
         )
@@ -502,8 +537,8 @@ class AnthropicVisionAgent(VisionAgent):
         agent: Optional[LMM] = None,
         verbosity: int = 0,
         local_artifacts_path: Optional[Union[str, Path]] = None,
-        code_sandbox_runtime: Optional[str] = None,
         callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
+        code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
     ) -> None:
         """Initialize the VisionAgent using Anthropic LMMs.
@@ -513,7 +548,12 @@ class AnthropicVisionAgent(VisionAgent):
             verbosity (int): The verbosity level of the agent.
             local_artifacts_path (Optional[Union[str, Path]]): The path to the local
                 artifacts file.
-            code_sandbox_runtime (Optional[str]): The code sandbox runtime to use.
+            callback_message (Optional[Callable[[Dict[str, Any]], None]]): Callback
+                function to send intermediate update messages.
+            code_interpreter (Optional[Union[str, CodeInterpreter]]): For string values
+                it can be one of: None, "local" or "e2b". If None, it will read from
+                the environment variable "CODE_SANDBOX_RUNTIME". If a CodeInterpreter
+                object is provided it will use that.
         """
         agent = AnthropicLMM(temperature=0.0) if agent is None else agent
@@ -521,6 +561,6 @@ class AnthropicVisionAgent(VisionAgent):
             agent,
             verbosity,
             local_artifacts_path,
-            code_sandbox_runtime,
             callback_message,
+            code_interpreter,
         )

vision-agent 0.2.160__py3-none-any.whl → 0.2.162__py3-none-any.whl

vision-agent 0.2.160__py3-none-any.whl → 0.2.162__py3-none-any.whl